src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmigen.lib.coding import PriorityEncoder
  25
  26 from openpower.decoder.power_decoder import create_pdecode
  27 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  28 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  29 from openpower.decoder.decode2execute1 import Data
  30 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  31                                      SVP64PredMode)
  32 from openpower.state import CoreState
  33 from openpower.consts import (CR, SVP64CROffs)
  34 from soc.experiment.testmem import TestMemory # test only for instructions
  35 from soc.regfile.regfiles import StateRegs, FastRegs
  36 from soc.simple.core import NonProductionCore
  37 from soc.config.test.test_loadstore import TestMemPspec
  38 from soc.config.ifetch import ConfigFetchUnit
  39 from soc.debug.dmi import CoreDebug, DMIInterface
  40 from soc.debug.jtag import JTAG
  41 from soc.config.pinouts import get_pinspecs
  42 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  43 from soc.bus.simple_gpio import SimpleGPIO
  44 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  45 from soc.clock.select import ClockSelect
  46 from soc.clock.dummypll import DummyPLL
  47 from openpower.sv.svstate import SVSTATERec
  48
  49
  50 from nmutil.util import rising_edge
  51
  52 def get_insn(f_instr_o, pc):
  53     if f_instr_o.width == 32:
  54         return f_instr_o
  55     else:
  56         # 64-bit: bit 2 of pc decides which word to select
  57         return f_instr_o.word_select(pc[2], 32)
  58
# gets state input or reads from state regfile
def state_get(m, core_rst, state_i, name, regfile, regnum):
    """Return a 64-bit signal holding either an override or a regfile value.

    * m: module to which the comb/sync statements are added
    * core_rst: all logic below is only active while this is low
    * state_i: override input; its "ok" and "data" fields are read
    * name: name of the returned signal (also prefixes the delay flag)
    * regfile: read port; its "ren" is driven and its "o_data" is read
    * regnum: register index, converted to a unary read-enable (1<<regnum)

    When state_i.ok is set the result is state_i.data.  Otherwise a read
    of the regfile is requested, and the result is taken from
    regfile.o_data on the following clock (tracked by the "_ok_delay"
    flag, which registers ~state_i.ok).
    """
    comb = m.d.comb
    sync = m.d.sync
    # result signal: named by the caller, so not necessarily the PC
    res = Signal(64, reset_less=True, name=name)
    # registered copy of "no override was present": marks the cycle in
    # which the regfile read data is expected
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    with m.If(~core_rst):
        sync += res_ok_delay.eq(~state_i.ok)
        with m.If(state_i.ok):
            # incoming override (e.g. start from pc_i)
            comb += res.eq(state_i.data)
        with m.Else():
            # otherwise request a read of the regfile entry...
            comb += regfile.ren.eq(1<<regnum)
        # ... but the data is only picked up on a 1-clock delay.
        # this assignment comes after the override above, so when both
        # conditions hold it is the one that takes effect
        with m.If(res_ok_delay):
            comb += res.eq(regfile.o_data)
    return res
  78
  79 def get_predint(m, mask, name):
  80     """decode SVP64 predicate integer mask field to reg number and invert
  81     this is identical to the equivalent function in ISACaller except that
  82     it doesn't read the INT directly, it just decodes "what needs to be done"
  83     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  84
  85     * all1s is set to indicate that no mask is to be applied.
  86     * regread indicates the GPR register number to be read
  87     * invert is set to indicate that the register value is to be inverted
  88     * unary indicates that the contents of the register is to be shifted 1<<r3
  89     """
  90     comb = m.d.comb
  91     regread = Signal(5, name=name+"regread")
  92     invert = Signal(name=name+"invert")
  93     unary = Signal(name=name+"unary")
  94     all1s = Signal(name=name+"all1s")
  95     with m.Switch(mask):
  96         with m.Case(SVP64PredInt.ALWAYS.value):
  97             comb += all1s.eq(1)      # use 0b1111 (all ones)
  98         with m.Case(SVP64PredInt.R3_UNARY.value):
  99             comb += regread.eq(3)
 100             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 101         with m.Case(SVP64PredInt.R3.value):
 102             comb += regread.eq(3)
 103         with m.Case(SVP64PredInt.R3_N.value):
 104             comb += regread.eq(3)
 105             comb += invert.eq(1)
 106         with m.Case(SVP64PredInt.R10.value):
 107             comb += regread.eq(10)
 108         with m.Case(SVP64PredInt.R10_N.value):
 109             comb += regread.eq(10)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R30.value):
 112             comb += regread.eq(30)
 113         with m.Case(SVP64PredInt.R30_N.value):
 114             comb += regread.eq(30)
 115             comb += invert.eq(1)
 116     return regread, invert, unary, all1s
 117
 118 def get_predcr(m, mask, name):
 119     """decode SVP64 predicate CR to reg number field and invert status
 120     this is identical to _get_predcr in ISACaller
 121     """
 122     comb = m.d.comb
 123     idx = Signal(2, name=name+"idx")
 124     invert = Signal(name=name+"crinvert")
 125     with m.Switch(mask):
 126         with m.Case(SVP64PredCR.LT.value):
 127             comb += idx.eq(CR.LT)
 128             comb += invert.eq(0)
 129         with m.Case(SVP64PredCR.GE.value):
 130             comb += idx.eq(CR.LT)
 131             comb += invert.eq(1)
 132         with m.Case(SVP64PredCR.GT.value):
 133             comb += idx.eq(CR.GT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.LE.value):
 136             comb += idx.eq(CR.GT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.EQ.value):
 139             comb += idx.eq(CR.EQ)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.NE.value):
 142             comb += idx.eq(CR.EQ)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.SO.value):
 145             comb += idx.eq(CR.SO)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NS.value):
 148             comb += idx.eq(CR.SO)
 149             comb += invert.eq(1)
 150     return idx, invert
 151
 152
 153 class TestIssuerInternal(Elaboratable):
 154     """TestIssuer - reads instructions from TestMemory and issues them
 155
 156     efficiency and speed is not the main goal here: functional correctness
 157     and code clarity is.  optimisations (which almost 100% interfere with
 158     easy understanding) come later.
 159     """
    def __init__(self, pspec):
        """set up the sub-components and top-level signals of the issuer

        pspec is the configuration object.  the following attributes are
        all optional (each is tested with hasattr before being read):

        * svp64: True enables the SVP64 prefix decoder and predication
        * regreduce: True is passed on to PowerDecode2 as regreduce_en
        * debug: 'jtag' adds a JTAG interface
        * sram4x4kblock: True adds four SPBlock512W64B8W SRAM blocks
        * xics: True adds the XICS ICP/ICS interrupt controllers
        * gpio: True adds a SimpleGPIO peripheral

        note: when JTAG is enabled, pspec is *modified* here
        (wb_icache_en and wb_dcache_en are added to it) before it is
        handed to NonProductionCore and ConfigFetchUnit.
        """

        # test if SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # and if regfiles are reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                                            (pspec.regreduce == True))

        # JTAG interface.  add this right at the start because if it's
        # added it *modifies* the pspec, by adding enable/disable signals
        # for parts of the rest of the core
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        self.dbg_domain = "sync" # sigh "dbgsync" too problematic
        #self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
        if self.jtag_en:
            # XXX MUST keep this up-to-date with litex, and
            # soc-cocotb-sim, and err.. all needs sorting out, argh
            subset = ['uart',
                      'mtwi',
                      'eint', 'gpio', 'mspi0',
                      # 'mspi1', - disabled for now
                      # 'pwm', 'sd0', - disabled for now
                       'sdr']
            self.jtag = JTAG(get_pinspecs(subset=subset),
                             domain=self.dbg_domain)
            # add signals to pspec to enable/disable icache and dcache
            # (or data and instruction wishbone if icache/dcache not included)
            # https://bugs.libre-soc.org/show_bug.cgi?id=520
            # TODO: do we actually care if these are not domain-synchronised?
            # honestly probably not.
            pspec.wb_icache_en = self.jtag.wb_icache_en
            pspec.wb_dcache_en = self.jtag.wb_dcache_en
            self.wb_sram_en = self.jtag.wb_sram_en
        else:
            # no JTAG: the SRAM enable is tied permanently high
            self.wb_sram_en = Const(1)

        # add 4k sram blocks?
        self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
                         pspec.sram4x4kblock == True)
        if self.sram4x4k:
            self.sram4k = []
            for i in range(4):
                self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
                                                    #features={'err'}
                                                    ))

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            # expose the ICS interrupt-level input at the top level
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            # expose the GPIO output at the top level
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core.  suitable for prototyping / demo only
        # (the local "core" is not used further in this constructor)
        self.core = core = NonProductionCore(pspec)
        self.core_rst = ResetSignal("coresync")

        # instruction decoder.  goes into Trap Record
        #pdecode = create_pdecode()
        self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
        self.pdecode2 = PowerDecode2(None, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand,
                                     svp64_en=self.svp64_en,
                                     regreduce_en=self.regreduce_en)
        # (the local "pdecode" is not used further in this constructor)
        pdecode = self.pdecode2.dec

        if self.svp64_en:
            self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu

        # DMI interface
        self.dbg = CoreDebug()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
        self.svstate_i = Data(64, "svstate_i") # ditto
        self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read /write ports for PC, MSR, SVSTATE
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia'] # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
        self.state_r_msr = staterf.r_ports['msr'] # MSR rd
        self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
        self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi'] # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
        self.xer_r = xerrf.r_ports['full_xer'] # XER read

        if self.svp64_en:
            # for predication
            self.int_pred = intrf.r_ports['pred'] # INT predicate read
            self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

        # pulse to synchronize the simulator at instruction end
        self.insn_done = Signal()

        if self.svp64_en:
            # store copies of predicate masks
            self.srcmask = Signal(64)
            self.dstmask = Signal(64)
 282
    def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
                        fetch_pc_o_ready, fetch_pc_i_valid,
                        fetch_insn_o_valid, fetch_insn_i_ready):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        states:

        * IDLE: raises fetch_pc_o_ready.  on fetch_pc_i_valid, presents
          pc to the instruction memory, latches pc and svstate into
          cur_state, and requests a read of the MSR.
        * INSN_READ: captures the MSR (once), then waits for the
          instruction memory to stop being busy.  without an SVP64 prefix
          the 32-bit word is stored as the opcode; with one, the word at
          pc+4 is requested.
        * INSN_READ2: waits for that second word, stores it as the opcode.
        * INSN_READY: raises fetch_insn_o_valid until fetch_insn_i_ready.

        arguments:

        * m: module to which the FSM is added
        * core: not referenced in this method
        * pc: address to fetch from
        * svstate: value copied into cur_state.svstate
        * nia: register written with the address of the following
          instruction (pc+4, or pc+8 for a prefixed instruction)
        * is_svp64_mode: register written with "an SVP64 prefix was
          found" (only driven here when svp64 is enabled)
        * fetch_pc_o_ready / fetch_pc_i_valid: handshake starting a fetch
        * fetch_insn_o_valid / fetch_insn_i_ready: handshake handing over
          the fetched instruction
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

        # 0 = an MSR read is outstanding; 1 (reset value) = nothing to collect
        msr_read = Signal(reset=1)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate) # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            with m.State("INSN_READ"):
                # one cycle later, msr/sv read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1) # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.o_data)
                with m.If(self.imem.f_busy_o): # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                    if self.svp64_en:
                        svp64 = self.svp64
                        # decode the SVP64 prefix, if any
                        comb += svp64.raw_opcode_in.eq(insn)
                        comb += svp64.bigendian.eq(self.core_bigendian_i)
                        # pass the decoded prefix (if any) to PowerDecoder2
                        sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                        # NOTE(review): is_svp64_mode is itself only
                        # updated (sync) a few lines below, so this copies
                        # the value it held *before* this fetch - confirm
                        # that this is intended
                        sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                        # remember whether this is a prefixed instruction, so
                        # the FSM can readily loop when VL==0
                        sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                        # calculate the address of the following instruction
                        insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                        sync += nia.eq(cur_state.pc + insn_size)
                        with m.If(~svp64.is_svp64_mode):
                            # with no prefix, store the instruction
                            # and hand it directly to the next FSM
                            sync += dec_opcode_i.eq(insn)
                            m.next = "INSN_READY"
                        with m.Else():
                            # fetch the rest of the instruction from memory
                            comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                            comb += self.imem.a_i_valid.eq(1)
                            comb += self.imem.f_i_valid.eq(1)
                            m.next = "INSN_READ2"
                    else:
                        # not SVP64 - 32-bit only
                        sync += nia.eq(cur_state.pc + 4)
                        sync += dec_opcode_i.eq(insn)
                        m.next = "INSN_READY"

            # second 32-bit word of a prefixed instruction (only entered
            # from the SVP64 branch of INSN_READ)
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_i.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    # (pmode is not used yet; the string literal below is
                    # a no-op sketch of the intended logic)
                    if self.svp64_en:
                        pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                        fire predicate loading FSM and wait before
                        moving to INSN_READY
                    else:
                        sync += self.srcmask.eq(-1) # set to all 1s
                        sync += self.dstmask.eq(-1) # set to all 1s
                        m.next = "INSN_READY"
                    """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"
 397
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates can be read here, by using IntRegs r_ports['pred']
        or CRRegs r_ports['cr_pred'].  in the case of CRs it will have to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        (it relies on self.cr_pred, self.int_pred, self.srcmask and
        self.dstmask, which only exist when svp64 is enabled)

        arguments:

        * m: module to which the FSM is added
        * pred_insn_i_valid / pred_insn_o_ready: handshake that starts
          a predicate fetch
        * pred_mask_o_valid / pred_mask_i_ready: handshake that reports
          self.srcmask / self.dstmask as ready

        states:

        * FETCH_PRED_IDLE: picks INT, CR or "no predication" by predmode
        * INT_DST_READ / INT_SRC_READ: collect the GPR read data
        * CR_READ: reads one CR field per cycle, for VL fields
        * FETCH_PRED_SHIFT_MASK: shifts out bits below srcstep/dststep
        * FETCH_PRED_DONE: raises pred_mask_o_valid until acknowledged
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates
        # (both the INT and the CR decoding are generated from the same
        # srcpred/dstpred fields; predmode selects which results are used)
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when zero
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR predication: all-ones masks,
                        # written directly (no shifting stage needed)
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when zero
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    # (cr_read is created here but also used in the Else
                    # branch and the If below: "with" does not open a new
                    # Python scope)
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are in self.srcmask/self.dstmask: report and wait
                # for the consumer to acknowledge
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
 564
 565     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 566                   dbg, core_rst, is_svp64_mode,
 567                   fetch_pc_o_ready, fetch_pc_i_valid,
 568                   fetch_insn_o_valid, fetch_insn_i_ready,
 569                   pred_insn_i_valid, pred_insn_o_ready,
 570                   pred_mask_o_valid, pred_mask_i_ready,
 571                   exec_insn_i_valid, exec_insn_o_ready,
 572                   exec_pc_o_valid, exec_pc_i_ready):
 573         """issue FSM
 574
 575         decode / issue FSM.  this interacts with the "fetch" FSM
 576         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 577         (outgoing). also interacts with the "execute" FSM
 578         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 579         (incoming).
 580         SVP64 RM prefixes have already been set up by the
 581         "fetch" phase, so execute is fairly straightforward.
 582         """
 583
 584         comb = m.d.comb
 585         sync = m.d.sync
 586         pdecode2 = self.pdecode2
 587         cur_state = self.cur_state
 588
 589         # temporaries
 590         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 591
 592         # for updating svstate (things like srcstep etc.)
 593         update_svstate = Signal() # set this (below) if updating
 594         new_svstate = SVSTATERec("new_svstate")
 595         comb += new_svstate.eq(cur_state.svstate)
 596
 597         # precalculate srcstep+1 and dststep+1
 598         cur_srcstep = cur_state.svstate.srcstep
 599         cur_dststep = cur_state.svstate.dststep
 600         next_srcstep = Signal.like(cur_srcstep)
 601         next_dststep = Signal.like(cur_dststep)
 602         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 603         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 604
 605         # note if an exception happened.  in a pipelined or OoO design
 606         # this needs to be accompanied by "shadowing" (or stalling)
 607         exc_happened = self.core.o.exc_happened
 608
 609         with m.FSM(name="issue_fsm"):
 610
 611             # sync with the "fetch" phase which is reading the instruction
 612             # at this point, there is no instruction running, that
 613             # could inadvertently update the PC.
 614             with m.State("ISSUE_START"):
 615                 # wait on "core stop" release, before next fetch
 616                 # need to do this here, in case we are in a VL==0 loop
 617                 with m.If(~dbg.core_stop_o & ~core_rst):
 618                     comb += fetch_pc_i_valid.eq(1) # tell fetch to start
 619                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 620                         m.next = "INSN_WAIT"
 621                 with m.Else():
 622                     # tell core it's stopped, and acknowledge debug handshake
 623                     comb += dbg.core_stopped_i.eq(1)
 624                     # while stopped, allow updating the PC and SVSTATE
 625                     with m.If(self.pc_i.ok):
 626                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 627                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 628                         sync += pc_changed.eq(1)
 629                     with m.If(self.svstate_i.ok):
 630                         comb += new_svstate.eq(self.svstate_i.data)
 631                         comb += update_svstate.eq(1)
 632                         sync += sv_changed.eq(1)
 633
 634             # wait for an instruction to arrive from Fetch
 635             with m.State("INSN_WAIT"):
 636                 comb += fetch_insn_i_ready.eq(1)
 637                 with m.If(fetch_insn_o_valid):
 638                     # loop into ISSUE_START if it's a SVP64 instruction
 639                     # and VL == 0.  this because VL==0 is a for-loop
 640                     # from 0 to 0 i.e. always, always a NOP.
 641                     cur_vl = cur_state.svstate.vl
 642                     with m.If(is_svp64_mode & (cur_vl == 0)):
 643                         # update the PC before fetching the next instruction
 644                         # since we are in a VL==0 loop, no instruction was
 645                         # executed that we could be overwriting
 646                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 647                         comb += self.state_w_pc.i_data.eq(nia)
 648                         comb += self.insn_done.eq(1)
 649                         m.next = "ISSUE_START"
 650                     with m.Else():
 651                         if self.svp64_en:
 652                             m.next = "PRED_START"  # start fetching predicate
 653                         else:
 654                             m.next = "DECODE_SV"  # skip predication
 655
 656             with m.State("PRED_START"):
 657                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
 658                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
 659                     m.next = "MASK_WAIT"
 660
 661             with m.State("MASK_WAIT"):
 662                 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
 663                 with m.If(pred_mask_o_valid): # predication masks are ready
 664                     m.next = "PRED_SKIP"
 665
 666             # skip zeros in predicate
 667             with m.State("PRED_SKIP"):
 668                 with m.If(~is_svp64_mode):
 669                     m.next = "DECODE_SV"  # nothing to do
 670                 with m.Else():
 671                     if self.svp64_en:
 672                         pred_src_zero = pdecode2.rm_dec.pred_sz
 673                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 674
 675                         # new srcstep, after skipping zeros
 676                         skip_srcstep = Signal.like(cur_srcstep)
 677                         # value to be added to the current srcstep
 678                         src_delta = Signal.like(cur_srcstep)
 679                         # add leading zeros to srcstep, if not in zero mode
 680                         with m.If(~pred_src_zero):
 681                             # priority encoder (count leading zeros)
 682                             # append guard bit, in case the mask is all zeros
 683                             pri_enc_src = PriorityEncoder(65)
 684                             m.submodules.pri_enc_src = pri_enc_src
 685                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 686                                                          Const(1, 1)))
 687                             comb += src_delta.eq(pri_enc_src.o)
 688                         # apply delta to srcstep
 689                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 690                         # shift-out all leading zeros from the mask
 691                         # plus the leading "one" bit
 692                         # TODO count leading zeros and shift-out the zero
 693                         #      bits, in the same step, in hardware
 694                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 695
 696                         # same as above, but for dststep
 697                         skip_dststep = Signal.like(cur_dststep)
 698                         dst_delta = Signal.like(cur_dststep)
 699                         with m.If(~pred_dst_zero):
 700                             pri_enc_dst = PriorityEncoder(65)
 701                             m.submodules.pri_enc_dst = pri_enc_dst
 702                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 703                                                          Const(1, 1)))
 704                             comb += dst_delta.eq(pri_enc_dst.o)
 705                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 706                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 707
 708                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 709                         with m.If((skip_srcstep >= cur_vl) |
 710                                   (skip_dststep >= cur_vl)):
 711                             # end of VL loop. Update PC and reset src/dst step
 712                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 713                             comb += self.state_w_pc.i_data.eq(nia)
 714                             comb += new_svstate.srcstep.eq(0)
 715                             comb += new_svstate.dststep.eq(0)
 716                             comb += update_svstate.eq(1)
 717                             # synchronize with the simulator
 718                             comb += self.insn_done.eq(1)
 719                             # go back to Issue
 720                             m.next = "ISSUE_START"
 721                         with m.Else():
 722                             # update new src/dst step
 723                             comb += new_svstate.srcstep.eq(skip_srcstep)
 724                             comb += new_svstate.dststep.eq(skip_dststep)
 725                             comb += update_svstate.eq(1)
 726                             # proceed to Decode
 727                             m.next = "DECODE_SV"
 728
 729                         # pass predicate mask bits through to satellite decoders
 730                         # TODO: for SIMD this will be *multiple* bits
 731                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
 732                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
 733
 734             # after src/dst step have been updated, we are ready
 735             # to decode the instruction
 736             with m.State("DECODE_SV"):
 737                 # decode the instruction
 738                 sync += core.i.e.eq(pdecode2.e)
 739                 sync += core.i.state.eq(cur_state)
 740                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
 741                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
 742                 if self.svp64_en:
 743                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
 744                     # set RA_OR_ZERO detection in satellite decoders
 745                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
 746                     # and svp64 detection
 747                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
 748                     # and svp64 bit-rev'd ldst mode
 749                     ldst_dec = pdecode2.use_svp64_ldst_dec
 750                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
 751                 # after decoding, reset any previous exception condition,
 752                 # allowing it to be set again during the next execution
 753                 sync += pdecode2.ldst_exc.eq(0)
 754
 755                 m.next = "INSN_EXECUTE"  # move to "execute"
 756
 757             # handshake with execution FSM, move to "wait" once acknowledged
 758             with m.State("INSN_EXECUTE"):
 759                 comb += exec_insn_i_valid.eq(1) # trigger execute
 760                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 761                     m.next = "EXECUTE_WAIT"
 762
 763             with m.State("EXECUTE_WAIT"):
 764                 # wait on "core stop" release, at instruction end
 765                 # need to do this here, in case we are in a VL>1 loop
 766                 with m.If(~dbg.core_stop_o & ~core_rst):
 767                     comb += exec_pc_i_ready.eq(1)
 768                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 769                     # the exception info needs to be blatted into
 770                     # pdecode.ldst_exc, and the instruction "re-run".
 771                     # when ldst_exc.happened is set, the PowerDecoder2
 772                     # reacts very differently: it re-writes the instruction
 773                     # with a "trap" (calls PowerDecoder2.trap()) which
 774                     # will *overwrite* whatever was requested and jump the
 775                     # PC to the exception address, as well as alter MSR.
 776                     # nothing else needs to be done other than to note
 777                     # the change of PC and MSR (and, later, SVSTATE)
 778                     with m.If(exc_happened):
 779                         sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))
 780
 781                     with m.If(exec_pc_o_valid):
 782
 783                         # was this the last loop iteration?
 784                         is_last = Signal()
 785                         cur_vl = cur_state.svstate.vl
 786                         comb += is_last.eq(next_srcstep == cur_vl)
 787
 788                         # return directly to Decode if Execute generated an
 789                         # exception.
 790                         with m.If(pdecode2.ldst_exc.happened):
 791                             m.next = "DECODE_SV"
 792
 793                         # if either PC or SVSTATE were changed by the previous
 794                         # instruction, go directly back to Fetch, without
 795                         # updating either PC or SVSTATE
 796                         with m.Elif(pc_changed | sv_changed):
 797                             m.next = "ISSUE_START"
 798
 799                         # also return to Fetch, when no output was a vector
 800                         # (regardless of SRCSTEP and VL), or when the last
 801                         # instruction was really the last one of the VL loop
 802                         with m.Elif((~pdecode2.loop_continue) | is_last):
 803                             # before going back to fetch, update the PC state
 804                             # register with the NIA.
 805                             # ok here we are not reading the branch unit.
 806                             # TODO: this just blithely overwrites whatever
 807                             #       pipeline updated the PC
 808                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 809                             comb += self.state_w_pc.i_data.eq(nia)
 810                             # reset SRCSTEP before returning to Fetch
 811                             if self.svp64_en:
 812                                 with m.If(pdecode2.loop_continue):
 813                                     comb += new_svstate.srcstep.eq(0)
 814                                     comb += new_svstate.dststep.eq(0)
 815                                     comb += update_svstate.eq(1)
 816                             else:
 817                                 comb += new_svstate.srcstep.eq(0)
 818                                 comb += new_svstate.dststep.eq(0)
 819                                 comb += update_svstate.eq(1)
 820                             m.next = "ISSUE_START"
 821
 822                         # returning to Execute? then, first update SRCSTEP
 823                         with m.Else():
 824                             comb += new_svstate.srcstep.eq(next_srcstep)
 825                             comb += new_svstate.dststep.eq(next_dststep)
 826                             comb += update_svstate.eq(1)
 827                             # return to mask skip loop
 828                             m.next = "PRED_SKIP"
 829
 830                 with m.Else():
 831                     comb += dbg.core_stopped_i.eq(1)
 832                     # while stopped, allow updating the PC and SVSTATE
 833                     with m.If(self.pc_i.ok):
 834                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 835                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 836                         sync += pc_changed.eq(1)
 837                     with m.If(self.svstate_i.ok):
 838                         comb += new_svstate.eq(self.svstate_i.data)
 839                         comb += update_svstate.eq(1)
 840                         sync += sv_changed.eq(1)
 841
 842         # check if svstate needs updating: if so, write it to State Regfile
 843         with m.If(update_svstate):
 844             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 845             comb += self.state_w_sv.i_data.eq(new_svstate)
 846             sync += cur_state.svstate.eq(new_svstate) # for next clock
 847
 848     def execute_fsm(self, m, core, pc_changed, sv_changed,
 849                     exec_insn_i_valid, exec_insn_o_ready,
 850                     exec_pc_o_valid, exec_pc_i_ready):
 851         """execute FSM
 852
 853         execute FSM. this interacts with the "issue" FSM
 854         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 855         (outgoing). SVP64 RM prefixes have already been set up by the
 856         "issue" phase, so execute is fairly straightforward.
 857         """
 858
 859         comb = m.d.comb
 860         sync = m.d.sync
 861         pdecode2 = self.pdecode2
 862
 863         # temporaries
 864         core_busy_o = ~core.p.o_ready                # core is busy
 865         core_ivalid_i = core.p.i_valid              # instruction is valid
 866         insn_type = core.i.e.do.insn_type           # instruction MicroOp type
 867
 868         with m.FSM(name="exec_fsm"):
 869
 870             # waiting for instruction bus (stays there until not busy)
 871             with m.State("INSN_START"):
 872                 comb += exec_insn_o_ready.eq(1)
 873                 with m.If(exec_insn_i_valid):
 874                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
 875                     sync += sv_changed.eq(0)
 876                     sync += pc_changed.eq(0)
 877                     m.next = "INSN_ACTIVE"  # move to "wait completion"
 878
 879             # instruction started: must wait till it finishes
 880             with m.State("INSN_ACTIVE"):
 881                 # note changes to PC and SVSTATE
 882                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 883                     sync += sv_changed.eq(1)
 884                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 885                     sync += pc_changed.eq(1)
 886                 with m.If(~core_busy_o): # instruction done!
 887                     comb += exec_pc_o_valid.eq(1)
 888                     with m.If(exec_pc_i_ready):
 889                         # when finished, indicate "done".
 890                         # however, if there was an exception, the instruction
 891                         # is *not* yet done.  this is an implementation
 892                         # detail: we choose to implement exceptions by
 893                         # taking the exception information from the LDST
 894                         # unit, putting that *back* into the PowerDecoder2,
 895                         # and *re-running the entire instruction*.
 896                         # if we erroneously indicate "done" here, it is as if
 897                         # there were *TWO* instructions:
 898                         # 1) the failed LDST 2) a TRAP.
 899                         with m.If(~pdecode2.ldst_exc.happened):
 900                             comb += self.insn_done.eq(1)
 901                         m.next = "INSN_START"  # back to fetch
 902
 903     def setup_peripherals(self, m):
 904         comb, sync = m.d.comb, m.d.sync
 905
 906         # okaaaay so the debug module must be in coresync clock domain
 907         # but NOT its reset signal. to cope with this, set every single
 908         # submodule explicitly in coresync domain, debug and JTAG
 909         # in their own one but using *external* reset.
 910         csd = DomainRenamer("coresync")
 911         dbd = DomainRenamer(self.dbg_domain)
 912
 913         m.submodules.core = core = csd(self.core)
 914         m.submodules.imem = imem = csd(self.imem)
 915         m.submodules.dbg = dbg = dbd(self.dbg)
 916         if self.jtag_en:
 917             m.submodules.jtag = jtag = dbd(self.jtag)
 918             # TODO: UART2GDB mux, here, from external pin
 919             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 920             sync += dbg.dmi.connect_to(jtag.dmi)
 921
 922         cur_state = self.cur_state
 923
 924         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 925         if self.sram4x4k:
 926             for i, sram in enumerate(self.sram4k):
 927                 m.submodules["sram4k_%d" % i] = csd(sram)
 928                 comb += sram.enable.eq(self.wb_sram_en)
 929
 930         # XICS interrupt handler
 931         if self.xics:
 932             m.submodules.xics_icp = icp = csd(self.xics_icp)
 933             m.submodules.xics_ics = ics = csd(self.xics_ics)
 934             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 935             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
 936
 937         # GPIO test peripheral
 938         if self.gpio:
 939             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 940
 941         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 942         # XXX causes litex ECP5 test to get wrong idea about input and output
 943         # (but works with verilator sim *sigh*)
 944         #if self.gpio and self.xics:
 945         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 946
 947         # instruction decoder
 948         pdecode = create_pdecode()
 949         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 950         if self.svp64_en:
 951             m.submodules.svp64 = svp64 = csd(self.svp64)
 952
 953         # convenience
 954         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 955         intrf = self.core.regs.rf['int']
 956
 957         # clock delay power-on reset
 958         cd_por  = ClockDomain(reset_less=True)
 959         cd_sync = ClockDomain()
 960         core_sync = ClockDomain("coresync")
 961         m.domains += cd_por, cd_sync, core_sync
 962         if self.dbg_domain != "sync":
 963             dbg_sync = ClockDomain(self.dbg_domain)
 964             m.domains += dbg_sync
 965
 966         ti_rst = Signal(reset_less=True)
 967         delay = Signal(range(4), reset=3)
 968         with m.If(delay != 0):
 969             m.d.por += delay.eq(delay - 1)
 970         comb += cd_por.clk.eq(ClockSignal())
 971
 972         # power-on reset delay
 973         core_rst = ResetSignal("coresync")
 974         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 975         comb += core_rst.eq(ti_rst)
 976
 977         # debug clock is same as coresync, but reset is *main external*
 978         if self.dbg_domain != "sync":
 979             dbg_rst = ResetSignal(self.dbg_domain)
 980             comb += dbg_rst.eq(ResetSignal())
 981
 982         # busy/halted signals from core
 983         core_busy_o = ~core.p.o_ready                # core is busy
 984         comb += self.busy_o.eq(core_busy_o)
 985         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 986
 987         # temporary hack: says "go" immediately for both address gen and ST
 988         l0 = core.l0
 989         ldst = core.fus.fus['ldst0']
 990         st_go_edge = rising_edge(m, ldst.st.rel_o)
 991         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
 992         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
 993
    def elaborate(self, platform):
        """build the issuer Module

        sets up the peripherals and core, reads PC and SVSTATE, then
        instantiates the fetch, issue, (optional) predicate-fetch and
        execute FSMs together with the valid/ready handshake signals that
        link them.  finally adds DMI register read-out and the DEC/TB
        update FSM.

        platform is not used by this method.  returns the Module.

        note: statement order matters here.  the default (zero)
        assignments to the PC write port are made *before* the FSMs are
        added so that the FSMs can override them, and the core-reset
        override of nia is added *after* them.
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        # these two flags are shared between the issue and execute FSMs
        pc_changed = Signal() # note write to PC
        sv_changed = Signal() # note write to SVSTATE

        # read state either from incoming override or from regfile
        # TODO: really should be doing MSR in the same way
        pc = state_get(m, core_rst, self.pc_i,
                            "pc",                  # read PC
                            self.state_r_pc, StateRegs.PC)
        svstate = state_get(m, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle (default: the FSMs below override
        # this when they actually need to write the PC)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.i_data.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        nia = Signal(64)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.o.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.svstate.eq(svstate)
        comb += dbg.state.msr.eq(cur_state.msr)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are three FSMs (four if SVP64 is enabled): fetch
        # (32/64-bit), issue, decode/execute, and "Predicate
        # fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_i_valid = Signal() # Issue tells Fetch "start next read"
        fetch_pc_o_ready = Signal() # Fetch acknowledges the request

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_o_valid = Signal()
        fetch_insn_i_ready = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_i_valid = Signal()
        pred_insn_o_ready = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_o_valid = Signal()
        pred_mask_i_ready = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_i_valid = Signal()
        exec_insn_o_ready = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_o_valid = Signal()
        exec_pc_i_ready = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop lives.  the ready/valid
        # signalling is used to communicate between the four.

        self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready)

        self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready,
                       pred_insn_i_valid, pred_insn_o_ready,
                       pred_mask_o_valid, pred_mask_i_ready,
                       exec_insn_i_valid, exec_insn_o_ready,
                       exec_pc_o_valid, exec_pc_i_ready)

        # the predicate fetch FSM only exists when SVP64 is enabled
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_i_valid, pred_insn_o_ready,
                                     pred_mask_o_valid, pred_mask_i_ready)

        self.execute_fsm(m, core, pc_changed, sv_changed,
                         exec_insn_i_valid, exec_insn_o_ready,
                         exec_pc_o_valid, exec_pc_i_ready)

        # whatever was done above, over-ride it if core reset is held
        with m.If(core_rst):
            sync += nia.eq(0)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        return m
1119
1120     def do_dmi(self, m, dbg):
1121         """deals with DMI debug requests
1122
1123         currently only provides read requests for the INT regfile, CR and XER
1124         it will later also deal with *writing* to these regfiles.
1125         """
1126         comb = m.d.comb
1127         sync = m.d.sync
1128         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1129         intrf = self.core.regs.rf['int']
1130
1131         with m.If(d_reg.req): # request for regfile access being made
1132             # TODO: error-check this
1133             # XXX should this be combinatorial?  sync better?
1134             if intrf.unary:
1135                 comb += self.int_r.ren.eq(1<<d_reg.addr)
1136             else:
1137                 comb += self.int_r.addr.eq(d_reg.addr)
1138                 comb += self.int_r.ren.eq(1)
1139         d_reg_delay  = Signal()
1140         sync += d_reg_delay.eq(d_reg.req)
1141         with m.If(d_reg_delay):
1142             # data arrives one clock later
1143             comb += d_reg.data.eq(self.int_r.o_data)
1144             comb += d_reg.ack.eq(1)
1145
1146         # sigh same thing for CR debug
1147         with m.If(d_cr.req): # request for regfile access being made
1148             comb += self.cr_r.ren.eq(0b11111111) # enable all
1149         d_cr_delay  = Signal()
1150         sync += d_cr_delay.eq(d_cr.req)
1151         with m.If(d_cr_delay):
1152             # data arrives one clock later
1153             comb += d_cr.data.eq(self.cr_r.o_data)
1154             comb += d_cr.ack.eq(1)
1155
1156         # aaand XER...
1157         with m.If(d_xer.req): # request for regfile access being made
1158             comb += self.xer_r.ren.eq(0b111111) # enable all
1159         d_xer_delay  = Signal()
1160         sync += d_xer_delay.eq(d_xer.req)
1161         with m.If(d_xer_delay):
1162             # data arrives one clock later
1163             comb += d_xer.data.eq(self.xer_r.o_data)
1164             comb += d_xer.ack.eq(1)
1165
1166     def tb_dec_fsm(self, m, spr_dec):
1167         """tb_dec_fsm
1168
1169         this is a FSM for updating either dec or tb.  it runs alternately
1170         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1171         value to DEC, however the regfile has "passthrough" on it so this
1172         *should* be ok.
1173
1174         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1175         """
1176
1177         comb, sync = m.d.comb, m.d.sync
1178         fast_rf = self.core.regs.rf['fast']
1179         fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1180         fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1181
1182         with m.FSM() as fsm:
1183
1184             # initiates read of current DEC
1185             with m.State("DEC_READ"):
1186                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1187                 comb += fast_r_dectb.ren.eq(1)
1188                 m.next = "DEC_WRITE"
1189
1190             # waits for DEC read to arrive (1 cycle), updates with new value
1191             with m.State("DEC_WRITE"):
1192                 new_dec = Signal(64)
1193                 # TODO: MSR.LPCR 32-bit decrement mode
1194                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1195                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1196                 comb += fast_w_dectb.wen.eq(1)
1197                 comb += fast_w_dectb.i_data.eq(new_dec)
1198                 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1199                 m.next = "TB_READ"
1200
1201             # initiates read of current TB
1202             with m.State("TB_READ"):
1203                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1204                 comb += fast_r_dectb.ren.eq(1)
1205                 m.next = "TB_WRITE"
1206
1207             # waits for read TB to arrive, initiates write of current TB
1208             with m.State("TB_WRITE"):
1209                 new_tb = Signal(64)
1210                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1211                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1212                 comb += fast_w_dectb.wen.eq(1)
1213                 comb += fast_w_dectb.i_data.eq(new_tb)
1214                 m.next = "DEC_READ"
1215
1216         return m
1217
1218     def __iter__(self):
1219         yield from self.pc_i.ports()
1220         yield self.pc_o
1221         yield self.memerr_o
1222         yield from self.core.ports()
1223         yield from self.imem.ports()
1224         yield self.core_bigendian_i
1225         yield self.busy_o
1226
1227     def ports(self):
1228         return list(self)
1229
1230     def external_ports(self):
1231         ports = self.pc_i.ports()
1232         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1233                 ]
1234
1235         if self.jtag_en:
1236             ports += list(self.jtag.external_ports())
1237         else:
1238             # don't add DMI if JTAG is enabled
1239             ports += list(self.dbg.dmi.ports())
1240
1241         ports += list(self.imem.ibus.fields.values())
1242         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1243
1244         if self.sram4x4k:
1245             for sram in self.sram4k:
1246                 ports += list(sram.bus.fields.values())
1247
1248         if self.xics:
1249             ports += list(self.xics_icp.bus.fields.values())
1250             ports += list(self.xics_ics.bus.fields.values())
1251             ports.append(self.int_level_i)
1252
1253         if self.gpio:
1254             ports += list(self.simple_gpio.bus.fields.values())
1255             ports.append(self.gpio_o)
1256
1257         return ports
1258
1259     def ports(self):
1260         return list(self)
1261
1262
1263 class TestIssuer(Elaboratable):
1264     def __init__(self, pspec):
1265         self.ti = TestIssuerInternal(pspec)
1266         self.pll = DummyPLL(instance=True)
1267
1268         # PLL direct clock or not
1269         self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1270         if self.pll_en:
1271             self.pll_test_o = Signal(reset_less=True)
1272             self.pll_vco_o = Signal(reset_less=True)
1273             self.clk_sel_i = Signal(2, reset_less=True)
1274             self.ref_clk =  ClockSignal() # can't rename it but that's ok
1275             self.pllclk_clk = ClockSignal("pllclk")
1276
    def elaborate(self, platform):
        """wire the internal issuer to its clocks (and optionally a PLL)

        adds the internal issuer as submodule "ti".  when the PLL is
        enabled, also adds the PLL, a "pllclk" clock domain driven from
        the PLL output, and connects the PLL test outputs and clock
        selection input.  the "coresync" clock (and the debug clock, when
        the debug domain is not "sync") is then driven from the reference
        clock if the PLL is enabled, otherwise from the default clock.

        platform is not used by this method.  returns the Module.
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = ti = self.ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            m.submodules.wrappll = pll = self.pll

            # add clock domains from PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # PLL clock established.  has the side-effect of running clklsel
            # at the PLL's speed
            # NOTE(review): the original comment referred to a
            # DomainRenamer("pllclk") "above"; none is visible in this
            # method - confirm whether that comment is stale.
            pllclk = self.pllclk_clk
            comb += pllclk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            #comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # internal clock is set to selector clock-out.  has the side-effect of
        # running TestIssuer at this speed
        # debug clock runs at coresync internal clock
        # NOTE(review): cd_coresync and cd_dbgsync are created but never
        # added to m.domains here (the additions are commented out).
        cd_coresync = ClockDomain("coresync")
        #m.domains += cd_coresync
        if self.ti.dbg_domain != 'sync':
            cd_dbgsync = ClockDomain("dbgsync")
            #m.domains += cd_dbgsync
        intclk = ClockSignal("coresync")
        dbgclk = ClockSignal(self.ti.dbg_domain)
        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        # even with the PLL enabled, the core clock is taken straight from
        # the reference clock, not from the PLL output
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())
        # only drive the debug clock when it is a separate domain
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m
1333
1334     def ports(self):
1335         return list(self.ti.ports()) + list(self.pll.ports()) + \
1336                [ClockSignal(), ResetSignal()]
1337
1338     def external_ports(self):
1339         ports = self.ti.external_ports()
1340         ports.append(ClockSignal())
1341         ports.append(ResetSignal())
1342         if self.pll_en:
1343             ports.append(self.clk_sel_i)
1344             ports.append(self.pll.clk_24_i)
1345             ports.append(self.pll_test_o)
1346             ports.append(self.pll_vco_o)
1347             ports.append(self.pllclk_clk)
1348             ports.append(self.ref_clk)
1349         return ports
1350
1351
if __name__ == '__main__':
    # one Function Unit of each kind
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand the design (with its full port list) to the nmigen
    # command-line driver
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # run without any command-line arguments: additionally write the
    # design, restricted to its external ports, out as RTLIL
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(vl)