src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmigen.lib.coding import PriorityEncoder
  25
  26 from openpower.decoder.power_decoder import create_pdecode
  27 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  28 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  29 from openpower.decoder.decode2execute1 import Data
  30 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  31                                      SVP64PredMode)
  32 from openpower.state import CoreState
  33 from openpower.consts import (CR, SVP64CROffs)
  34 from soc.experiment.testmem import TestMemory # test only for instructions
  35 from soc.regfile.regfiles import StateRegs, FastRegs
  36 from soc.simple.core import NonProductionCore
  37 from soc.config.test.test_loadstore import TestMemPspec
  38 from soc.config.ifetch import ConfigFetchUnit
  39 from soc.debug.dmi import CoreDebug, DMIInterface
  40 from soc.debug.jtag import JTAG
  41 from soc.config.pinouts import get_pinspecs
  42 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  43 from soc.bus.simple_gpio import SimpleGPIO
  44 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  45 from soc.clock.select import ClockSelect
  46 from soc.clock.dummypll import DummyPLL
  47 from openpower.sv.svstate import SVSTATERec
  48
  49
  50 from nmutil.util import rising_edge
  51
  52 def get_insn(f_instr_o, pc):
  53     if f_instr_o.width == 32:
  54         return f_instr_o
  55     else:
  56         # 64-bit: bit 2 of pc decides which word to select
  57         return f_instr_o.word_select(pc[2], 32)
  58
  59 # gets state input or reads from state regfile
  60 def state_get(m, core_rst, state_i, name, regfile, regnum):
  61     comb = m.d.comb
  62     sync = m.d.sync
  63     # read the PC
  64     res = Signal(64, reset_less=True, name=name)
  65     res_ok_delay = Signal(name="%s_ok_delay" % name)
  66     with m.If(~core_rst):
  67         sync += res_ok_delay.eq(~state_i.ok)
  68         with m.If(state_i.ok):
  69             # incoming override (start from pc_i)
  70             comb += res.eq(state_i.data)
  71         with m.Else():
  72             # otherwise read StateRegs regfile for PC...
  73             comb += regfile.ren.eq(1<<regnum)
  74         # ... but on a 1-clock delay
  75         with m.If(res_ok_delay):
  76             comb += res.eq(regfile.data_o)
  77     return res
  78
  79 def get_predint(m, mask, name):
  80     """decode SVP64 predicate integer mask field to reg number and invert
  81     this is identical to the equivalent function in ISACaller except that
  82     it doesn't read the INT directly, it just decodes "what needs to be done"
  83     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  84
  85     * all1s is set to indicate that no mask is to be applied.
  86     * regread indicates the GPR register number to be read
  87     * invert is set to indicate that the register value is to be inverted
  88     * unary indicates that the contents of the register is to be shifted 1<<r3
  89     """
  90     comb = m.d.comb
  91     regread = Signal(5, name=name+"regread")
  92     invert = Signal(name=name+"invert")
  93     unary = Signal(name=name+"unary")
  94     all1s = Signal(name=name+"all1s")
  95     with m.Switch(mask):
  96         with m.Case(SVP64PredInt.ALWAYS.value):
  97             comb += all1s.eq(1)      # use 0b1111 (all ones)
  98         with m.Case(SVP64PredInt.R3_UNARY.value):
  99             comb += regread.eq(3)
 100             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 101         with m.Case(SVP64PredInt.R3.value):
 102             comb += regread.eq(3)
 103         with m.Case(SVP64PredInt.R3_N.value):
 104             comb += regread.eq(3)
 105             comb += invert.eq(1)
 106         with m.Case(SVP64PredInt.R10.value):
 107             comb += regread.eq(10)
 108         with m.Case(SVP64PredInt.R10_N.value):
 109             comb += regread.eq(10)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R30.value):
 112             comb += regread.eq(30)
 113         with m.Case(SVP64PredInt.R30_N.value):
 114             comb += regread.eq(30)
 115             comb += invert.eq(1)
 116     return regread, invert, unary, all1s
 117
 118 def get_predcr(m, mask, name):
 119     """decode SVP64 predicate CR to reg number field and invert status
 120     this is identical to _get_predcr in ISACaller
 121     """
 122     comb = m.d.comb
 123     idx = Signal(2, name=name+"idx")
 124     invert = Signal(name=name+"crinvert")
 125     with m.Switch(mask):
 126         with m.Case(SVP64PredCR.LT.value):
 127             comb += idx.eq(CR.LT)
 128             comb += invert.eq(0)
 129         with m.Case(SVP64PredCR.GE.value):
 130             comb += idx.eq(CR.LT)
 131             comb += invert.eq(1)
 132         with m.Case(SVP64PredCR.GT.value):
 133             comb += idx.eq(CR.GT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.LE.value):
 136             comb += idx.eq(CR.GT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.EQ.value):
 139             comb += idx.eq(CR.EQ)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.NE.value):
 142             comb += idx.eq(CR.EQ)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.SO.value):
 145             comb += idx.eq(CR.SO)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NS.value):
 148             comb += idx.eq(CR.SO)
 149             comb += invert.eq(1)
 150     return idx, invert
 151
 152
 153 class TestIssuerInternal(Elaboratable):
 154     """TestIssuer - reads instructions from TestMemory and issues them
 155
 156     efficiency and speed is not the main goal here: functional correctness
 157     and code clarity is.  optimisations (which almost 100% interfere with
 158     easy understanding) come later.
 159     """
 160     def __init__(self, pspec):
 161
 162         # test is SVP64 is to be enabled
 163         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 164
 165         # and if regfiles are reduced
 166         self.regreduce_en = (hasattr(pspec, "regreduce") and
 167                                             (pspec.regreduce == True))
 168
 169         # JTAG interface.  add this right at the start because if it's
 170         # added it *modifies* the pspec, by adding enable/disable signals
 171         # for parts of the rest of the core
 172         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 173         self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
 174         #self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 175         if self.jtag_en:
 176             # XXX MUST keep this up-to-date with litex, and
 177             # soc-cocotb-sim, and err.. all needs sorting out, argh
 178             subset = ['uart',
 179                       'mtwi',
 180                       'eint', 'gpio', 'mspi0',
 181                       # 'mspi1', - disabled for now
 182                       # 'pwm', 'sd0', - disabled for now
 183                        'sdr']
 184             self.jtag = JTAG(get_pinspecs(subset=subset),
 185                              domain=self.dbg_domain)
 186             # add signals to pspec to enable/disable icache and dcache
 187             # (or data and intstruction wishbone if icache/dcache not included)
 188             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 189             # TODO: do we actually care if these are not domain-synchronised?
 190             # honestly probably not.
 191             pspec.wb_icache_en = self.jtag.wb_icache_en
 192             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 193             self.wb_sram_en = self.jtag.wb_sram_en
 194         else:
 195             self.wb_sram_en = Const(1)
 196
 197         # add 4k sram blocks?
 198         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 199                          pspec.sram4x4kblock == True)
 200         if self.sram4x4k:
 201             self.sram4k = []
 202             for i in range(4):
 203                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 204                                                     #features={'err'}
 205                                                     ))
 206
 207         # add interrupt controller?
 208         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 209         if self.xics:
 210             self.xics_icp = XICS_ICP()
 211             self.xics_ics = XICS_ICS()
 212             self.int_level_i = self.xics_ics.int_level_i
 213
 214         # add GPIO peripheral?
 215         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 216         if self.gpio:
 217             self.simple_gpio = SimpleGPIO()
 218             self.gpio_o = self.simple_gpio.gpio_o
 219
 220         # main instruction core.  suitable for prototyping / demo only
 221         self.core = core = NonProductionCore(pspec)
 222         self.core_rst = ResetSignal("coresync")
 223
 224         # instruction decoder.  goes into Trap Record
 225         #pdecode = create_pdecode()
 226         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
 227         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 228                                      opkls=IssuerDecode2ToOperand,
 229                                      svp64_en=self.svp64_en,
 230                                      regreduce_en=self.regreduce_en)
 231         pdecode = self.pdecode2.dec
 232
 233         if self.svp64_en:
 234             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 235
 236         # Test Instruction memory
 237         self.imem = ConfigFetchUnit(pspec).fu
 238
 239         # DMI interface
 240         self.dbg = CoreDebug()
 241
 242         # instruction go/monitor
 243         self.pc_o = Signal(64, reset_less=True)
 244         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
 245         self.svstate_i = Data(64, "svstate_i") # ditto
 246         self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
 247         self.busy_o = Signal(reset_less=True)
 248         self.memerr_o = Signal(reset_less=True)
 249
 250         # STATE regfile read /write ports for PC, MSR, SVSTATE
 251         staterf = self.core.regs.rf['state']
 252         self.state_r_pc = staterf.r_ports['cia'] # PC rd
 253         self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
 254         self.state_r_msr = staterf.r_ports['msr'] # MSR rd
 255         self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
 256         self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
 257
 258         # DMI interface access
 259         intrf = self.core.regs.rf['int']
 260         crrf = self.core.regs.rf['cr']
 261         xerrf = self.core.regs.rf['xer']
 262         self.int_r = intrf.r_ports['dmi'] # INT read
 263         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
 264         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 265
 266         if self.svp64_en:
 267             # for predication
 268             self.int_pred = intrf.r_ports['pred'] # INT predicate read
 269             self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 270
 271         # hack method of keeping an eye on whether branch/trap set the PC
 272         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 273         self.state_nia.wen.name = 'state_nia_wen'
 274
 275         # pulse to synchronize the simulator at instruction end
 276         self.insn_done = Signal()
 277
 278         if self.svp64_en:
 279             # store copies of predicate masks
 280             self.srcmask = Signal(64)
 281             self.dstmask = Signal(64)
 282
    def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
                        fetch_pc_ready_o, fetch_pc_valid_i,
                        fetch_insn_valid_o, fetch_insn_ready_i):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        state sequence:
        IDLE -> INSN_READ [-> INSN_READ2] -> INSN_READY -> IDLE

        * m: Module to which the FSM and its statements are added
        * core: not referenced by this method
        * pc, svstate: captured into self.cur_state when a fetch starts
        * nia: set (sync) to the address of the following instruction:
          cur_state.pc + 4, or + 8 when an SVP64 prefix was detected
        * is_svp64_mode: set (sync) from the prefix decoder.  only
          written when self.svp64_en is set
        * fetch_pc_ready_o: raised in IDLE (ready to start a fetch)
        * fetch_pc_valid_i: when set in IDLE, the fetch starts
        * fetch_insn_valid_o: raised in INSN_READY (instruction available)
        * fetch_insn_ready_i: when set in INSN_READY, return to IDLE
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

        # zero while an MSR read is outstanding: cleared in IDLE when the
        # read is requested, set again once the data has been captured
        msr_read = Signal(reset=1)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                comb += fetch_pc_ready_o.eq(1)
                with m.If(fetch_pc_valid_i):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate) # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # wait for the instruction memory to deliver the first word
            # (original note: dummy pause to find out why simulation is
            # not keeping up)
            with m.State("INSN_READ"):
                # one cycle later, msr/sv read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1) # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o): # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                    # python-level "if": only one of the two branches
                    # below ends up in the generated hardware
                    if self.svp64_en:
                        svp64 = self.svp64
                        # decode the SVP64 prefix, if any
                        comb += svp64.raw_opcode_in.eq(insn)
                        comb += svp64.bigendian.eq(self.core_bigendian_i)
                        # pass the decoded prefix (if any) to PowerDecoder2
                        sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                        # NOTE(review): is_svp64_mode is itself only
                        # updated by the sync assignment further down, so
                        # this looks like it copies the value from
                        # *before* that update - confirm this is intended
                        sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                        # remember whether this is a prefixed instruction, so
                        # the FSM can readily loop when VL==0
                        sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                        # calculate the address of the following instruction
                        insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                        sync += nia.eq(cur_state.pc + insn_size)
                        with m.If(~svp64.is_svp64_mode):
                            # with no prefix, store the instruction
                            # and hand it directly to the next FSM
                            sync += dec_opcode_i.eq(insn)
                            m.next = "INSN_READY"
                        with m.Else():
                            # fetch the rest of the instruction from memory
                            comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                            comb += self.imem.a_valid_i.eq(1)
                            comb += self.imem.f_valid_i.eq(1)
                            m.next = "INSN_READ2"
                    else:
                        # not SVP64 - 32-bit only
                        sync += nia.eq(cur_state.pc + 4)
                        sync += dec_opcode_i.eq(insn)
                        m.next = "INSN_READY"

            # wait for the second word (the one following the prefix).
            # only entered from the svp64_en branch of INSN_READ
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_i.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    # (pmode is not used yet, and the string below is
                    # pseudo-code for that TODO: it has no effect)
                    if self.svp64_en:
                        pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                        fire predicate loading FSM and wait before
                        moving to INSN_READY
                    else:
                        sync += self.srcmask.eq(-1) # set to all 1s
                        sync += self.dstmask.eq(-1) # set to all 1s
                        m.next = "INSN_READY"
                    """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_valid_o.eq(1)
                with m.If(fetch_insn_ready_i):
                    m.next = "IDLE"
 396                     m.next = "IDLE"
 397
    def fetch_predicate_fsm(self, m,
                            pred_insn_valid_i, pred_insn_ready_o,
                            pred_mask_valid_o, pred_mask_ready_i):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates can be read here, by using IntRegs r_ports['pred']
        or CRRegs r_ports['cr_pred'].  in the case of CRs it will have to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        (it uses self.int_pred, self.cr_pred, self.srcmask and
        self.dstmask, which only exist when self.svp64_en is set)

        * m: Module to which the FSM and its statements are added
        * pred_insn_valid_i: when set in FETCH_PRED_IDLE, fetching starts
        * pred_insn_ready_o: raised in FETCH_PRED_IDLE (ready to start)
        * pred_mask_valid_o: raised in FETCH_PRED_DONE (masks available)
        * pred_mask_ready_i: when set in FETCH_PRED_DONE, return to idle

        the results are left in self.srcmask and self.dstmask.

        state sequences, selected by rm_dec.predmode:
        * INT: [INT_DST_READ ->] [INT_SRC_READ ->] FETCH_PRED_SHIFT_MASK
        * CR: CR_READ (one CR field per cycle) -> FETCH_PRED_SHIFT_MASK
        * otherwise: masks set to all 1s, straight to FETCH_PRED_DONE
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates.  both the INT and the CR interpretation of
        # each field are decoded: predmode selects which one gets used
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_ready_o.eq(1)
                with m.If(pred_insn_valid_i):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when
                        # the destination predicate decodes as "all 1s"
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        # (built up one bit at a time, starting from zero)
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # no predication: all-ones masks, stored directly
                        # (the shift-mask state is bypassed)
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            # the read requested in FETCH_PRED_IDLE is consumed here
            with m.State("INT_DST_READ"):
                # store destination mask
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.data_o & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.data_o ^ inv)
                # skip fetching source mask register, when the source
                # predicate decodes as "all 1s"
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            # the read requested in the previous state is consumed here
            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.data_o & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.data_o ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    # (clearing both, ready for the next time around)
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # this also runs in the exit cycle, which is when the
                # data from the final read is processed
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.data_o)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are available: wait for them to be accepted
                comb += pred_mask_valid_o.eq(1)
                with m.If(pred_mask_ready_i):
                    m.next = "FETCH_PRED_IDLE"
 564
 565     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 566                   dbg, core_rst, is_svp64_mode,
 567                   fetch_pc_ready_o, fetch_pc_valid_i,
 568                   fetch_insn_valid_o, fetch_insn_ready_i,
 569                   pred_insn_valid_i, pred_insn_ready_o,
 570                   pred_mask_valid_o, pred_mask_ready_i,
 571                   exec_insn_valid_i, exec_insn_ready_o,
 572                   exec_pc_valid_o, exec_pc_ready_i):
 573         """issue FSM
 574
 575         decode / issue FSM.  this interacts with the "fetch" FSM
 576         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 577         (outgoing). also interacts with the "execute" FSM
 578         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 579         (incoming).
 580         SVP64 RM prefixes have already been set up by the
 581         "fetch" phase, so execute is fairly straightforward.
 582         """
 583
 584         comb = m.d.comb
 585         sync = m.d.sync
 586         pdecode2 = self.pdecode2
 587         cur_state = self.cur_state
 588
 589         # temporaries
 590         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 591
 592         # for updating svstate (things like srcstep etc.)
 593         update_svstate = Signal() # set this (below) if updating
 594         new_svstate = SVSTATERec("new_svstate")
 595         comb += new_svstate.eq(cur_state.svstate)
 596
 597         # precalculate srcstep+1 and dststep+1
 598         cur_srcstep = cur_state.svstate.srcstep
 599         cur_dststep = cur_state.svstate.dststep
 600         next_srcstep = Signal.like(cur_srcstep)
 601         next_dststep = Signal.like(cur_dststep)
 602         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 603         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 604
 605         # note if an exception happened.  in a pipelined or OoO design
 606         # this needs to be accompanied by "shadowing" (or stalling)
 607         el = []
 608         for exc in core.fus.excs.values():
 609             el.append(exc.happened)
 610         exc_happened = Signal()
 611         if len(el) > 0: # at least one exception
 612             comb += exc_happened.eq(Cat(*el).bool())
 613
 614         with m.FSM(name="issue_fsm"):
 615
 616             # sync with the "fetch" phase which is reading the instruction
 617             # at this point, there is no instruction running, that
 618             # could inadvertently update the PC.
 619             with m.State("ISSUE_START"):
 620                 # wait on "core stop" release, before next fetch
 621                 # need to do this here, in case we are in a VL==0 loop
 622                 with m.If(~dbg.core_stop_o & ~core_rst):
 623                     comb += fetch_pc_valid_i.eq(1) # tell fetch to start
 624                     with m.If(fetch_pc_ready_o):   # fetch acknowledged us
 625                         m.next = "INSN_WAIT"
 626                 with m.Else():
 627                     # tell core it's stopped, and acknowledge debug handshake
 628                     comb += dbg.core_stopped_i.eq(1)
 629                     # while stopped, allow updating the PC and SVSTATE
 630                     with m.If(self.pc_i.ok):
 631                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 632                         comb += self.state_w_pc.data_i.eq(self.pc_i.data)
 633                         sync += pc_changed.eq(1)
 634                     with m.If(self.svstate_i.ok):
 635                         comb += new_svstate.eq(self.svstate_i.data)
 636                         comb += update_svstate.eq(1)
 637                         sync += sv_changed.eq(1)
 638
 639             # wait for an instruction to arrive from Fetch
 640             with m.State("INSN_WAIT"):
 641                 comb += fetch_insn_ready_i.eq(1)
 642                 with m.If(fetch_insn_valid_o):
 643                     # loop into ISSUE_START if it's a SVP64 instruction
 644                     # and VL == 0.  this because VL==0 is a for-loop
 645                     # from 0 to 0 i.e. always, always a NOP.
 646                     cur_vl = cur_state.svstate.vl
 647                     with m.If(is_svp64_mode & (cur_vl == 0)):
 648                         # update the PC before fetching the next instruction
 649                         # since we are in a VL==0 loop, no instruction was
 650                         # executed that we could be overwriting
 651                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 652                         comb += self.state_w_pc.data_i.eq(nia)
 653                         comb += self.insn_done.eq(1)
 654                         m.next = "ISSUE_START"
 655                     with m.Else():
 656                         if self.svp64_en:
 657                             m.next = "PRED_START"  # start fetching predicate
 658                         else:
 659                             m.next = "DECODE_SV"  # skip predication
 660
 661             with m.State("PRED_START"):
 662                 comb += pred_insn_valid_i.eq(1)  # tell fetch_pred to start
 663                 with m.If(pred_insn_ready_o):  # fetch_pred acknowledged us
 664                     m.next = "MASK_WAIT"
 665
 666             with m.State("MASK_WAIT"):
 667                 comb += pred_mask_ready_i.eq(1) # ready to receive the masks
 668                 with m.If(pred_mask_valid_o): # predication masks are ready
 669                     m.next = "PRED_SKIP"
 670
 671             # skip zeros in predicate
 672             with m.State("PRED_SKIP"):
 673                 with m.If(~is_svp64_mode):
 674                     m.next = "DECODE_SV"  # nothing to do
 675                 with m.Else():
 676                     if self.svp64_en:
 677                         pred_src_zero = pdecode2.rm_dec.pred_sz
 678                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 679
 680                         # new srcstep, after skipping zeros
 681                         skip_srcstep = Signal.like(cur_srcstep)
 682                         # value to be added to the current srcstep
 683                         src_delta = Signal.like(cur_srcstep)
 684                         # add leading zeros to srcstep, if not in zero mode
 685                         with m.If(~pred_src_zero):
 686                             # priority encoder (count leading zeros)
 687                             # append guard bit, in case the mask is all zeros
 688                             pri_enc_src = PriorityEncoder(65)
 689                             m.submodules.pri_enc_src = pri_enc_src
 690                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 691                                                          Const(1, 1)))
 692                             comb += src_delta.eq(pri_enc_src.o)
 693                         # apply delta to srcstep
 694                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 695                         # shift-out all leading zeros from the mask
 696                         # plus the leading "one" bit
 697                         # TODO count leading zeros and shift-out the zero
 698                         #      bits, in the same step, in hardware
 699                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 700
 701                         # same as above, but for dststep
 702                         skip_dststep = Signal.like(cur_dststep)
 703                         dst_delta = Signal.like(cur_dststep)
 704                         with m.If(~pred_dst_zero):
 705                             pri_enc_dst = PriorityEncoder(65)
 706                             m.submodules.pri_enc_dst = pri_enc_dst
 707                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 708                                                          Const(1, 1)))
 709                             comb += dst_delta.eq(pri_enc_dst.o)
 710                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 711                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 712
 713                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 714                         with m.If((skip_srcstep >= cur_vl) |
 715                                   (skip_dststep >= cur_vl)):
 716                             # end of VL loop. Update PC and reset src/dst step
 717                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 718                             comb += self.state_w_pc.data_i.eq(nia)
 719                             comb += new_svstate.srcstep.eq(0)
 720                             comb += new_svstate.dststep.eq(0)
 721                             comb += update_svstate.eq(1)
 722                             # synchronize with the simulator
 723                             comb += self.insn_done.eq(1)
 724                             # go back to Issue
 725                             m.next = "ISSUE_START"
 726                         with m.Else():
 727                             # update new src/dst step
 728                             comb += new_svstate.srcstep.eq(skip_srcstep)
 729                             comb += new_svstate.dststep.eq(skip_dststep)
 730                             comb += update_svstate.eq(1)
 731                             # proceed to Decode
 732                             m.next = "DECODE_SV"
 733
 734                         # pass predicate mask bits through to satellite decoders
 735                         # TODO: for SIMD this will be *multiple* bits
 736                         sync += core.sv_pred_sm.eq(self.srcmask[0])
 737                         sync += core.sv_pred_dm.eq(self.dstmask[0])
 738
 739             # after src/dst step have been updated, we are ready
 740             # to decode the instruction
 741             with m.State("DECODE_SV"):
 742                 # decode the instruction
 743                 sync += core.e.eq(pdecode2.e)
 744                 sync += core.state.eq(cur_state)
 745                 sync += core.raw_insn_i.eq(dec_opcode_i)
 746                 sync += core.bigendian_i.eq(self.core_bigendian_i)
 747                 if self.svp64_en:
 748                     sync += core.sv_rm.eq(pdecode2.sv_rm)
 749                     # set RA_OR_ZERO detection in satellite decoders
 750                     sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
 751                     # and svp64 detection
 752                     sync += core.is_svp64_mode.eq(is_svp64_mode)
 753                     # and svp64 bit-rev'd ldst mode
 754                     ldst_dec = pdecode2.use_svp64_ldst_dec
 755                     sync += core.use_svp64_ldst_dec.eq(ldst_dec)
 756
 757                 m.next = "INSN_EXECUTE"  # move to "execute"
 758
 759             # handshake with execution FSM, move to "wait" once acknowledged
 760             with m.State("INSN_EXECUTE"):
 761                 comb += exec_insn_valid_i.eq(1) # trigger execute
 762                 with m.If(exec_insn_ready_o):   # execute acknowledged us
 763                     m.next = "EXECUTE_WAIT"
 764
 765             with m.State("EXECUTE_WAIT"):
 766                 # wait on "core stop" release, at instruction end
 767                 # need to do this here, in case we are in a VL>1 loop
 768                 with m.If(~dbg.core_stop_o & ~core_rst):
 769                     comb += exec_pc_ready_i.eq(1)
 770                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 771                     #with m.If(exec_pc_valid_o & exc_happened):
 772                     #    probably something like this:
 773                     #    sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0")
 774                     # TODO: the exception info needs to be blatted
 775                     # into pdecode.ldst_exc, and the instruction "re-run".
 776                     # when ldst_exc.happened is set, the PowerDecoder2
 777                     # reacts very differently: it re-writes the instruction
 778                     # with a "trap" (calls PowerDecoder2.trap()) which
 779                     # will *overwrite* whatever was requested and jump the
 780                     # PC to the exception address, as well as alter MSR.
 781                     # nothing else needs to be done other than to note
 782                     # the change of PC and MSR (and, later, SVSTATE)
 783                     #with m.Elif(exec_pc_valid_o):
 784                     with m.If(exec_pc_valid_o): # replace with Elif (above)
 785
 786                         # was this the last loop iteration?
 787                         is_last = Signal()
 788                         cur_vl = cur_state.svstate.vl
 789                         comb += is_last.eq(next_srcstep == cur_vl)
 790
 791                         # if either PC or SVSTATE were changed by the previous
 792                         # instruction, go directly back to Fetch, without
 793                         # updating either PC or SVSTATE
 794                         with m.If(pc_changed | sv_changed):
 795                             m.next = "ISSUE_START"
 796
 797                         # also return to Fetch, when no output was a vector
 798                         # (regardless of SRCSTEP and VL), or when the last
 799                         # instruction was really the last one of the VL loop
 800                         with m.Elif((~pdecode2.loop_continue) | is_last):
 801                             # before going back to fetch, update the PC state
 802                             # register with the NIA.
 803                             # ok here we are not reading the branch unit.
 804                             # TODO: this just blithely overwrites whatever
 805                             #       pipeline updated the PC
 806                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 807                             comb += self.state_w_pc.data_i.eq(nia)
 808                             # reset SRCSTEP before returning to Fetch
 809                             if self.svp64_en:
 810                                 with m.If(pdecode2.loop_continue):
 811                                     comb += new_svstate.srcstep.eq(0)
 812                                     comb += new_svstate.dststep.eq(0)
 813                                     comb += update_svstate.eq(1)
 814                             else:
 815                                 comb += new_svstate.srcstep.eq(0)
 816                                 comb += new_svstate.dststep.eq(0)
 817                                 comb += update_svstate.eq(1)
 818                             m.next = "ISSUE_START"
 819
 820                         # returning to Execute? then, first update SRCSTEP
 821                         with m.Else():
 822                             comb += new_svstate.srcstep.eq(next_srcstep)
 823                             comb += new_svstate.dststep.eq(next_dststep)
 824                             comb += update_svstate.eq(1)
 825                             # return to mask skip loop
 826                             m.next = "PRED_SKIP"
 827
 828                 with m.Else():
 829                     comb += dbg.core_stopped_i.eq(1)
 830                     # while stopped, allow updating the PC and SVSTATE
 831                     with m.If(self.pc_i.ok):
 832                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 833                         comb += self.state_w_pc.data_i.eq(self.pc_i.data)
 834                         sync += pc_changed.eq(1)
 835                     with m.If(self.svstate_i.ok):
 836                         comb += new_svstate.eq(self.svstate_i.data)
 837                         comb += update_svstate.eq(1)
 838                         sync += sv_changed.eq(1)
 839
 840         # check if svstate needs updating: if so, write it to State Regfile
 841         with m.If(update_svstate):
 842             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 843             comb += self.state_w_sv.data_i.eq(new_svstate)
 844             sync += cur_state.svstate.eq(new_svstate) # for next clock
 845
 846     def execute_fsm(self, m, core, pc_changed, sv_changed,
 847                     exec_insn_valid_i, exec_insn_ready_o,
 848                     exec_pc_valid_o, exec_pc_ready_i):
 849         """execute FSM
 850
 851         execute FSM. this interacts with the "issue" FSM
 852         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 853         (outgoing). SVP64 RM prefixes have already been set up by the
 854         "issue" phase, so execute is fairly straightforward.
 855         """
 856
 857         comb = m.d.comb
 858         sync = m.d.sync
 859         pdecode2 = self.pdecode2
 860
 861         # temporaries
 862         core_busy_o = core.busy_o                 # core is busy
 863         core_ivalid_i = core.ivalid_i             # instruction is valid
 864         core_issue_i = core.issue_i               # instruction is issued
 865         insn_type = core.e.do.insn_type           # instruction MicroOp type
 866
 867         with m.FSM(name="exec_fsm"):
 868
 869             # waiting for instruction bus (stays there until not busy)
 870             with m.State("INSN_START"):
 871                 comb += exec_insn_ready_o.eq(1)
 872                 with m.If(exec_insn_valid_i):
 873                     comb += core_ivalid_i.eq(1)  # instruction is valid
 874                     comb += core_issue_i.eq(1)  # and issued
 875                     sync += sv_changed.eq(0)
 876                     sync += pc_changed.eq(0)
 877                     m.next = "INSN_ACTIVE"  # move to "wait completion"
 878
 879             # instruction started: must wait till it finishes
 880             with m.State("INSN_ACTIVE"):
 881                 with m.If(insn_type != MicrOp.OP_NOP):
 882                     comb += core_ivalid_i.eq(1) # instruction is valid
 883                 # note changes to PC and SVSTATE
 884                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 885                     sync += sv_changed.eq(1)
 886                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 887                     sync += pc_changed.eq(1)
 888                 with m.If(~core_busy_o): # instruction done!
 889                     comb += exec_pc_valid_o.eq(1)
 890                     with m.If(exec_pc_ready_i):
 891                         comb += self.insn_done.eq(1)
 892                         m.next = "INSN_START"  # back to fetch
 893
 894     def setup_peripherals(self, m):
 895         comb, sync = m.d.comb, m.d.sync
 896
 897         # okaaaay so the debug module must be in coresync clock domain
 898         # but NOT its reset signal. to cope with this, set every single
 899         # submodule explicitly in coresync domain, debug and JTAG
 900         # in their own one but using *external* reset.
 901         csd = DomainRenamer("coresync")
 902         dbd = DomainRenamer(self.dbg_domain)
 903
 904         m.submodules.core = core = csd(self.core)
 905         m.submodules.imem = imem = csd(self.imem)
 906         m.submodules.dbg = dbg = dbd(self.dbg)
 907         if self.jtag_en:
 908             m.submodules.jtag = jtag = dbd(self.jtag)
 909             # TODO: UART2GDB mux, here, from external pin
 910             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 911             sync += dbg.dmi.connect_to(jtag.dmi)
 912
 913         cur_state = self.cur_state
 914
 915         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 916         if self.sram4x4k:
 917             for i, sram in enumerate(self.sram4k):
 918                 m.submodules["sram4k_%d" % i] = csd(sram)
 919                 comb += sram.enable.eq(self.wb_sram_en)
 920
 921         # XICS interrupt handler
 922         if self.xics:
 923             m.submodules.xics_icp = icp = csd(self.xics_icp)
 924             m.submodules.xics_ics = ics = csd(self.xics_ics)
 925             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 926             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
 927
 928         # GPIO test peripheral
 929         if self.gpio:
 930             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 931
 932         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 933         # XXX causes litex ECP5 test to get wrong idea about input and output
 934         # (but works with verilator sim *sigh*)
 935         #if self.gpio and self.xics:
 936         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 937
 938         # instruction decoder
 939         pdecode = create_pdecode()
 940         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 941         if self.svp64_en:
 942             m.submodules.svp64 = svp64 = csd(self.svp64)
 943
 944         # convenience
 945         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 946         intrf = self.core.regs.rf['int']
 947
 948         # clock delay power-on reset
 949         cd_por  = ClockDomain(reset_less=True)
 950         cd_sync = ClockDomain()
 951         core_sync = ClockDomain("coresync")
 952         m.domains += cd_por, cd_sync, core_sync
 953         if self.dbg_domain != "sync":
 954             dbg_sync = ClockDomain(self.dbg_domain)
 955             m.domains += dbg_sync
 956
 957         ti_rst = Signal(reset_less=True)
 958         delay = Signal(range(4), reset=3)
 959         with m.If(delay != 0):
 960             m.d.por += delay.eq(delay - 1)
 961         comb += cd_por.clk.eq(ClockSignal())
 962
 963         # power-on reset delay
 964         core_rst = ResetSignal("coresync")
 965         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 966         comb += core_rst.eq(ti_rst)
 967
 968         # debug clock is same as coresync, but reset is *main external*
 969         if self.dbg_domain != "sync":
 970             dbg_rst = ResetSignal(self.dbg_domain)
 971             comb += dbg_rst.eq(ResetSignal())
 972
 973         # busy/halted signals from core
 974         comb += self.busy_o.eq(core.busy_o)
 975         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 976
 977         # temporary hack: says "go" immediately for both address gen and ST
 978         l0 = core.l0
 979         ldst = core.fus.fus['ldst0']
 980         st_go_edge = rising_edge(m, ldst.st.rel_o)
 981         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
 982         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
 983
    def elaborate(self, platform):
        """build the issuer Module

        in order: sets up the core and peripherals, arranges for cur_state
        to be cleared on core reset, reads PC and SVSTATE (from the
        incoming override or the State regfile), wires up the debug
        state, creates the ready/valid handshake signals and then
        instantiates the fetch, issue, (optionally) predicate-fetch and
        execute FSMs.  DMI register reads and the DEC/TB update FSM are
        added last.

        platform: not referenced by this method.
        returns the populated Module.
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2 # (not referenced again in this method)
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        pc_changed = Signal() # note write to PC
        sv_changed = Signal() # note write to SVSTATE

        # read state either from incoming override or from regfile
        # TODO: really should be doing MSR in the same way
        pc = state_get(m, core_rst, self.pc_i,
                            "pc",                  # read PC
                            self.state_r_pc, StateRegs.PC)
        svstate = state_get(m, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle: these are the defaults, and must
        # stay *before* the FSMs below, which set wen/data_i when the
        # PC really does need writing
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        nia = Signal(64)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.svstate.eq(svstate)
        comb += dbg.state.msr.eq(cur_state.msr)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_valid_i = Signal() # Execute tells Fetch "start next read"
        fetch_pc_ready_o = Signal() # Fetch Tells SVSTATE "proceed"

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_valid_o = Signal()
        fetch_insn_ready_i = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_valid_i = Signal()
        pred_insn_ready_o = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_valid_o = Signal()
        pred_mask_ready_i = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_valid_i = Signal()
        exec_insn_ready_o = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_valid_o = Signal()
        exec_pc_ready_i = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop lives.  the ready/valid
        # signalling is used to communicate between the four.

        self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
                       fetch_pc_ready_o, fetch_pc_valid_i,
                       fetch_insn_valid_o, fetch_insn_ready_i)

        self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_ready_o, fetch_pc_valid_i,
                       fetch_insn_valid_o, fetch_insn_ready_i,
                       pred_insn_valid_i, pred_insn_ready_o,
                       pred_mask_valid_o, pred_mask_ready_i,
                       exec_insn_valid_i, exec_insn_ready_o,
                       exec_pc_valid_o, exec_pc_ready_i)

        # the predicate FSM only exists when SVP64 is enabled
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_valid_i, pred_insn_ready_o,
                                     pred_mask_valid_o, pred_mask_ready_i)

        self.execute_fsm(m, core, pc_changed, sv_changed,
                         exec_insn_valid_i, exec_insn_ready_o,
                         exec_pc_valid_o, exec_pc_ready_i)

        # whatever was done above, over-ride it if core reset is held
        # (deliberately placed *after* the FSMs so that it takes priority)
        with m.If(core_rst):
            sync += nia.eq(0)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        return m
1109
1110     def do_dmi(self, m, dbg):
1111         """deals with DMI debug requests
1112
1113         currently only provides read requests for the INT regfile, CR and XER
1114         it will later also deal with *writing* to these regfiles.
1115         """
1116         comb = m.d.comb
1117         sync = m.d.sync
1118         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1119         intrf = self.core.regs.rf['int']
1120
1121         with m.If(d_reg.req): # request for regfile access being made
1122             # TODO: error-check this
1123             # XXX should this be combinatorial?  sync better?
1124             if intrf.unary:
1125                 comb += self.int_r.ren.eq(1<<d_reg.addr)
1126             else:
1127                 comb += self.int_r.addr.eq(d_reg.addr)
1128                 comb += self.int_r.ren.eq(1)
1129         d_reg_delay  = Signal()
1130         sync += d_reg_delay.eq(d_reg.req)
1131         with m.If(d_reg_delay):
1132             # data arrives one clock later
1133             comb += d_reg.data.eq(self.int_r.data_o)
1134             comb += d_reg.ack.eq(1)
1135
1136         # sigh same thing for CR debug
1137         with m.If(d_cr.req): # request for regfile access being made
1138             comb += self.cr_r.ren.eq(0b11111111) # enable all
1139         d_cr_delay  = Signal()
1140         sync += d_cr_delay.eq(d_cr.req)
1141         with m.If(d_cr_delay):
1142             # data arrives one clock later
1143             comb += d_cr.data.eq(self.cr_r.data_o)
1144             comb += d_cr.ack.eq(1)
1145
1146         # aaand XER...
1147         with m.If(d_xer.req): # request for regfile access being made
1148             comb += self.xer_r.ren.eq(0b111111) # enable all
1149         d_xer_delay  = Signal()
1150         sync += d_xer_delay.eq(d_xer.req)
1151         with m.If(d_xer_delay):
1152             # data arrives one clock later
1153             comb += d_xer.data.eq(self.xer_r.data_o)
1154             comb += d_xer.ack.eq(1)
1155
1156     def tb_dec_fsm(self, m, spr_dec):
1157         """tb_dec_fsm
1158
1159         this is a FSM for updating either dec or tb.  it runs alternately
1160         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1161         value to DEC, however the regfile has "passthrough" on it so this
1162         *should* be ok.
1163
1164         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1165         """
1166
1167         comb, sync = m.d.comb, m.d.sync
1168         fast_rf = self.core.regs.rf['fast']
1169         fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1170         fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1171
1172         with m.FSM() as fsm:
1173
1174             # initiates read of current DEC
1175             with m.State("DEC_READ"):
1176                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1177                 comb += fast_r_dectb.ren.eq(1)
1178                 m.next = "DEC_WRITE"
1179
1180             # waits for DEC read to arrive (1 cycle), updates with new value
1181             with m.State("DEC_WRITE"):
1182                 new_dec = Signal(64)
1183                 # TODO: MSR.LPCR 32-bit decrement mode
1184                 comb += new_dec.eq(fast_r_dectb.data_o - 1)
1185                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1186                 comb += fast_w_dectb.wen.eq(1)
1187                 comb += fast_w_dectb.data_i.eq(new_dec)
1188                 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1189                 m.next = "TB_READ"
1190
1191             # initiates read of current TB
1192             with m.State("TB_READ"):
1193                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1194                 comb += fast_r_dectb.ren.eq(1)
1195                 m.next = "TB_WRITE"
1196
1197             # waits for read TB to arrive, initiates write of current TB
1198             with m.State("TB_WRITE"):
1199                 new_tb = Signal(64)
1200                 comb += new_tb.eq(fast_r_dectb.data_o + 1)
1201                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1202                 comb += fast_w_dectb.wen.eq(1)
1203                 comb += fast_w_dectb.data_i.eq(new_tb)
1204                 m.next = "DEC_READ"
1205
1206         return m
1207
1208     def __iter__(self):
1209         yield from self.pc_i.ports()
1210         yield self.pc_o
1211         yield self.memerr_o
1212         yield from self.core.ports()
1213         yield from self.imem.ports()
1214         yield self.core_bigendian_i
1215         yield self.busy_o
1216
1217     def ports(self):
1218         return list(self)
1219
1220     def external_ports(self):
1221         ports = self.pc_i.ports()
1222         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1223                 ]
1224
1225         if self.jtag_en:
1226             ports += list(self.jtag.external_ports())
1227         else:
1228             # don't add DMI if JTAG is enabled
1229             ports += list(self.dbg.dmi.ports())
1230
1231         ports += list(self.imem.ibus.fields.values())
1232         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1233
1234         if self.sram4x4k:
1235             for sram in self.sram4k:
1236                 ports += list(sram.bus.fields.values())
1237
1238         if self.xics:
1239             ports += list(self.xics_icp.bus.fields.values())
1240             ports += list(self.xics_ics.bus.fields.values())
1241             ports.append(self.int_level_i)
1242
1243         if self.gpio:
1244             ports += list(self.simple_gpio.bus.fields.values())
1245             ports.append(self.gpio_o)
1246
1247         return ports
1248
1249     def ports(self):
1250         return list(self)
1251
1252
class TestIssuer(Elaboratable):
    """wrapper around TestIssuerInternal that sorts out the clocks

    the internal issuer is added as submodule "ti".  its "coresync" clock
    (and the debug clock, when the debug domain is not "sync") is driven
    from here.  when pspec.use_pll is set, a PLL is added as well: its
    test and VCO outputs and its clock-select input are brought out, and
    a "pllclk" clock domain is created.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not: the signals below only exist with a PLL
        self.pll_en = getattr(pspec, "use_pll", False)
        if not self.pll_en:
            return
        self.pll_test_o = Signal(reset_less=True)
        self.pll_vco_o = Signal(reset_less=True)
        self.clk_sel_i = Signal(2, reset_less=True)
        self.ref_clk =  ClockSignal() # can't rename it but that's ok
        self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """connect the issuer, the optional PLL, and the clocks"""
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # the PLL output gets a clock domain of its own
            m.domains += ClockDomain("pllclk")
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # the external 24mhz reference is NOT wired to the PLL here:
            #comb += pll.clk_24_i.eq(self.ref_clk)

            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # pllclk domain shares the main reset
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # the coresync (and debug) clock domains are not added to
        # m.domains in this wrapper: only their clock signals are driven.
        core_clk = ClockSignal("coresync")
        # XXX BYPASS PLL XXX: with a PLL present the core is still run
        # from ref_clk rather than from the PLL output
        comb += core_clk.eq(self.ref_clk if self.pll_en else ClockSignal())

        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            comb += ClockSignal(self.ti.dbg_domain).eq(core_clk)

        return m

    def ports(self):
        """all issuer ports, all PLL ports, then main clock and reset"""
        everything = list(self.ti.ports())
        everything.extend(self.pll.ports())
        everything.extend((ClockSignal(), ResetSignal()))
        return everything

    def external_ports(self):
        """the issuer's external ports, clock and reset, plus PLL pins"""
        ports = self.ti.external_ports()
        ports.extend((ClockSignal(), ResetSignal()))
        if self.pll_en:
            ports.extend((self.clk_sel_i,
                          self.pll.clk_24_i,
                          self.pll_test_o,
                          self.pll_vco_o,
                          self.pllclk_clk,
                          self.ref_clk))
        return ports
1340
1341
if __name__ == '__main__':
    # request exactly one of each listed unit
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)
    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48, mask_wid=8, reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)

    # first hand over to the nmigen command-line front-end
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # when run without any command-line arguments, additionally convert
    # the design (external ports only) to RTLIL and save it to disk
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)