src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmigen.lib.coding import PriorityEncoder
  25
  26 from openpower.decoder.power_decoder import create_pdecode
  27 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  28 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  29 from openpower.decoder.decode2execute1 import Data
  30 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  31                                      SVP64PredMode)
  32 from openpower.state import CoreState
  33 from openpower.consts import (CR, SVP64CROffs)
  34 from soc.experiment.testmem import TestMemory # test only for instructions
  35 from soc.regfile.regfiles import StateRegs, FastRegs
  36 from soc.simple.core import NonProductionCore
  37 from soc.config.test.test_loadstore import TestMemPspec
  38 from soc.config.ifetch import ConfigFetchUnit
  39 from soc.debug.dmi import CoreDebug, DMIInterface
  40 from soc.debug.jtag import JTAG
  41 from soc.config.pinouts import get_pinspecs
  42 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  43 from soc.bus.simple_gpio import SimpleGPIO
  44 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  45 from soc.clock.select import ClockSelect
  46 from soc.clock.dummypll import DummyPLL
  47 from openpower.sv.svstate import SVSTATERec
  48
  49
  50 from nmutil.util import rising_edge
  51
  52 def get_insn(f_instr_o, pc):
  53     if f_instr_o.width == 32:
  54         return f_instr_o
  55     else:
  56         # 64-bit: bit 2 of pc decides which word to select
  57         return f_instr_o.word_select(pc[2], 32)
  58
  59 # gets state input or reads from state regfile
  60 def state_get(m, core_rst, state_i, name, regfile, regnum):
  61     comb = m.d.comb
  62     sync = m.d.sync
  63     # read the PC
  64     res = Signal(64, reset_less=True, name=name)
  65     res_ok_delay = Signal(name="%s_ok_delay" % name)
  66     with m.If(~core_rst):
  67         sync += res_ok_delay.eq(~state_i.ok)
  68         with m.If(state_i.ok):
  69             # incoming override (start from pc_i)
  70             comb += res.eq(state_i.data)
  71         with m.Else():
  72             # otherwise read StateRegs regfile for PC...
  73             comb += regfile.ren.eq(1<<regnum)
  74         # ... but on a 1-clock delay
  75         with m.If(res_ok_delay):
  76             comb += res.eq(regfile.data_o)
  77     return res
  78
  79 def get_predint(m, mask, name):
  80     """decode SVP64 predicate integer mask field to reg number and invert
  81     this is identical to the equivalent function in ISACaller except that
  82     it doesn't read the INT directly, it just decodes "what needs to be done"
  83     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  84
  85     * all1s is set to indicate that no mask is to be applied.
  86     * regread indicates the GPR register number to be read
  87     * invert is set to indicate that the register value is to be inverted
  88     * unary indicates that the contents of the register is to be shifted 1<<r3
  89     """
  90     comb = m.d.comb
  91     regread = Signal(5, name=name+"regread")
  92     invert = Signal(name=name+"invert")
  93     unary = Signal(name=name+"unary")
  94     all1s = Signal(name=name+"all1s")
  95     with m.Switch(mask):
  96         with m.Case(SVP64PredInt.ALWAYS.value):
  97             comb += all1s.eq(1)      # use 0b1111 (all ones)
  98         with m.Case(SVP64PredInt.R3_UNARY.value):
  99             comb += regread.eq(3)
 100             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 101         with m.Case(SVP64PredInt.R3.value):
 102             comb += regread.eq(3)
 103         with m.Case(SVP64PredInt.R3_N.value):
 104             comb += regread.eq(3)
 105             comb += invert.eq(1)
 106         with m.Case(SVP64PredInt.R10.value):
 107             comb += regread.eq(10)
 108         with m.Case(SVP64PredInt.R10_N.value):
 109             comb += regread.eq(10)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R30.value):
 112             comb += regread.eq(30)
 113         with m.Case(SVP64PredInt.R30_N.value):
 114             comb += regread.eq(30)
 115             comb += invert.eq(1)
 116     return regread, invert, unary, all1s
 117
 118 def get_predcr(m, mask, name):
 119     """decode SVP64 predicate CR to reg number field and invert status
 120     this is identical to _get_predcr in ISACaller
 121     """
 122     comb = m.d.comb
 123     idx = Signal(2, name=name+"idx")
 124     invert = Signal(name=name+"crinvert")
 125     with m.Switch(mask):
 126         with m.Case(SVP64PredCR.LT.value):
 127             comb += idx.eq(CR.LT)
 128             comb += invert.eq(0)
 129         with m.Case(SVP64PredCR.GE.value):
 130             comb += idx.eq(CR.LT)
 131             comb += invert.eq(1)
 132         with m.Case(SVP64PredCR.GT.value):
 133             comb += idx.eq(CR.GT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.LE.value):
 136             comb += idx.eq(CR.GT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.EQ.value):
 139             comb += idx.eq(CR.EQ)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.NE.value):
 142             comb += idx.eq(CR.EQ)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.SO.value):
 145             comb += idx.eq(CR.SO)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NS.value):
 148             comb += idx.eq(CR.SO)
 149             comb += invert.eq(1)
 150     return idx, invert
 151
 152
 153 class TestIssuerInternal(Elaboratable):
 154     """TestIssuer - reads instructions from TestMemory and issues them
 155
 156     efficiency and speed is not the main goal here: functional correctness
 157     and code clarity is.  optimisations (which almost 100% interfere with
 158     easy understanding) come later.
 159     """
 160     def __init__(self, pspec):
 161
 162         # test is SVP64 is to be enabled
 163         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 164
 165         # and if regfiles are reduced
 166         self.regreduce_en = (hasattr(pspec, "regreduce") and
 167                                             (pspec.regreduce == True))
 168
 169         # JTAG interface.  add this right at the start because if it's
 170         # added it *modifies* the pspec, by adding enable/disable signals
 171         # for parts of the rest of the core
 172         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 173         if self.jtag_en:
 174             # XXX MUST keep this up-to-date with litex, and
 175             # soc-cocotb-sim, and err.. all needs sorting out, argh
 176             subset = ['uart',
 177                       'mtwi',
 178                       'eint', 'gpio', 'mspi0',
 179                       # 'mspi1', - disabled for now
 180                       # 'pwm', 'sd0', - disabled for now
 181                        'sdr']
 182             self.jtag = JTAG(get_pinspecs(subset=subset))
 183             # add signals to pspec to enable/disable icache and dcache
 184             # (or data and intstruction wishbone if icache/dcache not included)
 185             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 186             # TODO: do we actually care if these are not domain-synchronised?
 187             # honestly probably not.
 188             pspec.wb_icache_en = self.jtag.wb_icache_en
 189             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 190             self.wb_sram_en = self.jtag.wb_sram_en
 191         else:
 192             self.wb_sram_en = Const(1)
 193
 194         # add 4k sram blocks?
 195         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 196                          pspec.sram4x4kblock == True)
 197         if self.sram4x4k:
 198             self.sram4k = []
 199             for i in range(4):
 200                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 201                                                     features={'err'}))
 202
 203         # add interrupt controller?
 204         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 205         if self.xics:
 206             self.xics_icp = XICS_ICP()
 207             self.xics_ics = XICS_ICS()
 208             self.int_level_i = self.xics_ics.int_level_i
 209
 210         # add GPIO peripheral?
 211         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 212         if self.gpio:
 213             self.simple_gpio = SimpleGPIO()
 214             self.gpio_o = self.simple_gpio.gpio_o
 215
 216         # main instruction core.  suitable for prototyping / demo only
 217         self.core = core = NonProductionCore(pspec)
 218
 219         # instruction decoder.  goes into Trap Record
 220         pdecode = create_pdecode()
 221         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
 222         self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
 223                                      opkls=IssuerDecode2ToOperand,
 224                                      svp64_en=self.svp64_en,
 225                                      regreduce_en=self.regreduce_en)
 226         if self.svp64_en:
 227             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 228
 229         # Test Instruction memory
 230         self.imem = ConfigFetchUnit(pspec).fu
 231
 232         # DMI interface
 233         self.dbg = CoreDebug()
 234
 235         # instruction go/monitor
 236         self.pc_o = Signal(64, reset_less=True)
 237         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
 238         self.svstate_i = Data(32, "svstate_i") # ditto
 239         self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
 240         self.busy_o = Signal(reset_less=True)
 241         self.memerr_o = Signal(reset_less=True)
 242
 243         # STATE regfile read /write ports for PC, MSR, SVSTATE
 244         staterf = self.core.regs.rf['state']
 245         self.state_r_pc = staterf.r_ports['cia'] # PC rd
 246         self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
 247         self.state_r_msr = staterf.r_ports['msr'] # MSR rd
 248         self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
 249         self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
 250
 251         # DMI interface access
 252         intrf = self.core.regs.rf['int']
 253         crrf = self.core.regs.rf['cr']
 254         xerrf = self.core.regs.rf['xer']
 255         self.int_r = intrf.r_ports['dmi'] # INT read
 256         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
 257         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 258
 259         if self.svp64_en:
 260             # for predication
 261             self.int_pred = intrf.r_ports['pred'] # INT predicate read
 262             self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 263
 264         # hack method of keeping an eye on whether branch/trap set the PC
 265         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 266         self.state_nia.wen.name = 'state_nia_wen'
 267
 268         # pulse to synchronize the simulator at instruction end
 269         self.insn_done = Signal()
 270
 271         if self.svp64_en:
 272             # store copies of predicate masks
 273             self.srcmask = Signal(64)
 274             self.dstmask = Signal(64)
 275
 276     def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
 277                         fetch_pc_ready_o, fetch_pc_valid_i,
 278                         fetch_insn_valid_o, fetch_insn_ready_i):
 279         """fetch FSM
 280
 281         this FSM performs fetch of raw instruction data, partial-decodes
 282         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 283         read a 2nd 32-bit quantity if that occurs.
 284         """
 285         comb = m.d.comb
 286         sync = m.d.sync
 287         pdecode2 = self.pdecode2
 288         cur_state = self.cur_state
 289         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 290
 291         msr_read = Signal(reset=1)
 292
 293         with m.FSM(name='fetch_fsm'):
 294
 295             # waiting (zzz)
 296             with m.State("IDLE"):
 297                 comb += fetch_pc_ready_o.eq(1)
 298                 with m.If(fetch_pc_valid_i):
 299                     # instruction allowed to go: start by reading the PC
 300                     # capture the PC and also drop it into Insn Memory
 301                     # we have joined a pair of combinatorial memory
 302                     # lookups together.  this is Generally Bad.
 303                     comb += self.imem.a_pc_i.eq(pc)
 304                     comb += self.imem.a_valid_i.eq(1)
 305                     comb += self.imem.f_valid_i.eq(1)
 306                     sync += cur_state.pc.eq(pc)
 307                     sync += cur_state.svstate.eq(svstate) # and svstate
 308
 309                     # initiate read of MSR. arrives one clock later
 310                     comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
 311                     sync += msr_read.eq(0)
 312
 313                     m.next = "INSN_READ"  # move to "wait for bus" phase
 314
 315             # dummy pause to find out why simulation is not keeping up
 316             with m.State("INSN_READ"):
 317                 # one cycle later, msr/sv read arrives.  valid only once.
 318                 with m.If(~msr_read):
 319                     sync += msr_read.eq(1) # yeah don't read it again
 320                     sync += cur_state.msr.eq(self.state_r_msr.data_o)
 321                 with m.If(self.imem.f_busy_o): # zzz...
 322                     # busy: stay in wait-read
 323                     comb += self.imem.a_valid_i.eq(1)
 324                     comb += self.imem.f_valid_i.eq(1)
 325                 with m.Else():
 326                     # not busy: instruction fetched
 327                     insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 328                     if self.svp64_en:
 329                         svp64 = self.svp64
 330                         # decode the SVP64 prefix, if any
 331                         comb += svp64.raw_opcode_in.eq(insn)
 332                         comb += svp64.bigendian.eq(self.core_bigendian_i)
 333                         # pass the decoded prefix (if any) to PowerDecoder2
 334                         sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 335                         # remember whether this is a prefixed instruction, so
 336                         # the FSM can readily loop when VL==0
 337                         sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 338                         # calculate the address of the following instruction
 339                         insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 340                         sync += nia.eq(cur_state.pc + insn_size)
 341                         with m.If(~svp64.is_svp64_mode):
 342                             # with no prefix, store the instruction
 343                             # and hand it directly to the next FSM
 344                             sync += dec_opcode_i.eq(insn)
 345                             m.next = "INSN_READY"
 346                         with m.Else():
 347                             # fetch the rest of the instruction from memory
 348                             comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 349                             comb += self.imem.a_valid_i.eq(1)
 350                             comb += self.imem.f_valid_i.eq(1)
 351                             m.next = "INSN_READ2"
 352                     else:
 353                         # not SVP64 - 32-bit only
 354                         sync += nia.eq(cur_state.pc + 4)
 355                         sync += dec_opcode_i.eq(insn)
 356                         m.next = "INSN_READY"
 357
 358             with m.State("INSN_READ2"):
 359                 with m.If(self.imem.f_busy_o):  # zzz...
 360                     # busy: stay in wait-read
 361                     comb += self.imem.a_valid_i.eq(1)
 362                     comb += self.imem.f_valid_i.eq(1)
 363                 with m.Else():
 364                     # not busy: instruction fetched
 365                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 366                     sync += dec_opcode_i.eq(insn)
 367                     m.next = "INSN_READY"
 368                     # TODO: probably can start looking at pdecode2.rm_dec
 369                     # here or maybe even in INSN_READ state, if svp64_mode
 370                     # detected, in order to trigger - and wait for - the
 371                     # predicate reading.
 372                     if self.svp64_en:
 373                         pmode = pdecode2.rm_dec.predmode
 374                     """
 375                     if pmode != SVP64PredMode.ALWAYS.value:
 376                         fire predicate loading FSM and wait before
 377                         moving to INSN_READY
 378                     else:
 379                         sync += self.srcmask.eq(-1) # set to all 1s
 380                         sync += self.dstmask.eq(-1) # set to all 1s
 381                         m.next = "INSN_READY"
 382                     """
 383
 384             with m.State("INSN_READY"):
 385                 # hand over the instruction, to be decoded
 386                 comb += fetch_insn_valid_o.eq(1)
 387                 with m.If(fetch_insn_ready_i):
 388                     m.next = "IDLE"
 389
 390     def fetch_predicate_fsm(self, m,
 391                             pred_insn_valid_i, pred_insn_ready_o,
 392                             pred_mask_valid_o, pred_mask_ready_i):
 393         """fetch_predicate_fsm - obtains (constructs in the case of CR)
 394            src/dest predicate masks
 395
 396         https://bugs.libre-soc.org/show_bug.cgi?id=617
 397         the predicates can be read here, by using IntRegs r_ports['pred']
 398         or CRRegs r_ports['pred'].  in the case of CRs it will have to
 399         be done through multiple reads, extracting one relevant at a time.
 400         later, a faster way would be to use the 32-bit-wide CR port but
 401         this is more complex decoding, here.  equivalent code used in
 402         ISACaller is "from openpower.decoder.isa.caller import get_predcr"
 403
 404         note: this ENTIRE FSM is not to be called when svp64 is disabled
 405         """
 406         comb = m.d.comb
 407         sync = m.d.sync
 408         pdecode2 = self.pdecode2
 409         rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
 410         predmode = rm_dec.predmode
 411         srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
 412         cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
 413         # get src/dst step, so we can skip already used mask bits
 414         cur_state = self.cur_state
 415         srcstep = cur_state.svstate.srcstep
 416         dststep = cur_state.svstate.dststep
 417         cur_vl = cur_state.svstate.vl
 418
 419         # decode predicates
 420         sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
 421         dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
 422         sidx, scrinvert = get_predcr(m, srcpred, 's')
 423         didx, dcrinvert = get_predcr(m, dstpred, 'd')
 424
 425         # store fetched masks, for either intpred or crpred
 426         # when src/dst step is not zero, the skipped mask bits need to be
 427         # shifted-out, before actually storing them in src/dest mask
 428         new_srcmask = Signal(64, reset_less=True)
 429         new_dstmask = Signal(64, reset_less=True)
 430
 431         with m.FSM(name="fetch_predicate"):
 432
 433             with m.State("FETCH_PRED_IDLE"):
 434                 comb += pred_insn_ready_o.eq(1)
 435                 with m.If(pred_insn_valid_i):
 436                     with m.If(predmode == SVP64PredMode.INT):
 437                         # skip fetching destination mask register, when zero
 438                         with m.If(dall1s):
 439                             sync += new_dstmask.eq(-1)
 440                             # directly go to fetch source mask register
 441                             # guaranteed not to be zero (otherwise predmode
 442                             # would be SVP64PredMode.ALWAYS, not INT)
 443                             comb += int_pred.addr.eq(sregread)
 444                             comb += int_pred.ren.eq(1)
 445                             m.next = "INT_SRC_READ"
 446                         # fetch destination predicate register
 447                         with m.Else():
 448                             comb += int_pred.addr.eq(dregread)
 449                             comb += int_pred.ren.eq(1)
 450                             m.next = "INT_DST_READ"
 451                     with m.Elif(predmode == SVP64PredMode.CR):
 452                         # go fetch masks from the CR register file
 453                         sync += new_srcmask.eq(0)
 454                         sync += new_dstmask.eq(0)
 455                         m.next = "CR_READ"
 456                     with m.Else():
 457                         sync += self.srcmask.eq(-1)
 458                         sync += self.dstmask.eq(-1)
 459                         m.next = "FETCH_PRED_DONE"
 460
 461             with m.State("INT_DST_READ"):
 462                 # store destination mask
 463                 inv = Repl(dinvert, 64)
 464                 with m.If(dunary):
 465                     # set selected mask bit for 1<<r3 mode
 466                     dst_shift = Signal(range(64))
 467                     comb += dst_shift.eq(self.int_pred.data_o & 0b111111)
 468                     sync += new_dstmask.eq(1 << dst_shift)
 469                 with m.Else():
 470                     # invert mask if requested
 471                     sync += new_dstmask.eq(self.int_pred.data_o ^ inv)
 472                 # skip fetching source mask register, when zero
 473                 with m.If(sall1s):
 474                     sync += new_srcmask.eq(-1)
 475                     m.next = "FETCH_PRED_SHIFT_MASK"
 476                 # fetch source predicate register
 477                 with m.Else():
 478                     comb += int_pred.addr.eq(sregread)
 479                     comb += int_pred.ren.eq(1)
 480                     m.next = "INT_SRC_READ"
 481
 482             with m.State("INT_SRC_READ"):
 483                 # store source mask
 484                 inv = Repl(sinvert, 64)
 485                 with m.If(sunary):
 486                     # set selected mask bit for 1<<r3 mode
 487                     src_shift = Signal(range(64))
 488                     comb += src_shift.eq(self.int_pred.data_o & 0b111111)
 489                     sync += new_srcmask.eq(1 << src_shift)
 490                 with m.Else():
 491                     # invert mask if requested
 492                     sync += new_srcmask.eq(self.int_pred.data_o ^ inv)
 493                 m.next = "FETCH_PRED_SHIFT_MASK"
 494
 495             # fetch masks from the CR register file
 496             # implements the following loop:
 497             # idx, inv = get_predcr(mask)
 498             # mask = 0
 499             # for cr_idx in range(vl):
 500             #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
 501             #     if cr[idx] ^ inv:
 502             #         mask |= 1 << cr_idx
 503             # return mask
 504             with m.State("CR_READ"):
 505                 # CR index to be read, which will be ready by the next cycle
 506                 cr_idx = Signal.like(cur_vl, reset_less=True)
 507                 # submit the read operation to the regfile
 508                 with m.If(cr_idx != cur_vl):
 509                     # the CR read port is unary ...
 510                     # ren = 1 << cr_idx
 511                     # ... in MSB0 convention ...
 512                     # ren = 1 << (7 - cr_idx)
 513                     # ... and with an offset:
 514                     # ren = 1 << (7 - off - cr_idx)
 515                     idx = SVP64CROffs.CRPred + cr_idx
 516                     comb += cr_pred.ren.eq(1 << (7 - idx))
 517                     # signal data valid in the next cycle
 518                     cr_read = Signal(reset_less=True)
 519                     sync += cr_read.eq(1)
 520                     # load the next index
 521                     sync += cr_idx.eq(cr_idx + 1)
 522                 with m.Else():
 523                     # exit on loop end
 524                     sync += cr_read.eq(0)
 525                     sync += cr_idx.eq(0)
 526                     m.next = "FETCH_PRED_SHIFT_MASK"
 527                 with m.If(cr_read):
 528                     # compensate for the one cycle delay on the regfile
 529                     cur_cr_idx = Signal.like(cur_vl)
 530                     comb += cur_cr_idx.eq(cr_idx - 1)
 531                     # read the CR field, select the appropriate bit
 532                     cr_field = Signal(4)
 533                     scr_bit = Signal()
 534                     dcr_bit = Signal()
 535                     comb += cr_field.eq(cr_pred.data_o)
 536                     comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
 537                     comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
 538                     # set the corresponding mask bit
 539                     bit_to_set = Signal.like(self.srcmask)
 540                     comb += bit_to_set.eq(1 << cur_cr_idx)
 541                     with m.If(scr_bit):
 542                         sync += new_srcmask.eq(new_srcmask | bit_to_set)
 543                     with m.If(dcr_bit):
 544                         sync += new_dstmask.eq(new_dstmask | bit_to_set)
 545
 546             with m.State("FETCH_PRED_SHIFT_MASK"):
 547                 # shift-out skipped mask bits
 548                 sync += self.srcmask.eq(new_srcmask >> srcstep)
 549                 sync += self.dstmask.eq(new_dstmask >> dststep)
 550                 m.next = "FETCH_PRED_DONE"
 551
 552             with m.State("FETCH_PRED_DONE"):
 553                 comb += pred_mask_valid_o.eq(1)
 554                 with m.If(pred_mask_ready_i):
 555                     m.next = "FETCH_PRED_IDLE"
 556
 557     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 558                   dbg, core_rst, is_svp64_mode,
 559                   fetch_pc_ready_o, fetch_pc_valid_i,
 560                   fetch_insn_valid_o, fetch_insn_ready_i,
 561                   pred_insn_valid_i, pred_insn_ready_o,
 562                   pred_mask_valid_o, pred_mask_ready_i,
 563                   exec_insn_valid_i, exec_insn_ready_o,
 564                   exec_pc_valid_o, exec_pc_ready_i):
 565         """issue FSM
 566
 567         decode / issue FSM.  this interacts with the "fetch" FSM
 568         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 569         (outgoing). also interacts with the "execute" FSM
 570         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 571         (incoming).
 572         SVP64 RM prefixes have already been set up by the
 573         "fetch" phase, so execute is fairly straightforward.
 574         """
 575
 576         comb = m.d.comb
 577         sync = m.d.sync
 578         pdecode2 = self.pdecode2
 579         cur_state = self.cur_state
 580
 581         # temporaries
 582         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 583
 584         # for updating svstate (things like srcstep etc.)
 585         update_svstate = Signal() # set this (below) if updating
 586         new_svstate = SVSTATERec("new_svstate")
 587         comb += new_svstate.eq(cur_state.svstate)
 588
 589         # precalculate srcstep+1 and dststep+1
 590         cur_srcstep = cur_state.svstate.srcstep
 591         cur_dststep = cur_state.svstate.dststep
 592         next_srcstep = Signal.like(cur_srcstep)
 593         next_dststep = Signal.like(cur_dststep)
 594         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 595         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 596
 597         # note if an exception happened.  in a pipelined or OoO design
 598         # this needs to be accompanied by "shadowing" (or stalling)
 599         el = []
 600         for exc in core.fus.excs.values():
 601             el.append(exc.happened)
 602         exc_happened = Signal()
 603         if len(el) > 0: # at least one exception
 604             comb += exc_happened.eq(Cat(*el).bool())
 605
 606         with m.FSM(name="issue_fsm"):
 607
 608             # sync with the "fetch" phase which is reading the instruction
 609             # at this point, there is no instruction running, that
 610             # could inadvertently update the PC.
 611             with m.State("ISSUE_START"):
 612                 # wait on "core stop" release, before next fetch
 613                 # need to do this here, in case we are in a VL==0 loop
 614                 with m.If(~dbg.core_stop_o & ~core_rst):
 615                     comb += fetch_pc_valid_i.eq(1) # tell fetch to start
 616                     with m.If(fetch_pc_ready_o):   # fetch acknowledged us
 617                         m.next = "INSN_WAIT"
 618                 with m.Else():
 619                     # tell core it's stopped, and acknowledge debug handshake
 620                     comb += dbg.core_stopped_i.eq(1)
 621                     # while stopped, allow updating the PC and SVSTATE
 622                     with m.If(self.pc_i.ok):
 623                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 624                         comb += self.state_w_pc.data_i.eq(self.pc_i.data)
 625                         sync += pc_changed.eq(1)
 626                     with m.If(self.svstate_i.ok):
 627                         comb += new_svstate.eq(self.svstate_i.data)
 628                         comb += update_svstate.eq(1)
 629                         sync += sv_changed.eq(1)
 630
 631             # wait for an instruction to arrive from Fetch
 632             with m.State("INSN_WAIT"):
 633                 comb += fetch_insn_ready_i.eq(1)
 634                 with m.If(fetch_insn_valid_o):
 635                     # loop into ISSUE_START if it's a SVP64 instruction
 636                     # and VL == 0.  this because VL==0 is a for-loop
 637                     # from 0 to 0 i.e. always, always a NOP.
 638                     cur_vl = cur_state.svstate.vl
 639                     with m.If(is_svp64_mode & (cur_vl == 0)):
 640                         # update the PC before fetching the next instruction
 641                         # since we are in a VL==0 loop, no instruction was
 642                         # executed that we could be overwriting
 643                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 644                         comb += self.state_w_pc.data_i.eq(nia)
 645                         comb += self.insn_done.eq(1)
 646                         m.next = "ISSUE_START"
 647                     with m.Else():
 648                         if self.svp64_en:
 649                             m.next = "PRED_START"  # start fetching predicate
 650                         else:
 651                             m.next = "DECODE_SV"  # skip predication
 652
 653             with m.State("PRED_START"):
 654                 comb += pred_insn_valid_i.eq(1)  # tell fetch_pred to start
 655                 with m.If(pred_insn_ready_o):  # fetch_pred acknowledged us
 656                     m.next = "MASK_WAIT"
 657
 658             with m.State("MASK_WAIT"):
 659                 comb += pred_mask_ready_i.eq(1) # ready to receive the masks
 660                 with m.If(pred_mask_valid_o): # predication masks are ready
 661                     m.next = "PRED_SKIP"
 662
 663             # skip zeros in predicate
 664             with m.State("PRED_SKIP"):
 665                 with m.If(~is_svp64_mode):
 666                     m.next = "DECODE_SV"  # nothing to do
 667                 with m.Else():
 668                     if self.svp64_en:
 669                         pred_src_zero = pdecode2.rm_dec.pred_sz
 670                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 671
 672                         # new srcstep, after skipping zeros
 673                         skip_srcstep = Signal.like(cur_srcstep)
 674                         # value to be added to the current srcstep
 675                         src_delta = Signal.like(cur_srcstep)
 676                         # add leading zeros to srcstep, if not in zero mode
 677                         with m.If(~pred_src_zero):
 678                             # priority encoder (count leading zeros)
 679                             # append guard bit, in case the mask is all zeros
 680                             pri_enc_src = PriorityEncoder(65)
 681                             m.submodules.pri_enc_src = pri_enc_src
 682                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 683                                                          Const(1, 1)))
 684                             comb += src_delta.eq(pri_enc_src.o)
 685                         # apply delta to srcstep
 686                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 687                         # shift-out all leading zeros from the mask
 688                         # plus the leading "one" bit
 689                         # TODO count leading zeros and shift-out the zero
 690                         #      bits, in the same step, in hardware
 691                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 692
 693                         # same as above, but for dststep
 694                         skip_dststep = Signal.like(cur_dststep)
 695                         dst_delta = Signal.like(cur_dststep)
 696                         with m.If(~pred_dst_zero):
 697                             pri_enc_dst = PriorityEncoder(65)
 698                             m.submodules.pri_enc_dst = pri_enc_dst
 699                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 700                                                          Const(1, 1)))
 701                             comb += dst_delta.eq(pri_enc_dst.o)
 702                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 703                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 704
 705                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 706                         with m.If((skip_srcstep >= cur_vl) |
 707                                   (skip_dststep >= cur_vl)):
 708                             # end of VL loop. Update PC and reset src/dst step
 709                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 710                             comb += self.state_w_pc.data_i.eq(nia)
 711                             comb += new_svstate.srcstep.eq(0)
 712                             comb += new_svstate.dststep.eq(0)
 713                             comb += update_svstate.eq(1)
 714                             # synchronize with the simulator
 715                             comb += self.insn_done.eq(1)
 716                             # go back to Issue
 717                             m.next = "ISSUE_START"
 718                         with m.Else():
 719                             # update new src/dst step
 720                             comb += new_svstate.srcstep.eq(skip_srcstep)
 721                             comb += new_svstate.dststep.eq(skip_dststep)
 722                             comb += update_svstate.eq(1)
 723                             # proceed to Decode
 724                             m.next = "DECODE_SV"
 725
 726                         # pass predicate mask bits through to satellite decoders
 727                         # TODO: for SIMD this will be *multiple* bits
 728                         sync += core.sv_pred_sm.eq(self.srcmask[0])
 729                         sync += core.sv_pred_dm.eq(self.dstmask[0])
 730
 731             # after src/dst step have been updated, we are ready
 732             # to decode the instruction
 733             with m.State("DECODE_SV"):
 734                 # decode the instruction
 735                 sync += core.e.eq(pdecode2.e)
 736                 sync += core.state.eq(cur_state)
 737                 sync += core.raw_insn_i.eq(dec_opcode_i)
 738                 sync += core.bigendian_i.eq(self.core_bigendian_i)
 739                 if self.svp64_en:
 740                     sync += core.sv_rm.eq(pdecode2.sv_rm)
 741                     # set RA_OR_ZERO detection in satellite decoders
 742                     sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
 743
 744                 m.next = "INSN_EXECUTE"  # move to "execute"
 745
 746             # handshake with execution FSM, move to "wait" once acknowledged
 747             with m.State("INSN_EXECUTE"):
 748                 comb += exec_insn_valid_i.eq(1) # trigger execute
 749                 with m.If(exec_insn_ready_o):   # execute acknowledged us
 750                     m.next = "EXECUTE_WAIT"
 751
 752             with m.State("EXECUTE_WAIT"):
 753                 # wait on "core stop" release, at instruction end
 754                 # need to do this here, in case we are in a VL>1 loop
 755                 with m.If(~dbg.core_stop_o & ~core_rst):
 756                     comb += exec_pc_ready_i.eq(1)
 757                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 758                     #with m.If(exec_pc_valid_o & exc_happened):
 759                     #    probably something like this:
 760                     #    sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0")
 761                     # TODO: the exception info needs to be blatted
 762                     # into pdecode.ldst_exc, and the instruction "re-run".
 763                     # when ldst_exc.happened is set, the PowerDecoder2
 764                     # reacts very differently: it re-writes the instruction
 765                     # with a "trap" (calls PowerDecoder2.trap()) which
 766                     # will *overwrite* whatever was requested and jump the
 767                     # PC to the exception address, as well as alter MSR.
 768                     # nothing else needs to be done other than to note
 769                     # the change of PC and MSR (and, later, SVSTATE)
 770                     #with m.Elif(exec_pc_valid_o):
 771                     with m.If(exec_pc_valid_o): # replace with Elif (above)
 772
 773                         # was this the last loop iteration?
 774                         is_last = Signal()
 775                         cur_vl = cur_state.svstate.vl
 776                         comb += is_last.eq(next_srcstep == cur_vl)
 777
 778                         # if either PC or SVSTATE were changed by the previous
 779                         # instruction, go directly back to Fetch, without
 780                         # updating either PC or SVSTATE
 781                         with m.If(pc_changed | sv_changed):
 782                             m.next = "ISSUE_START"
 783
 784                         # also return to Fetch, when no output was a vector
 785                         # (regardless of SRCSTEP and VL), or when the last
 786                         # instruction was really the last one of the VL loop
 787                         with m.Elif((~pdecode2.loop_continue) | is_last):
 788                             # before going back to fetch, update the PC state
 789                             # register with the NIA.
 790                             # ok here we are not reading the branch unit.
 791                             # TODO: this just blithely overwrites whatever
 792                             #       pipeline updated the PC
 793                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 794                             comb += self.state_w_pc.data_i.eq(nia)
 795                             # reset SRCSTEP before returning to Fetch
 796                             if self.svp64_en:
 797                                 with m.If(pdecode2.loop_continue):
 798                                     comb += new_svstate.srcstep.eq(0)
 799                                     comb += new_svstate.dststep.eq(0)
 800                                     comb += update_svstate.eq(1)
 801                             else:
 802                                 comb += new_svstate.srcstep.eq(0)
 803                                 comb += new_svstate.dststep.eq(0)
 804                                 comb += update_svstate.eq(1)
 805                             m.next = "ISSUE_START"
 806
 807                         # returning to Execute? then, first update SRCSTEP
 808                         with m.Else():
 809                             comb += new_svstate.srcstep.eq(next_srcstep)
 810                             comb += new_svstate.dststep.eq(next_dststep)
 811                             comb += update_svstate.eq(1)
 812                             # return to mask skip loop
 813                             m.next = "PRED_SKIP"
 814
 815                 with m.Else():
 816                     comb += dbg.core_stopped_i.eq(1)
 817                     # while stopped, allow updating the PC and SVSTATE
 818                     with m.If(self.pc_i.ok):
 819                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 820                         comb += self.state_w_pc.data_i.eq(self.pc_i.data)
 821                         sync += pc_changed.eq(1)
 822                     with m.If(self.svstate_i.ok):
 823                         comb += new_svstate.eq(self.svstate_i.data)
 824                         comb += update_svstate.eq(1)
 825                         sync += sv_changed.eq(1)
 826
 827         # check if svstate needs updating: if so, write it to State Regfile
 828         with m.If(update_svstate):
 829             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 830             comb += self.state_w_sv.data_i.eq(new_svstate)
 831             sync += cur_state.svstate.eq(new_svstate) # for next clock
 832
 833     def execute_fsm(self, m, core, pc_changed, sv_changed,
 834                     exec_insn_valid_i, exec_insn_ready_o,
 835                     exec_pc_valid_o, exec_pc_ready_i):
 836         """execute FSM
 837
 838         execute FSM. this interacts with the "issue" FSM
 839         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 840         (outgoing). SVP64 RM prefixes have already been set up by the
 841         "issue" phase, so execute is fairly straightforward.
 842         """
 843
 844         comb = m.d.comb
 845         sync = m.d.sync
 846         pdecode2 = self.pdecode2
 847
 848         # temporaries
 849         core_busy_o = core.busy_o                 # core is busy
 850         core_ivalid_i = core.ivalid_i             # instruction is valid
 851         core_issue_i = core.issue_i               # instruction is issued
 852         insn_type = core.e.do.insn_type           # instruction MicroOp type
 853
 854         with m.FSM(name="exec_fsm"):
 855
 856             # waiting for instruction bus (stays there until not busy)
 857             with m.State("INSN_START"):
 858                 comb += exec_insn_ready_o.eq(1)
 859                 with m.If(exec_insn_valid_i):
 860                     comb += core_ivalid_i.eq(1)  # instruction is valid
 861                     comb += core_issue_i.eq(1)  # and issued
 862                     sync += sv_changed.eq(0)
 863                     sync += pc_changed.eq(0)
 864                     m.next = "INSN_ACTIVE"  # move to "wait completion"
 865
 866             # instruction started: must wait till it finishes
 867             with m.State("INSN_ACTIVE"):
 868                 with m.If(insn_type != MicrOp.OP_NOP):
 869                     comb += core_ivalid_i.eq(1) # instruction is valid
 870                 # note changes to PC and SVSTATE
 871                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 872                     sync += sv_changed.eq(1)
 873                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 874                     sync += pc_changed.eq(1)
 875                 with m.If(~core_busy_o): # instruction done!
 876                     comb += exec_pc_valid_o.eq(1)
 877                     with m.If(exec_pc_ready_i):
 878                         comb += self.insn_done.eq(1)
 879                         m.next = "INSN_START"  # back to fetch
 880
 881     def setup_peripherals(self, m):
 882         comb, sync = m.d.comb, m.d.sync
 883
 884         m.submodules.core = core = DomainRenamer("coresync")(self.core)
 885         m.submodules.imem = imem = self.imem
 886         m.submodules.dbg = dbg = self.dbg
 887         if self.jtag_en:
 888             m.submodules.jtag = jtag = self.jtag
 889             # TODO: UART2GDB mux, here, from external pin
 890             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 891             sync += dbg.dmi.connect_to(jtag.dmi)
 892
 893         cur_state = self.cur_state
 894
 895         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 896         if self.sram4x4k:
 897             for i, sram in enumerate(self.sram4k):
 898                 m.submodules["sram4k_%d" % i] = sram
 899                 comb += sram.enable.eq(self.wb_sram_en)
 900
 901         # XICS interrupt handler
 902         if self.xics:
 903             m.submodules.xics_icp = icp = self.xics_icp
 904             m.submodules.xics_ics = ics = self.xics_ics
 905             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 906             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
 907
 908         # GPIO test peripheral
 909         if self.gpio:
 910             m.submodules.simple_gpio = simple_gpio = self.simple_gpio
 911
 912         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 913         # XXX causes litex ECP5 test to get wrong idea about input and output
 914         # (but works with verilator sim *sigh*)
 915         #if self.gpio and self.xics:
 916         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 917
 918         # instruction decoder
 919         pdecode = create_pdecode()
 920         m.submodules.dec2 = pdecode2 = self.pdecode2
 921         if self.svp64_en:
 922             m.submodules.svp64 = svp64 = self.svp64
 923
 924         # convenience
 925         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 926         intrf = self.core.regs.rf['int']
 927
 928         # clock delay power-on reset
 929         cd_por  = ClockDomain(reset_less=True)
 930         cd_sync = ClockDomain()
 931         core_sync = ClockDomain("coresync")
 932         m.domains += cd_por, cd_sync, core_sync
 933
 934         ti_rst = Signal(reset_less=True)
 935         delay = Signal(range(4), reset=3)
 936         with m.If(delay != 0):
 937             m.d.por += delay.eq(delay - 1)
 938         comb += cd_por.clk.eq(ClockSignal())
 939
 940         # power-on reset delay
 941         core_rst = ResetSignal("coresync")
 942         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 943         comb += core_rst.eq(ti_rst)
 944
 945         # busy/halted signals from core
 946         comb += self.busy_o.eq(core.busy_o)
 947         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 948
 949         # temporary hack: says "go" immediately for both address gen and ST
 950         l0 = core.l0
 951         ldst = core.fus.fus['ldst0']
 952         st_go_edge = rising_edge(m, ldst.st.rel_o)
 953         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
 954         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
 955
 956         return core_rst
 957
    def elaborate(self, platform):
        """builds the issuer: peripherals, state wiring and the FSMs

        order matters here.  setup_peripherals() runs first (it returns
        the core reset signal), then default values are given to the
        state-regfile ports, then the fetch / issue / (optional)
        predicate-fetch / execute FSMs are instantiated, sharing the
        ready/valid handshake Signals created in this function.  finally
        the DMI read logic and the DEC/TB FSM are added.

        returns the nmigen Module.
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        # NOTE: the Signals below take their names from the python
        # variable names, so renaming a variable renames the signal
        pc_changed = Signal() # note write to PC
        sv_changed = Signal() # note write to SVSTATE

        # read state either from incoming override or from regfile
        # TODO: really should be doing MSR in the same way
        pc = state_get(m, core_rst, self.pc_i,
                            "pc",                  # read PC
                            self.state_r_pc, StateRegs.PC)
        svstate = state_get(m, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle: these are only defaults, the issue
        # FSM (instantiated further down) assigns wen/data_i when it
        # actually wants to write the PC
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        nia = Signal(64)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.svstate.eq(svstate)
        comb += dbg.state.msr.eq(cur_state.msr)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_valid_i = Signal() # Issue tells Fetch "start next read"
        fetch_pc_ready_o = Signal() # Fetch acknowledges: Issue may proceed

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_valid_o = Signal()
        fetch_insn_ready_i = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_valid_i = Signal()
        pred_insn_ready_o = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_valid_o = Signal()
        pred_mask_ready_i = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_valid_i = Signal()
        exec_insn_ready_o = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_valid_o = Signal()
        exec_pc_ready_i = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop lives.  the ready/valid
        # signalling is used to communicate between the four.

        self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
                       fetch_pc_ready_o, fetch_pc_valid_i,
                       fetch_insn_valid_o, fetch_insn_ready_i)

        self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_ready_o, fetch_pc_valid_i,
                       fetch_insn_valid_o, fetch_insn_ready_i,
                       pred_insn_valid_i, pred_insn_ready_o,
                       pred_mask_valid_o, pred_mask_ready_i,
                       exec_insn_valid_i, exec_insn_ready_o,
                       exec_pc_valid_o, exec_pc_ready_i)

        # the predicate FSM only exists when SVP64 is enabled
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_valid_i, pred_insn_ready_o,
                                     pred_mask_valid_o, pred_mask_ready_i)

        self.execute_fsm(m, core, pc_changed, sv_changed,
                         exec_insn_valid_i, exec_insn_ready_o,
                         exec_pc_valid_o, exec_pc_ready_i)

        # whatever was done above, over-ride it if core reset is held
        # (this must stay *after* the FSM calls to take priority)
        with m.If(core_rst):
            sync += nia.eq(0)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        return m
1082
    def do_dmi(self, m, dbg):
        """deals with DMI debug requests

        currently only provides read requests for the INT regfile, CR and XER
        it will later also deal with *writing* to these regfiles.

        all three follow the same pattern: while the debug "req" is
        raised the regfile read-enable is driven combinatorially; "req"
        is registered for one clock, and on that delayed cycle the
        regfile data_o is forwarded to the debug port together with "ack".
        """
        comb = m.d.comb
        sync = m.d.sync
        dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        with m.If(d_reg.req): # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            if intrf.unary:
                # unary regfile: the read-enable is a one-hot of the address
                comb += self.int_r.ren.eq(1<<d_reg.addr)
            else:
                # binary regfile: separate address plus single enable bit
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        # req delayed by one clock (the signal is named after the variable)
        d_reg_delay  = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req): # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111) # enable all
        d_cr_delay  = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.data_o)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req): # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111) # enable all
        d_xer_delay  = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.data_o)
            comb += d_xer.ack.eq(1)
1128
    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        *should* be ok.

        each register takes two states (read, then write), so the full
        cycle DEC_READ -> DEC_WRITE -> TB_READ -> TB_WRITE takes four
        clocks: DEC is decremented and TB incremented once per cycle of
        the FSM.  the new DEC value is also copied into spr_dec.

        both registers share the single 'issue' read port and the single
        'issue' write port of the "fast" regfile.

        returns m (the module that was passed in).

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
        """

        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB

        with m.FSM() as fsm:

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.data_o - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_dec)
                sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                comb += new_tb.eq(fast_r_dectb.data_o + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_tb)
                m.next = "DEC_READ"

        return m
1180
1181     def __iter__(self):
1182         yield from self.pc_i.ports()
1183         yield self.pc_o
1184         yield self.memerr_o
1185         yield from self.core.ports()
1186         yield from self.imem.ports()
1187         yield self.core_bigendian_i
1188         yield self.busy_o
1189
1190     def ports(self):
1191         return list(self)
1192
1193     def external_ports(self):
1194         ports = self.pc_i.ports()
1195         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1196                 ]
1197
1198         if self.jtag_en:
1199             ports += list(self.jtag.external_ports())
1200         else:
1201             # don't add DMI if JTAG is enabled
1202             ports += list(self.dbg.dmi.ports())
1203
1204         ports += list(self.imem.ibus.fields.values())
1205         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1206
1207         if self.sram4x4k:
1208             for sram in self.sram4k:
1209                 ports += list(sram.bus.fields.values())
1210
1211         if self.xics:
1212             ports += list(self.xics_icp.bus.fields.values())
1213             ports += list(self.xics_ics.bus.fields.values())
1214             ports.append(self.int_level_i)
1215
1216         if self.gpio:
1217             ports += list(self.simple_gpio.bus.fields.values())
1218             ports.append(self.gpio_o)
1219
1220         return ports
1221
1222     def ports(self):
1223         return list(self)
1224
1225
class TestIssuer(Elaboratable):
    """wraps TestIssuerInternal and supplies the "coresync" clock

    the "coresync" clock is driven either straight from the default
    ("sync") clock, or - when pspec.use_pll is set - from the output of
    a DummyPLL which is itself fed from the default clock.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)

        # the PLL object is always created (ports() lists its ports
        # unconditionally) but is only added to the design if pll_en is set
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
        if self.pll_en:
            # these only exist when the PLL is in use
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(reset_less=True)

    def elaborate(self, platform):
        """adds the issuer (and optionally the PLL), wires up the clocks"""
        m = Module()
        comb = m.d.comb

        # the issuer itself; its core logic is clocked from "coresync",
        # whose clock signal is driven at the bottom of this function
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # add clock domains from PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # the "pllclk" domain is clocked from the PLL output
            pllclk = ClockSignal("pllclk")
            comb += pllclk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            comb += pll.clk_24_i.eq(ClockSignal())

            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # internal ("coresync") clock: PLL output if enabled, otherwise
        # the default clock passed straight through
        intclk = ClockSignal("coresync")
        if self.pll_en:
            comb += intclk.eq(pll.clk_pll_o)
        else:
            comb += intclk.eq(ClockSignal())

        return m

    def ports(self):
        """all issuer ports, all PLL ports, plus default clock and reset"""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """the issuer's external ports plus clock, reset and, only when the
        PLL is enabled, the PLL select / test / VCO signals
        """
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.clk_sel_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
        return ports
1297
1298
if __name__ == '__main__':
    # one Function Unit of each kind
    fu_kinds = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot')
    fu_counts = dict.fromkeys(fu_kinds, 1)

    test_pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                              imem_ifacetype='bare_wb',
                              addr_wid=48,
                              mask_wid=8,
                              reg_wid=64,
                              units=fu_counts)
    issuer = TestIssuer(test_pspec)

    # hand over to the nmigen command-line front-end
    main(issuer, ports=issuer.ports(), name="test_issuer")

    # run without any arguments: also dump the design, with only the
    # external ports, as RTLIL
    if len(sys.argv) == 1:
        il_text = rtlil.convert(issuer, ports=issuer.external_ports(),
                                name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(il_text)