src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmigen.lib.coding import PriorityEncoder
  25
  26 from openpower.decoder.power_decoder import create_pdecode
  27 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  28 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  29 from openpower.decoder.decode2execute1 import Data
  30 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  31                                      SVP64PredMode)
  32 from openpower.state import CoreState
  33 from openpower.consts import (CR, SVP64CROffs)
  34 from soc.experiment.testmem import TestMemory # test only for instructions
  35 from soc.regfile.regfiles import StateRegs, FastRegs
  36 from soc.simple.core import NonProductionCore
  37 from soc.config.test.test_loadstore import TestMemPspec
  38 from soc.config.ifetch import ConfigFetchUnit
  39 from soc.debug.dmi import CoreDebug, DMIInterface
  40 from soc.debug.jtag import JTAG
  41 from soc.config.pinouts import get_pinspecs
  42 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  43 from soc.bus.simple_gpio import SimpleGPIO
  44 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  45 from soc.clock.select import ClockSelect
  46 from soc.clock.dummypll import DummyPLL
  47 from openpower.sv.svstate import SVSTATERec
  48
  49
  50 from nmutil.util import rising_edge
  51
  52 def get_insn(f_instr_o, pc):
  53     if f_instr_o.width == 32:
  54         return f_instr_o
  55     else:
  56         # 64-bit: bit 2 of pc decides which word to select
  57         return f_instr_o.word_select(pc[2], 32)
  58
  59 # gets state input or reads from state regfile
  60 def state_get(m, core_rst, state_i, name, regfile, regnum):
  61     comb = m.d.comb
  62     sync = m.d.sync
  63     # read the PC
  64     res = Signal(64, reset_less=True, name=name)
  65     res_ok_delay = Signal(name="%s_ok_delay" % name)
  66     with m.If(~core_rst):
  67         sync += res_ok_delay.eq(~state_i.ok)
  68         with m.If(state_i.ok):
  69             # incoming override (start from pc_i)
  70             comb += res.eq(state_i.data)
  71         with m.Else():
  72             # otherwise read StateRegs regfile for PC...
  73             comb += regfile.ren.eq(1<<regnum)
  74         # ... but on a 1-clock delay
  75         with m.If(res_ok_delay):
  76             comb += res.eq(regfile.o_data)
  77     return res
  78
  79 def get_predint(m, mask, name):
  80     """decode SVP64 predicate integer mask field to reg number and invert
  81     this is identical to the equivalent function in ISACaller except that
  82     it doesn't read the INT directly, it just decodes "what needs to be done"
  83     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  84
  85     * all1s is set to indicate that no mask is to be applied.
  86     * regread indicates the GPR register number to be read
  87     * invert is set to indicate that the register value is to be inverted
  88     * unary indicates that the contents of the register is to be shifted 1<<r3
  89     """
  90     comb = m.d.comb
  91     regread = Signal(5, name=name+"regread")
  92     invert = Signal(name=name+"invert")
  93     unary = Signal(name=name+"unary")
  94     all1s = Signal(name=name+"all1s")
  95     with m.Switch(mask):
  96         with m.Case(SVP64PredInt.ALWAYS.value):
  97             comb += all1s.eq(1)      # use 0b1111 (all ones)
  98         with m.Case(SVP64PredInt.R3_UNARY.value):
  99             comb += regread.eq(3)
 100             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 101         with m.Case(SVP64PredInt.R3.value):
 102             comb += regread.eq(3)
 103         with m.Case(SVP64PredInt.R3_N.value):
 104             comb += regread.eq(3)
 105             comb += invert.eq(1)
 106         with m.Case(SVP64PredInt.R10.value):
 107             comb += regread.eq(10)
 108         with m.Case(SVP64PredInt.R10_N.value):
 109             comb += regread.eq(10)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R30.value):
 112             comb += regread.eq(30)
 113         with m.Case(SVP64PredInt.R30_N.value):
 114             comb += regread.eq(30)
 115             comb += invert.eq(1)
 116     return regread, invert, unary, all1s
 117
 118 def get_predcr(m, mask, name):
 119     """decode SVP64 predicate CR to reg number field and invert status
 120     this is identical to _get_predcr in ISACaller
 121     """
 122     comb = m.d.comb
 123     idx = Signal(2, name=name+"idx")
 124     invert = Signal(name=name+"crinvert")
 125     with m.Switch(mask):
 126         with m.Case(SVP64PredCR.LT.value):
 127             comb += idx.eq(CR.LT)
 128             comb += invert.eq(0)
 129         with m.Case(SVP64PredCR.GE.value):
 130             comb += idx.eq(CR.LT)
 131             comb += invert.eq(1)
 132         with m.Case(SVP64PredCR.GT.value):
 133             comb += idx.eq(CR.GT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.LE.value):
 136             comb += idx.eq(CR.GT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.EQ.value):
 139             comb += idx.eq(CR.EQ)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.NE.value):
 142             comb += idx.eq(CR.EQ)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.SO.value):
 145             comb += idx.eq(CR.SO)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NS.value):
 148             comb += idx.eq(CR.SO)
 149             comb += invert.eq(1)
 150     return idx, invert
 151
 152
 153 class TestIssuerInternal(Elaboratable):
 154     """TestIssuer - reads instructions from TestMemory and issues them
 155
 156     efficiency and speed is not the main goal here: functional correctness
 157     and code clarity is.  optimisations (which almost 100% interfere with
 158     easy understanding) come later.
 159     """
    def __init__(self, pspec):
        """Create the issuer's sub-components, regfile ports and signals.

        pspec is the configuration object.  The following optional
        attributes are tested with hasattr() and treated as disabled
        when absent: svp64, regreduce, debug (compared against 'jtag'),
        sram4x4kblock, xics, gpio.  pspec is also handed on to
        NonProductionCore and ConfigFetchUnit.

        note: when JTAG is enabled, pspec is *modified* here
        (wb_icache_en and wb_dcache_en are added to it) before the core
        is constructed, so the order of the statements below matters.
        """

        # test if SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # and if regfiles are reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                                            (pspec.regreduce == True))

        # JTAG interface.  add this right at the start because if it's
        # added it *modifies* the pspec, by adding enable/disable signals
        # for parts of the rest of the core
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        self.dbg_domain = "sync" # sigh "dbgsync" too problematic
        #self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
        if self.jtag_en:
            # XXX MUST keep this up-to-date with litex, and
            # soc-cocotb-sim, and err.. all needs sorting out, argh
            subset = ['uart',
                      'mtwi',
                      'eint', 'gpio', 'mspi0',
                      # 'mspi1', - disabled for now
                      # 'pwm', 'sd0', - disabled for now
                       'sdr']
            self.jtag = JTAG(get_pinspecs(subset=subset),
                             domain=self.dbg_domain)
            # add signals to pspec to enable/disable icache and dcache
            # (or data and instruction wishbone if icache/dcache not included)
            # https://bugs.libre-soc.org/show_bug.cgi?id=520
            # TODO: do we actually care if these are not domain-synchronised?
            # honestly probably not.
            pspec.wb_icache_en = self.jtag.wb_icache_en
            pspec.wb_dcache_en = self.jtag.wb_dcache_en
            self.wb_sram_en = self.jtag.wb_sram_en
        else:
            # no JTAG: the sram enable is a constant 1
            self.wb_sram_en = Const(1)

        # add 4k sram blocks?
        self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
                         pspec.sram4x4kblock == True)
        if self.sram4x4k:
            # four blocks, named sram4k_0 .. sram4k_3
            self.sram4k = []
            for i in range(4):
                self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
                                                    #features={'err'}
                                                    ))

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            # expose the ICS interrupt-level input at the top level
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            # expose the GPIO output at the top level
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core.  suitable for prototyping / demo only
        self.core = core = NonProductionCore(pspec)
        # reset of the "coresync" clock domain
        self.core_rst = ResetSignal("coresync")

        # instruction decoder.  goes into Trap Record
        #pdecode = create_pdecode()
        self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
        self.pdecode2 = PowerDecode2(None, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand,
                                     svp64_en=self.svp64_en,
                                     regreduce_en=self.regreduce_en)
        # NOTE(review): this local is not used again in this method
        pdecode = self.pdecode2.dec

        if self.svp64_en:
            self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu

        # DMI interface
        self.dbg = CoreDebug()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
        self.svstate_i = Data(64, "svstate_i") # ditto
        self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read /write ports for PC, MSR, SVSTATE
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia'] # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
        self.state_r_msr = staterf.r_ports['msr'] # MSR rd
        self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
        self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi'] # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
        self.xer_r = xerrf.r_ports['full_xer'] # XER read

        if self.svp64_en:
            # for predication
            self.int_pred = intrf.r_ports['pred'] # INT predicate read
            self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        # give the write-enable an explicit name
        self.state_nia.wen.name = 'state_nia_wen'

        # pulse to synchronize the simulator at instruction end
        self.insn_done = Signal()

        if self.svp64_en:
            # store copies of predicate masks
            self.srcmask = Signal(64)
            self.dstmask = Signal(64)
 282
    def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
                        fetch_pc_o_ready, fetch_pc_i_valid,
                        fetch_insn_o_valid, fetch_insn_i_ready):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        arguments:

        * m: module the FSM is added to (m.d.comb, m.d.sync, m.FSM)
        * core: not used by this method
        * pc: address presented to the instruction memory, and latched
          into cur_state.pc, when a fetch starts
        * svstate: latched into cur_state.svstate when a fetch starts
        * nia: registered here with cur_state.pc + 4 (or + 8 when an
          SVP64 prefix is detected)
        * is_svp64_mode: registered here from the prefix decoder
          (only when SVP64 is enabled)
        * fetch_pc_o_ready / fetch_pc_i_valid: handshake that starts
          a fetch.  ready is driven here, valid is an input
        * fetch_insn_o_valid / fetch_insn_i_ready: handshake that hands
          over the fetched instruction.  valid is driven here, ready is
          an input

        states: IDLE -> INSN_READ (-> INSN_READ2 for a prefixed
        instruction) -> INSN_READY -> IDLE.  nothing is returned.
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

        # cleared when an MSR read is requested, set again once the
        # MSR data has been captured (resets to 1: "nothing pending")
        msr_read = Signal(reset=1)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate) # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            # (this state also waits for the instruction memory to stop
            # being busy, and captures the MSR)
            with m.State("INSN_READ"):
                # one cycle later, msr/sv read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1) # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.o_data)
                with m.If(self.imem.f_busy_o): # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                    # python-level choice, made once at elaboration time
                    if self.svp64_en:
                        svp64 = self.svp64
                        # decode the SVP64 prefix, if any
                        comb += svp64.raw_opcode_in.eq(insn)
                        comb += svp64.bigendian.eq(self.core_bigendian_i)
                        # pass the decoded prefix (if any) to PowerDecoder2
                        sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                        # NOTE(review): this latches the value that
                        # is_svp64_mode holds *before* it is updated just
                        # below - confirm that the previous value (rather
                        # than svp64.is_svp64_mode) is what is intended
                        sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                        # remember whether this is a prefixed instruction, so
                        # the FSM can readily loop when VL==0
                        sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                        # calculate the address of the following instruction
                        insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                        sync += nia.eq(cur_state.pc + insn_size)
                        with m.If(~svp64.is_svp64_mode):
                            # with no prefix, store the instruction
                            # and hand it directly to the next FSM
                            sync += dec_opcode_i.eq(insn)
                            m.next = "INSN_READY"
                        with m.Else():
                            # fetch the rest of the instruction from memory
                            comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                            comb += self.imem.a_i_valid.eq(1)
                            comb += self.imem.f_i_valid.eq(1)
                            m.next = "INSN_READ2"
                    else:
                        # not SVP64 - 32-bit only
                        sync += nia.eq(cur_state.pc + 4)
                        sync += dec_opcode_i.eq(insn)
                        m.next = "INSN_READY"

            # second word of a prefixed instruction (only entered from
            # the SVP64 branch of INSN_READ)
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_i.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    # NOTE(review): pmode is assigned but not used, and the
                    # string below is a bare string expression (pseudo-code
                    # kept for the TODO above), not executed logic
                    if self.svp64_en:
                        pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                        fire predicate loading FSM and wait before
                        moving to INSN_READY
                    else:
                        sync += self.srcmask.eq(-1) # set to all 1s
                        sync += self.dstmask.eq(-1) # set to all 1s
                        m.next = "INSN_READY"
                    """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                # stay here, holding valid, until the receiver is ready
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"
 397
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates are read here through self.int_pred and
        self.cr_pred (set up in __init__ from IntRegs r_ports['pred']
        and CRRegs r_ports['cr_pred']).  in the case of CRs it has to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        arguments:

        * m: module the FSM is added to (m.d.comb, m.d.sync, m.FSM)
        * pred_insn_i_valid / pred_insn_o_ready: handshake that starts
          a predicate fetch.  ready is driven here, valid is an input
        * pred_mask_o_valid / pred_mask_i_ready: handshake that signals
          the masks are available.  valid is driven here, ready is an
          input

        results are left in self.srcmask and self.dstmask.  nothing is
        returned.

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates
        # (both the INT and the CR decodings are generated for src and
        # dst; predmode below selects which of them is acted upon)
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when
                        # the destination predicate is "always" (all 1s)
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        # (start from empty masks: bits get ORed in)
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR: no predication, both masks
                        # are all 1s and no shifting step is needed
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask
                # (the read was requested in the previous state)
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # only the low 6 bits of the register are used
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when the source
                # predicate is "always" (all 1s)
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                # (the read was requested in the previous state)
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # only the low 6 bits of the register are used
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    # (the python name cr_read stays visible after this
                    # "with" block: it is also used in the Else and in
                    # the If(cr_read) below)
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    # (also clears the loop state, ready for the next use)
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                    # set the corresponding mask bit
                    # (the same CR field feeds both the src and dst masks)
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # hold valid until the receiver has taken the masks
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
 564
 565     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 566                   dbg, core_rst, is_svp64_mode,
 567                   fetch_pc_o_ready, fetch_pc_i_valid,
 568                   fetch_insn_o_valid, fetch_insn_i_ready,
 569                   pred_insn_i_valid, pred_insn_o_ready,
 570                   pred_mask_o_valid, pred_mask_i_ready,
 571                   exec_insn_i_valid, exec_insn_o_ready,
 572                   exec_pc_o_valid, exec_pc_i_ready):
 573         """issue FSM
 574
 575         decode / issue FSM.  this interacts with the "fetch" FSM
 576         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 577         (outgoing). also interacts with the "execute" FSM
 578         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 579         (incoming).
 580         SVP64 RM prefixes have already been set up by the
 581         "fetch" phase, so execute is fairly straightforward.
 582         """
 583
 584         comb = m.d.comb
 585         sync = m.d.sync
 586         pdecode2 = self.pdecode2
 587         cur_state = self.cur_state
 588
 589         # temporaries
 590         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 591
 592         # for updating svstate (things like srcstep etc.)
 593         update_svstate = Signal() # set this (below) if updating
 594         new_svstate = SVSTATERec("new_svstate")
 595         comb += new_svstate.eq(cur_state.svstate)
 596
 597         # precalculate srcstep+1 and dststep+1
 598         cur_srcstep = cur_state.svstate.srcstep
 599         cur_dststep = cur_state.svstate.dststep
 600         next_srcstep = Signal.like(cur_srcstep)
 601         next_dststep = Signal.like(cur_dststep)
 602         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 603         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 604
 605         # note if an exception happened.  in a pipelined or OoO design
 606         # this needs to be accompanied by "shadowing" (or stalling)
 607         el = []
 608         for exc in core.fus.excs.values():
 609             el.append(exc.happened)
 610         exc_happened = Signal()
 611         if len(el) > 0: # at least one exception
 612             comb += exc_happened.eq(Cat(*el).bool())
 613
 614         with m.FSM(name="issue_fsm"):
 615
 616             # sync with the "fetch" phase which is reading the instruction
 617             # at this point, there is no instruction running, that
 618             # could inadvertently update the PC.
 619             with m.State("ISSUE_START"):
 620                 # wait on "core stop" release, before next fetch
 621                 # need to do this here, in case we are in a VL==0 loop
 622                 with m.If(~dbg.core_stop_o & ~core_rst):
 623                     comb += fetch_pc_i_valid.eq(1) # tell fetch to start
 624                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 625                         m.next = "INSN_WAIT"
 626                 with m.Else():
 627                     # tell core it's stopped, and acknowledge debug handshake
 628                     comb += dbg.core_stopped_i.eq(1)
 629                     # while stopped, allow updating the PC and SVSTATE
 630                     with m.If(self.pc_i.ok):
 631                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 632                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 633                         sync += pc_changed.eq(1)
 634                     with m.If(self.svstate_i.ok):
 635                         comb += new_svstate.eq(self.svstate_i.data)
 636                         comb += update_svstate.eq(1)
 637                         sync += sv_changed.eq(1)
 638
 639             # wait for an instruction to arrive from Fetch
 640             with m.State("INSN_WAIT"):
 641                 comb += fetch_insn_i_ready.eq(1)
 642                 with m.If(fetch_insn_o_valid):
 643                     # loop into ISSUE_START if it's a SVP64 instruction
 644                     # and VL == 0.  this because VL==0 is a for-loop
 645                     # from 0 to 0 i.e. always, always a NOP.
 646                     cur_vl = cur_state.svstate.vl
 647                     with m.If(is_svp64_mode & (cur_vl == 0)):
 648                         # update the PC before fetching the next instruction
 649                         # since we are in a VL==0 loop, no instruction was
 650                         # executed that we could be overwriting
 651                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 652                         comb += self.state_w_pc.i_data.eq(nia)
 653                         comb += self.insn_done.eq(1)
 654                         m.next = "ISSUE_START"
 655                     with m.Else():
 656                         if self.svp64_en:
 657                             m.next = "PRED_START"  # start fetching predicate
 658                         else:
 659                             m.next = "DECODE_SV"  # skip predication
 660
 661             with m.State("PRED_START"):
 662                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
 663                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
 664                     m.next = "MASK_WAIT"
 665
 666             with m.State("MASK_WAIT"):
 667                 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
 668                 with m.If(pred_mask_o_valid): # predication masks are ready
 669                     m.next = "PRED_SKIP"
 670
 671             # skip zeros in predicate
 672             with m.State("PRED_SKIP"):
 673                 with m.If(~is_svp64_mode):
 674                     m.next = "DECODE_SV"  # nothing to do
 675                 with m.Else():
 676                     if self.svp64_en:
 677                         pred_src_zero = pdecode2.rm_dec.pred_sz
 678                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 679
 680                         # new srcstep, after skipping zeros
 681                         skip_srcstep = Signal.like(cur_srcstep)
 682                         # value to be added to the current srcstep
 683                         src_delta = Signal.like(cur_srcstep)
 684                         # add leading zeros to srcstep, if not in zero mode
 685                         with m.If(~pred_src_zero):
 686                             # priority encoder (count leading zeros)
 687                             # append guard bit, in case the mask is all zeros
 688                             pri_enc_src = PriorityEncoder(65)
 689                             m.submodules.pri_enc_src = pri_enc_src
 690                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 691                                                          Const(1, 1)))
 692                             comb += src_delta.eq(pri_enc_src.o)
 693                         # apply delta to srcstep
 694                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 695                         # shift-out all leading zeros from the mask
 696                         # plus the leading "one" bit
 697                         # TODO count leading zeros and shift-out the zero
 698                         #      bits, in the same step, in hardware
 699                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 700
 701                         # same as above, but for dststep
 702                         skip_dststep = Signal.like(cur_dststep)
 703                         dst_delta = Signal.like(cur_dststep)
 704                         with m.If(~pred_dst_zero):
 705                             pri_enc_dst = PriorityEncoder(65)
 706                             m.submodules.pri_enc_dst = pri_enc_dst
 707                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 708                                                          Const(1, 1)))
 709                             comb += dst_delta.eq(pri_enc_dst.o)
 710                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 711                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 712
 713                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 714                         with m.If((skip_srcstep >= cur_vl) |
 715                                   (skip_dststep >= cur_vl)):
 716                             # end of VL loop. Update PC and reset src/dst step
 717                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 718                             comb += self.state_w_pc.i_data.eq(nia)
 719                             comb += new_svstate.srcstep.eq(0)
 720                             comb += new_svstate.dststep.eq(0)
 721                             comb += update_svstate.eq(1)
 722                             # synchronize with the simulator
 723                             comb += self.insn_done.eq(1)
 724                             # go back to Issue
 725                             m.next = "ISSUE_START"
 726                         with m.Else():
 727                             # update new src/dst step
 728                             comb += new_svstate.srcstep.eq(skip_srcstep)
 729                             comb += new_svstate.dststep.eq(skip_dststep)
 730                             comb += update_svstate.eq(1)
 731                             # proceed to Decode
 732                             m.next = "DECODE_SV"
 733
 734                         # pass predicate mask bits through to satellite decoders
 735                         # TODO: for SIMD this will be *multiple* bits
 736                         sync += core.sv_pred_sm.eq(self.srcmask[0])
 737                         sync += core.sv_pred_dm.eq(self.dstmask[0])
 738
 739             # after src/dst step have been updated, we are ready
 740             # to decode the instruction
 741             with m.State("DECODE_SV"):
 742                 # decode the instruction
 743                 sync += core.e.eq(pdecode2.e)
 744                 sync += core.state.eq(cur_state)
 745                 sync += core.raw_insn_i.eq(dec_opcode_i)
 746                 sync += core.bigendian_i.eq(self.core_bigendian_i)
 747                 if self.svp64_en:
 748                     sync += core.sv_rm.eq(pdecode2.sv_rm)
 749                     # set RA_OR_ZERO detection in satellite decoders
 750                     sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
 751                     # and svp64 detection
 752                     sync += core.is_svp64_mode.eq(is_svp64_mode)
 753                     # and svp64 bit-rev'd ldst mode
 754                     ldst_dec = pdecode2.use_svp64_ldst_dec
 755                     sync += core.use_svp64_ldst_dec.eq(ldst_dec)
 756                 # after decoding, reset any previous exception condition,
 757                 # allowing it to be set again during the next execution
 758                 sync += pdecode2.ldst_exc.eq(0)
 759
 760                 m.next = "INSN_EXECUTE"  # move to "execute"
 761
 762             # handshake with execution FSM, move to "wait" once acknowledged
 763             with m.State("INSN_EXECUTE"):
 764                 comb += exec_insn_i_valid.eq(1) # trigger execute
 765                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 766                     m.next = "EXECUTE_WAIT"
 767
 768             with m.State("EXECUTE_WAIT"):
 769                 # wait on "core stop" release, at instruction end
 770                 # need to do this here, in case we are in a VL>1 loop
 771                 with m.If(~dbg.core_stop_o & ~core_rst):
 772                     comb += exec_pc_i_ready.eq(1)
 773                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 774                     # the exception info needs to be blatted into
 775                     # pdecode.ldst_exc, and the instruction "re-run".
 776                     # when ldst_exc.happened is set, the PowerDecoder2
 777                     # reacts very differently: it re-writes the instruction
 778                     # with a "trap" (calls PowerDecoder2.trap()) which
 779                     # will *overwrite* whatever was requested and jump the
 780                     # PC to the exception address, as well as alter MSR.
 781                     # nothing else needs to be done other than to note
 782                     # the change of PC and MSR (and, later, SVSTATE)
 783                     with m.If(exc_happened):
 784                         sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))
 785
 786                     with m.If(exec_pc_o_valid):
 787
 788                         # was this the last loop iteration?
 789                         is_last = Signal()
 790                         cur_vl = cur_state.svstate.vl
 791                         comb += is_last.eq(next_srcstep == cur_vl)
 792
 793                         # return directly to Decode if Execute generated an
 794                         # exception.
 795                         with m.If(pdecode2.ldst_exc.happened):
 796                             m.next = "DECODE_SV"
 797
 798                         # if either PC or SVSTATE were changed by the previous
 799                         # instruction, go directly back to Fetch, without
 800                         # updating either PC or SVSTATE
 801                         with m.Elif(pc_changed | sv_changed):
 802                             m.next = "ISSUE_START"
 803
 804                         # also return to Fetch, when no output was a vector
 805                         # (regardless of SRCSTEP and VL), or when the last
 806                         # instruction was really the last one of the VL loop
 807                         with m.Elif((~pdecode2.loop_continue) | is_last):
 808                             # before going back to fetch, update the PC state
 809                             # register with the NIA.
 810                             # ok here we are not reading the branch unit.
 811                             # TODO: this just blithely overwrites whatever
 812                             #       pipeline updated the PC
 813                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 814                             comb += self.state_w_pc.i_data.eq(nia)
 815                             # reset SRCSTEP before returning to Fetch
 816                             if self.svp64_en:
 817                                 with m.If(pdecode2.loop_continue):
 818                                     comb += new_svstate.srcstep.eq(0)
 819                                     comb += new_svstate.dststep.eq(0)
 820                                     comb += update_svstate.eq(1)
 821                             else:
 822                                 comb += new_svstate.srcstep.eq(0)
 823                                 comb += new_svstate.dststep.eq(0)
 824                                 comb += update_svstate.eq(1)
 825                             m.next = "ISSUE_START"
 826
 827                         # returning to Execute? then, first update SRCSTEP
 828                         with m.Else():
 829                             comb += new_svstate.srcstep.eq(next_srcstep)
 830                             comb += new_svstate.dststep.eq(next_dststep)
 831                             comb += update_svstate.eq(1)
 832                             # return to mask skip loop
 833                             m.next = "PRED_SKIP"
 834
 835                 with m.Else():
 836                     comb += dbg.core_stopped_i.eq(1)
 837                     # while stopped, allow updating the PC and SVSTATE
 838                     with m.If(self.pc_i.ok):
 839                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 840                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 841                         sync += pc_changed.eq(1)
 842                     with m.If(self.svstate_i.ok):
 843                         comb += new_svstate.eq(self.svstate_i.data)
 844                         comb += update_svstate.eq(1)
 845                         sync += sv_changed.eq(1)
 846
 847         # check if svstate needs updating: if so, write it to State Regfile
 848         with m.If(update_svstate):
 849             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 850             comb += self.state_w_sv.i_data.eq(new_svstate)
 851             sync += cur_state.svstate.eq(new_svstate) # for next clock
 852
 853     def execute_fsm(self, m, core, pc_changed, sv_changed,
 854                     exec_insn_i_valid, exec_insn_o_ready,
 855                     exec_pc_o_valid, exec_pc_i_ready):
 856         """execute FSM
 857
 858         execute FSM. this interacts with the "issue" FSM
 859         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 860         (outgoing). SVP64 RM prefixes have already been set up by the
 861         "issue" phase, so execute is fairly straightforward.
 862         """
 863
 864         comb = m.d.comb
 865         sync = m.d.sync
 866         pdecode2 = self.pdecode2
 867
 868         # temporaries
 869         core_busy_o = core.busy_o                 # core is busy
 870         core_ivalid_i = core.ivalid_i             # instruction is valid
 871         core_issue_i = core.issue_i               # instruction is issued
 872         insn_type = core.e.do.insn_type           # instruction MicroOp type
 873
 874         with m.FSM(name="exec_fsm"):
 875
 876             # waiting for instruction bus (stays there until not busy)
 877             with m.State("INSN_START"):
 878                 comb += exec_insn_o_ready.eq(1)
 879                 with m.If(exec_insn_i_valid):
 880                     comb += core_ivalid_i.eq(1)  # instruction is valid
 881                     comb += core_issue_i.eq(1)  # and issued
 882                     sync += sv_changed.eq(0)
 883                     sync += pc_changed.eq(0)
 884                     m.next = "INSN_ACTIVE"  # move to "wait completion"
 885
 886             # instruction started: must wait till it finishes
 887             with m.State("INSN_ACTIVE"):
 888                 with m.If(insn_type != MicrOp.OP_NOP):
 889                     comb += core_ivalid_i.eq(1) # instruction is valid
 890                 # note changes to PC and SVSTATE
 891                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 892                     sync += sv_changed.eq(1)
 893                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 894                     sync += pc_changed.eq(1)
 895                 with m.If(~core_busy_o): # instruction done!
 896                     comb += exec_pc_o_valid.eq(1)
 897                     with m.If(exec_pc_i_ready):
 898                         # when finished, indicate "done".
 899                         # however, if there was an exception, the instruction
 900                         # is *not* yet done.  this is an implementation
 901                         # detail: we choose to implement exceptions by
 902                         # taking the exception information from the LDST
 903                         # unit, putting that *back* into the PowerDecoder2,
 904                         # and *re-running the entire instruction*.
 905                         # if we erroneously indicate "done" here, it is as if
 906                         # there were *TWO* instructions:
 907                         # 1) the failed LDST 2) a TRAP.
 908                         with m.If(~pdecode2.ldst_exc.happened):
 909                             comb += self.insn_done.eq(1)
 910                         m.next = "INSN_START"  # back to fetch
 911
 912     def setup_peripherals(self, m):
 913         comb, sync = m.d.comb, m.d.sync
 914
 915         # okaaaay so the debug module must be in coresync clock domain
 916         # but NOT its reset signal. to cope with this, set every single
 917         # submodule explicitly in coresync domain, debug and JTAG
 918         # in their own one but using *external* reset.
 919         csd = DomainRenamer("coresync")
 920         dbd = DomainRenamer(self.dbg_domain)
 921
 922         m.submodules.core = core = csd(self.core)
 923         m.submodules.imem = imem = csd(self.imem)
 924         m.submodules.dbg = dbg = dbd(self.dbg)
 925         if self.jtag_en:
 926             m.submodules.jtag = jtag = dbd(self.jtag)
 927             # TODO: UART2GDB mux, here, from external pin
 928             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 929             sync += dbg.dmi.connect_to(jtag.dmi)
 930
 931         cur_state = self.cur_state
 932
 933         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 934         if self.sram4x4k:
 935             for i, sram in enumerate(self.sram4k):
 936                 m.submodules["sram4k_%d" % i] = csd(sram)
 937                 comb += sram.enable.eq(self.wb_sram_en)
 938
 939         # XICS interrupt handler
 940         if self.xics:
 941             m.submodules.xics_icp = icp = csd(self.xics_icp)
 942             m.submodules.xics_ics = ics = csd(self.xics_ics)
 943             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 944             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
 945
 946         # GPIO test peripheral
 947         if self.gpio:
 948             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 949
 950         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 951         # XXX causes litex ECP5 test to get wrong idea about input and output
 952         # (but works with verilator sim *sigh*)
 953         #if self.gpio and self.xics:
 954         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 955
 956         # instruction decoder
 957         pdecode = create_pdecode()
 958         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 959         if self.svp64_en:
 960             m.submodules.svp64 = svp64 = csd(self.svp64)
 961
 962         # convenience
 963         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 964         intrf = self.core.regs.rf['int']
 965
 966         # clock delay power-on reset
 967         cd_por  = ClockDomain(reset_less=True)
 968         cd_sync = ClockDomain()
 969         core_sync = ClockDomain("coresync")
 970         m.domains += cd_por, cd_sync, core_sync
 971         if self.dbg_domain != "sync":
 972             dbg_sync = ClockDomain(self.dbg_domain)
 973             m.domains += dbg_sync
 974
 975         ti_rst = Signal(reset_less=True)
 976         delay = Signal(range(4), reset=3)
 977         with m.If(delay != 0):
 978             m.d.por += delay.eq(delay - 1)
 979         comb += cd_por.clk.eq(ClockSignal())
 980
 981         # power-on reset delay
 982         core_rst = ResetSignal("coresync")
 983         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 984         comb += core_rst.eq(ti_rst)
 985
 986         # debug clock is same as coresync, but reset is *main external*
 987         if self.dbg_domain != "sync":
 988             dbg_rst = ResetSignal(self.dbg_domain)
 989             comb += dbg_rst.eq(ResetSignal())
 990
 991         # busy/halted signals from core
 992         comb += self.busy_o.eq(core.busy_o)
 993         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 994
 995         # temporary hack: says "go" immediately for both address gen and ST
 996         l0 = core.l0
 997         ldst = core.fus.fus['ldst0']
 998         st_go_edge = rising_edge(m, ldst.st.rel_o)
 999         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
1000         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
1001
1002     def elaborate(self, platform):  # builds, wires up and returns the Module
1003         m = Module()
1004         # convenience: short names for the domains and the sub-blocks
1005         comb, sync = m.d.comb, m.d.sync
1006         cur_state = self.cur_state
1007         pdecode2 = self.pdecode2
1008         dbg = self.dbg
1009         core = self.core
1010
1011         # set up peripherals and core (submodules, clock domains, resets)
1012         core_rst = self.core_rst
1013         self.setup_peripherals(m)
1014
1015         # reset current state if core reset requested
1016         with m.If(core_rst):
1017             m.d.sync += self.cur_state.eq(0)
1018
1019         # export the current PC; the two flags note writes to PC / SVSTATE
1020         comb += self.pc_o.eq(cur_state.pc)
1021         pc_changed = Signal() # note write to PC
1022         sv_changed = Signal() # note write to SVSTATE
1023
1024         # read state either from incoming override or from regfile
1025         # TODO: really should be doing MSR in the same way
1026         pc = state_get(m, core_rst, self.pc_i,
1027                             "pc",                  # read PC
1028                             self.state_r_pc, StateRegs.PC)
1029         svstate = state_get(m, core_rst, self.svstate_i,
1030                             "svstate",   # read SVSTATE
1031                             self.state_r_sv, StateRegs.SVSTATE)
1032
1033         # don't write pc every cycle: default is "no write", the issue FSM
1034         comb += self.state_w_pc.wen.eq(0)  # raises wen when it updates PC
1035         comb += self.state_w_pc.i_data.eq(0)
1036
1037         # don't read msr every cycle
1038         comb += self.state_r_msr.ren.eq(0)
1039
1040         # address of the next instruction, in the absence of a branch
1041         # depends on the instruction size
1042         nia = Signal(64)
1043
1044         # connect up debug signals
1045         # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
1046         comb += dbg.terminate_i.eq(core.core_terminate_o)
1047         comb += dbg.state.pc.eq(pc)
1048         comb += dbg.state.svstate.eq(svstate)
1049         comb += dbg.state.msr.eq(cur_state.msr)
1050
1051         # pass the prefix mode from Fetch to Issue, so the latter can loop
1052         # on VL==0
1053         is_svp64_mode = Signal()
1054
1055         # there are *THREE* FSMs (*FOUR* if SVP64 is enabled): fetch
1056         # (32/64-bit), issue, execute, and "Predicate fetch/calculate".
1057         # these are the handshake signals between each
1058
1059         # fetch FSM can run as soon as the PC is valid
1060         fetch_pc_i_valid = Signal() # Issue tells Fetch "start next read"
1061         fetch_pc_o_ready = Signal() # Fetch tells Issue "proceed"
1062
1063         # fetch FSM hands over the instruction to be decoded / issued
1064         fetch_insn_o_valid = Signal()
1065         fetch_insn_i_ready = Signal()
1066
1067         # predicate fetch FSM decodes and fetches the predicate
1068         pred_insn_i_valid = Signal()
1069         pred_insn_o_ready = Signal()
1070
1071         # predicate fetch FSM delivers the masks
1072         pred_mask_o_valid = Signal()
1073         pred_mask_i_ready = Signal()
1074
1075         # issue FSM delivers the instruction to be executed
1076         exec_insn_i_valid = Signal()
1077         exec_insn_o_ready = Signal()
1078
1079         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1080         exec_pc_o_valid = Signal()
1081         exec_pc_i_ready = Signal()
1082
1083         # the FSMs here are perhaps unusual in that they detect conditions
1084         # then "hold" information, combinatorially, for the core
1085         # (as opposed to using sync - which would be on a clock's delay)
1086         # this includes the actual opcode, valid flags and so on.
1087
1088         # Fetch, then predicate fetch, then Issue, then Execute.
1089         # Issue is where the VL for-loop lives.  the ready/valid
1090         # signalling is used to communicate between the four.
1091
1092         self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
1093                        fetch_pc_o_ready, fetch_pc_i_valid,
1094                        fetch_insn_o_valid, fetch_insn_i_ready)
1095
1096         self.issue_fsm(m, core, pc_changed, sv_changed, nia,
1097                        dbg, core_rst, is_svp64_mode,
1098                        fetch_pc_o_ready, fetch_pc_i_valid,
1099                        fetch_insn_o_valid, fetch_insn_i_ready,
1100                        pred_insn_i_valid, pred_insn_o_ready,
1101                        pred_mask_o_valid, pred_mask_i_ready,
1102                        exec_insn_i_valid, exec_insn_o_ready,
1103                        exec_pc_o_valid, exec_pc_i_ready)
1104
1105         if self.svp64_en:  # the predicate FSM only exists with SVP64
1106             self.fetch_predicate_fsm(m,
1107                                      pred_insn_i_valid, pred_insn_o_ready,
1108                                      pred_mask_o_valid, pred_mask_i_ready)
1109
1110         self.execute_fsm(m, core, pc_changed, sv_changed,
1111                          exec_insn_i_valid, exec_insn_o_ready,
1112                          exec_pc_o_valid, exec_pc_i_ready)
1113
1114         # whatever was done above, over-ride it if core reset is held
1115         with m.If(core_rst):
1116             sync += nia.eq(0)
1117
1118         # this bit doesn't have to be in the FSM: connect up to read
1119         # regfiles on demand from DMI
1120         self.do_dmi(m, dbg)
1121
1122         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
1123         # (which uses that in PowerDecoder2 to raise 0x900 exception)
1124         self.tb_dec_fsm(m, cur_state.dec)
1125
1126         return m
1127
1128     def do_dmi(self, m, dbg):
1129         """deals with DMI debug requests
1130
1131         currently only provides read requests for the INT regfile, CR and XER
1132         it will later also deal with *writing* to these regfiles.
1133         """
1134         comb = m.d.comb
1135         sync = m.d.sync
1136         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1137         intrf = self.core.regs.rf['int']
1138
1139         with m.If(d_reg.req): # request for regfile access being made
1140             # TODO: error-check this
1141             # XXX should this be combinatorial?  sync better?
1142             if intrf.unary:
1143                 comb += self.int_r.ren.eq(1<<d_reg.addr)
1144             else:
1145                 comb += self.int_r.addr.eq(d_reg.addr)
1146                 comb += self.int_r.ren.eq(1)
1147         d_reg_delay  = Signal()
1148         sync += d_reg_delay.eq(d_reg.req)
1149         with m.If(d_reg_delay):
1150             # data arrives one clock later
1151             comb += d_reg.data.eq(self.int_r.o_data)
1152             comb += d_reg.ack.eq(1)
1153
1154         # sigh same thing for CR debug
1155         with m.If(d_cr.req): # request for regfile access being made
1156             comb += self.cr_r.ren.eq(0b11111111) # enable all
1157         d_cr_delay  = Signal()
1158         sync += d_cr_delay.eq(d_cr.req)
1159         with m.If(d_cr_delay):
1160             # data arrives one clock later
1161             comb += d_cr.data.eq(self.cr_r.o_data)
1162             comb += d_cr.ack.eq(1)
1163
1164         # aaand XER...
1165         with m.If(d_xer.req): # request for regfile access being made
1166             comb += self.xer_r.ren.eq(0b111111) # enable all
1167         d_xer_delay  = Signal()
1168         sync += d_xer_delay.eq(d_xer.req)
1169         with m.If(d_xer_delay):
1170             # data arrives one clock later
1171             comb += d_xer.data.eq(self.xer_r.o_data)
1172             comb += d_xer.ack.eq(1)
1173
1174     def tb_dec_fsm(self, m, spr_dec):
1175         """tb_dec_fsm
1176
1177         this is a FSM for updating either dec or tb.  it runs alternately
1178         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1179         value to DEC, however the regfile has "passthrough" on it so this
1180         *should* be ok.
1181
1182         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1183         """
1184
1185         comb, sync = m.d.comb, m.d.sync
1186         fast_rf = self.core.regs.rf['fast']
1187         fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1188         fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1189
1190         with m.FSM() as fsm:
1191
1192             # initiates read of current DEC
1193             with m.State("DEC_READ"):
1194                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1195                 comb += fast_r_dectb.ren.eq(1)
1196                 m.next = "DEC_WRITE"
1197
1198             # waits for DEC read to arrive (1 cycle), updates with new value
1199             with m.State("DEC_WRITE"):
1200                 new_dec = Signal(64)
1201                 # TODO: MSR.LPCR 32-bit decrement mode
1202                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1203                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1204                 comb += fast_w_dectb.wen.eq(1)
1205                 comb += fast_w_dectb.i_data.eq(new_dec)
1206                 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1207                 m.next = "TB_READ"
1208
1209             # initiates read of current TB
1210             with m.State("TB_READ"):
1211                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1212                 comb += fast_r_dectb.ren.eq(1)
1213                 m.next = "TB_WRITE"
1214
1215             # waits for read TB to arrive, initiates write of current TB
1216             with m.State("TB_WRITE"):
1217                 new_tb = Signal(64)
1218                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1219                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1220                 comb += fast_w_dectb.wen.eq(1)
1221                 comb += fast_w_dectb.i_data.eq(new_tb)
1222                 m.next = "DEC_READ"
1223
1224         return m
1225
1226     def __iter__(self):
1227         yield from self.pc_i.ports()
1228         yield self.pc_o
1229         yield self.memerr_o
1230         yield from self.core.ports()
1231         yield from self.imem.ports()
1232         yield self.core_bigendian_i
1233         yield self.busy_o
1234
1235     def ports(self):
1236         return list(self)
1237
1238     def external_ports(self):
1239         ports = self.pc_i.ports()
1240         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1241                 ]
1242
1243         if self.jtag_en:
1244             ports += list(self.jtag.external_ports())
1245         else:
1246             # don't add DMI if JTAG is enabled
1247             ports += list(self.dbg.dmi.ports())
1248
1249         ports += list(self.imem.ibus.fields.values())
1250         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1251
1252         if self.sram4x4k:
1253             for sram in self.sram4k:
1254                 ports += list(sram.bus.fields.values())
1255
1256         if self.xics:
1257             ports += list(self.xics_icp.bus.fields.values())
1258             ports += list(self.xics_ics.bus.fields.values())
1259             ports.append(self.int_level_i)
1260
1261         if self.gpio:
1262             ports += list(self.simple_gpio.bus.fields.values())
1263             ports.append(self.gpio_o)
1264
1265         return ports
1266
1267     def ports(self):
1268         return list(self)
1269
1270
class TestIssuer(Elaboratable):
    """Wrapper around TestIssuerInternal that adds clock (and PLL) wiring.

    A DummyPLL instance is always constructed, but it is only added as a
    submodule and wired up when ``pspec.use_pll`` exists and is true.  In
    that case the following extra attributes exist:

    * ``pll_test_o``, ``pll_vco_o``: outputs copied from the PLL
    * ``clk_sel_i``: 2-bit input passed to the PLL clock selection
    * ``ref_clk``: the clock of the default domain
    * ``pllclk_clk``: the clock of the "pllclk" domain
    """
    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not (a missing attribute means "no PLL")
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """Build the module: add ``ti``, optionally the PLL and its
        "pllclk" domain, then drive the "coresync" clock (and the debug
        domain clock, when ``ti.dbg_domain`` is not 'sync').
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            m.submodules.wrappll = pll = self.pll

            # add clock domain from PLL
            m.domains += ClockDomain("pllclk")

            # "pllclk" domain clock is driven by the PLL output
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # the external 24mhz reference is not connected to
            # pll.clk_24_i here: clk_24_i is instead handed out directly
            # by external_ports()

            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # the "coresync" domain (and the debug domain, if it is not 'sync')
        # is not declared here: only its clock signal is driven.
        # NOTE(review): presumably the domains themselves are created by
        # TestIssuerInternal - confirm.
        intclk = ClockSignal("coresync")

        # XXX BYPASS PLL XXX
        # even with the PLL enabled, the core clock is taken from the
        # reference (default-domain) clock rather than from the PLL output
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """Return all ports of ``ti`` and of the PLL, followed by the
        default-domain clock and reset.
        """
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """Return the external ports of ``ti`` followed by the
        default-domain clock and reset and, when the PLL is enabled, the
        PLL-related signals.
        """
        # copy so that the list returned by ti.external_ports() is never
        # modified in place
        ports = list(self.ti.external_ports())
        ports += [ClockSignal(), ResetSignal()]
        if self.pll_en:
            ports += [self.clk_sel_i,
                      self.pll.clk_24_i,
                      self.pll_test_o,
                      self.pll_vco_o,
                      self.pllclk_clk,
                      self.ref_clk]
        return ports
1358
1359
if __name__ == '__main__':
    # one of each Function Unit
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand the design, with its full port list, to nmigen.cli's main()
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # when run without any command-line arguments, additionally write
    # the RTLIL for the design using the external ports only
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as ilfile:
            ilfile.write(vl)