src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to at provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmigen.lib.coding import PriorityEncoder
  25
  26 from openpower.decoder.power_decoder import create_pdecode
  27 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  28 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  29 from openpower.decoder.decode2execute1 import Data
  30 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  31                                      SVP64PredMode)
  32 from openpower.state import CoreState
  33 from openpower.consts import (CR, SVP64CROffs)
  34 from soc.experiment.testmem import TestMemory # test only for instructions
  35 from soc.regfile.regfiles import StateRegs, FastRegs
  36 from soc.simple.core import NonProductionCore
  37 from soc.config.test.test_loadstore import TestMemPspec
  38 from soc.config.ifetch import ConfigFetchUnit
  39 from soc.debug.dmi import CoreDebug, DMIInterface
  40 from soc.debug.jtag import JTAG
  41 from soc.config.pinouts import get_pinspecs
  42 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  43 from soc.bus.simple_gpio import SimpleGPIO
  44 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  45 from soc.clock.select import ClockSelect
  46 from soc.clock.dummypll import DummyPLL
  47 from openpower.sv.svstate import SVSTATERec
  48
  49
  50 from nmutil.util import rising_edge
  51
  52 def get_insn(f_instr_o, pc):
  53     if f_instr_o.width == 32:
  54         return f_instr_o
  55     else:
  56         # 64-bit: bit 2 of pc decides which word to select
  57         return f_instr_o.word_select(pc[2], 32)
  58
  59 # gets state input or reads from state regfile
  60 def state_get(m, core_rst, state_i, name, regfile, regnum):
  61     comb = m.d.comb
  62     sync = m.d.sync
  63     # read the PC
  64     res = Signal(64, reset_less=True, name=name)
  65     res_ok_delay = Signal(name="%s_ok_delay" % name)
  66     with m.If(~core_rst):
  67         sync += res_ok_delay.eq(~state_i.ok)
  68         with m.If(state_i.ok):
  69             # incoming override (start from pc_i)
  70             comb += res.eq(state_i.data)
  71         with m.Else():
  72             # otherwise read StateRegs regfile for PC...
  73             comb += regfile.ren.eq(1<<regnum)
  74         # ... but on a 1-clock delay
  75         with m.If(res_ok_delay):
  76             comb += res.eq(regfile.data_o)
  77     return res
  78
  79 def get_predint(m, mask, name):
  80     """decode SVP64 predicate integer mask field to reg number and invert
  81     this is identical to the equivalent function in ISACaller except that
  82     it doesn't read the INT directly, it just decodes "what needs to be done"
  83     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  84
  85     * all1s is set to indicate that no mask is to be applied.
  86     * regread indicates the GPR register number to be read
  87     * invert is set to indicate that the register value is to be inverted
  88     * unary indicates that the contents of the register is to be shifted 1<<r3
  89     """
  90     comb = m.d.comb
  91     regread = Signal(5, name=name+"regread")
  92     invert = Signal(name=name+"invert")
  93     unary = Signal(name=name+"unary")
  94     all1s = Signal(name=name+"all1s")
  95     with m.Switch(mask):
  96         with m.Case(SVP64PredInt.ALWAYS.value):
  97             comb += all1s.eq(1)      # use 0b1111 (all ones)
  98         with m.Case(SVP64PredInt.R3_UNARY.value):
  99             comb += regread.eq(3)
 100             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 101         with m.Case(SVP64PredInt.R3.value):
 102             comb += regread.eq(3)
 103         with m.Case(SVP64PredInt.R3_N.value):
 104             comb += regread.eq(3)
 105             comb += invert.eq(1)
 106         with m.Case(SVP64PredInt.R10.value):
 107             comb += regread.eq(10)
 108         with m.Case(SVP64PredInt.R10_N.value):
 109             comb += regread.eq(10)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R30.value):
 112             comb += regread.eq(30)
 113         with m.Case(SVP64PredInt.R30_N.value):
 114             comb += regread.eq(30)
 115             comb += invert.eq(1)
 116     return regread, invert, unary, all1s
 117
 118 def get_predcr(m, mask, name):
 119     """decode SVP64 predicate CR to reg number field and invert status
 120     this is identical to _get_predcr in ISACaller
 121     """
 122     comb = m.d.comb
 123     idx = Signal(2, name=name+"idx")
 124     invert = Signal(name=name+"crinvert")
 125     with m.Switch(mask):
 126         with m.Case(SVP64PredCR.LT.value):
 127             comb += idx.eq(CR.LT)
 128             comb += invert.eq(0)
 129         with m.Case(SVP64PredCR.GE.value):
 130             comb += idx.eq(CR.LT)
 131             comb += invert.eq(1)
 132         with m.Case(SVP64PredCR.GT.value):
 133             comb += idx.eq(CR.GT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.LE.value):
 136             comb += idx.eq(CR.GT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.EQ.value):
 139             comb += idx.eq(CR.EQ)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.NE.value):
 142             comb += idx.eq(CR.EQ)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.SO.value):
 145             comb += idx.eq(CR.SO)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NS.value):
 148             comb += idx.eq(CR.SO)
 149             comb += invert.eq(1)
 150     return idx, invert
 151
 152
 153 class TestIssuerInternal(Elaboratable):
 154     """TestIssuer - reads instructions from TestMemory and issues them
 155
 156     efficiency and speed is not the main goal here: functional correctness
 157     and code clarity is.  optimisations (which almost 100% interfere with
 158     easy understanding) come later.
 159     """
 160     def __init__(self, pspec):
 161
 162         # test is SVP64 is to be enabled
 163         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 164
 165         # and if regfiles are reduced
 166         self.regreduce_en = (hasattr(pspec, "regreduce") and
 167                                             (pspec.regreduce == True))
 168
 169         # JTAG interface.  add this right at the start because if it's
 170         # added it *modifies* the pspec, by adding enable/disable signals
 171         # for parts of the rest of the core
 172         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 173         if self.jtag_en:
 174             # XXX MUST keep this up-to-date with litex, and
 175             # soc-cocotb-sim, and err.. all needs sorting out, argh
 176             subset = ['uart',
 177                       'mtwi',
 178                       'eint', 'gpio', 'mspi0',
 179                       # 'mspi1', - disabled for now
 180                       # 'pwm', 'sd0', - disabled for now
 181                        'sdr']
 182             self.jtag = JTAG(get_pinspecs(subset=subset))
 183             # add signals to pspec to enable/disable icache and dcache
 184             # (or data and intstruction wishbone if icache/dcache not included)
 185             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 186             # TODO: do we actually care if these are not domain-synchronised?
 187             # honestly probably not.
 188             pspec.wb_icache_en = self.jtag.wb_icache_en
 189             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 190             self.wb_sram_en = self.jtag.wb_sram_en
 191         else:
 192             self.wb_sram_en = Const(1)
 193
 194         # add 4k sram blocks?
 195         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 196                          pspec.sram4x4kblock == True)
 197         if self.sram4x4k:
 198             self.sram4k = []
 199             for i in range(4):
 200                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 201                                                     #features={'err'}
 202                                                     ))
 203
 204         # add interrupt controller?
 205         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 206         if self.xics:
 207             self.xics_icp = XICS_ICP()
 208             self.xics_ics = XICS_ICS()
 209             self.int_level_i = self.xics_ics.int_level_i
 210
 211         # add GPIO peripheral?
 212         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 213         if self.gpio:
 214             self.simple_gpio = SimpleGPIO()
 215             self.gpio_o = self.simple_gpio.gpio_o
 216
 217         # main instruction core.  suitable for prototyping / demo only
 218         self.core = core = NonProductionCore(pspec)
 219
 220         # instruction decoder.  goes into Trap Record
 221         pdecode = create_pdecode()
 222         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
 223         self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
 224                                      opkls=IssuerDecode2ToOperand,
 225                                      svp64_en=self.svp64_en,
 226                                      regreduce_en=self.regreduce_en)
 227         if self.svp64_en:
 228             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 229
 230         # Test Instruction memory
 231         self.imem = ConfigFetchUnit(pspec).fu
 232
 233         # DMI interface
 234         self.dbg = CoreDebug()
 235
 236         # instruction go/monitor
 237         self.pc_o = Signal(64, reset_less=True)
 238         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
 239         self.svstate_i = Data(32, "svstate_i") # ditto
 240         self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
 241         self.busy_o = Signal(reset_less=True)
 242         self.memerr_o = Signal(reset_less=True)
 243
 244         # STATE regfile read /write ports for PC, MSR, SVSTATE
 245         staterf = self.core.regs.rf['state']
 246         self.state_r_pc = staterf.r_ports['cia'] # PC rd
 247         self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
 248         self.state_r_msr = staterf.r_ports['msr'] # MSR rd
 249         self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
 250         self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
 251
 252         # DMI interface access
 253         intrf = self.core.regs.rf['int']
 254         crrf = self.core.regs.rf['cr']
 255         xerrf = self.core.regs.rf['xer']
 256         self.int_r = intrf.r_ports['dmi'] # INT read
 257         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
 258         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 259
 260         if self.svp64_en:
 261             # for predication
 262             self.int_pred = intrf.r_ports['pred'] # INT predicate read
 263             self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 264
 265         # hack method of keeping an eye on whether branch/trap set the PC
 266         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 267         self.state_nia.wen.name = 'state_nia_wen'
 268
 269         # pulse to synchronize the simulator at instruction end
 270         self.insn_done = Signal()
 271
 272         if self.svp64_en:
 273             # store copies of predicate masks
 274             self.srcmask = Signal(64)
 275             self.dstmask = Signal(64)
 276
 277     def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
 278                         fetch_pc_ready_o, fetch_pc_valid_i,
 279                         fetch_insn_valid_o, fetch_insn_ready_i):
 280         """fetch FSM
 281
 282         this FSM performs fetch of raw instruction data, partial-decodes
 283         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 284         read a 2nd 32-bit quantity if that occurs.
 285         """
 286         comb = m.d.comb
 287         sync = m.d.sync
 288         pdecode2 = self.pdecode2
 289         cur_state = self.cur_state
 290         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 291
 292         msr_read = Signal(reset=1)
 293
 294         with m.FSM(name='fetch_fsm'):
 295
 296             # waiting (zzz)
 297             with m.State("IDLE"):
 298                 comb += fetch_pc_ready_o.eq(1)
 299                 with m.If(fetch_pc_valid_i):
 300                     # instruction allowed to go: start by reading the PC
 301                     # capture the PC and also drop it into Insn Memory
 302                     # we have joined a pair of combinatorial memory
 303                     # lookups together.  this is Generally Bad.
 304                     comb += self.imem.a_pc_i.eq(pc)
 305                     comb += self.imem.a_valid_i.eq(1)
 306                     comb += self.imem.f_valid_i.eq(1)
 307                     sync += cur_state.pc.eq(pc)
 308                     sync += cur_state.svstate.eq(svstate) # and svstate
 309
 310                     # initiate read of MSR. arrives one clock later
 311                     comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
 312                     sync += msr_read.eq(0)
 313
 314                     m.next = "INSN_READ"  # move to "wait for bus" phase
 315
 316             # dummy pause to find out why simulation is not keeping up
 317             with m.State("INSN_READ"):
 318                 # one cycle later, msr/sv read arrives.  valid only once.
 319                 with m.If(~msr_read):
 320                     sync += msr_read.eq(1) # yeah don't read it again
 321                     sync += cur_state.msr.eq(self.state_r_msr.data_o)
 322                 with m.If(self.imem.f_busy_o): # zzz...
 323                     # busy: stay in wait-read
 324                     comb += self.imem.a_valid_i.eq(1)
 325                     comb += self.imem.f_valid_i.eq(1)
 326                 with m.Else():
 327                     # not busy: instruction fetched
 328                     insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 329                     if self.svp64_en:
 330                         svp64 = self.svp64
 331                         # decode the SVP64 prefix, if any
 332                         comb += svp64.raw_opcode_in.eq(insn)
 333                         comb += svp64.bigendian.eq(self.core_bigendian_i)
 334                         # pass the decoded prefix (if any) to PowerDecoder2
 335                         sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 336                         # remember whether this is a prefixed instruction, so
 337                         # the FSM can readily loop when VL==0
 338                         sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 339                         # calculate the address of the following instruction
 340                         insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 341                         sync += nia.eq(cur_state.pc + insn_size)
 342                         with m.If(~svp64.is_svp64_mode):
 343                             # with no prefix, store the instruction
 344                             # and hand it directly to the next FSM
 345                             sync += dec_opcode_i.eq(insn)
 346                             m.next = "INSN_READY"
 347                         with m.Else():
 348                             # fetch the rest of the instruction from memory
 349                             comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 350                             comb += self.imem.a_valid_i.eq(1)
 351                             comb += self.imem.f_valid_i.eq(1)
 352                             m.next = "INSN_READ2"
 353                     else:
 354                         # not SVP64 - 32-bit only
 355                         sync += nia.eq(cur_state.pc + 4)
 356                         sync += dec_opcode_i.eq(insn)
 357                         m.next = "INSN_READY"
 358
 359             with m.State("INSN_READ2"):
 360                 with m.If(self.imem.f_busy_o):  # zzz...
 361                     # busy: stay in wait-read
 362                     comb += self.imem.a_valid_i.eq(1)
 363                     comb += self.imem.f_valid_i.eq(1)
 364                 with m.Else():
 365                     # not busy: instruction fetched
 366                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 367                     sync += dec_opcode_i.eq(insn)
 368                     m.next = "INSN_READY"
 369                     # TODO: probably can start looking at pdecode2.rm_dec
 370                     # here or maybe even in INSN_READ state, if svp64_mode
 371                     # detected, in order to trigger - and wait for - the
 372                     # predicate reading.
 373                     if self.svp64_en:
 374                         pmode = pdecode2.rm_dec.predmode
 375                     """
 376                     if pmode != SVP64PredMode.ALWAYS.value:
 377                         fire predicate loading FSM and wait before
 378                         moving to INSN_READY
 379                     else:
 380                         sync += self.srcmask.eq(-1) # set to all 1s
 381                         sync += self.dstmask.eq(-1) # set to all 1s
 382                         m.next = "INSN_READY"
 383                     """
 384
 385             with m.State("INSN_READY"):
 386                 # hand over the instruction, to be decoded
 387                 comb += fetch_insn_valid_o.eq(1)
 388                 with m.If(fetch_insn_ready_i):
 389                     m.next = "IDLE"
 390
 391     def fetch_predicate_fsm(self, m,
 392                             pred_insn_valid_i, pred_insn_ready_o,
 393                             pred_mask_valid_o, pred_mask_ready_i):
 394         """fetch_predicate_fsm - obtains (constructs in the case of CR)
 395            src/dest predicate masks
 396
 397         https://bugs.libre-soc.org/show_bug.cgi?id=617
 398         the predicates can be read here, by using IntRegs r_ports['pred']
 399         or CRRegs r_ports['pred'].  in the case of CRs it will have to
 400         be done through multiple reads, extracting one relevant at a time.
 401         later, a faster way would be to use the 32-bit-wide CR port but
 402         this is more complex decoding, here.  equivalent code used in
 403         ISACaller is "from openpower.decoder.isa.caller import get_predcr"
 404
 405         note: this ENTIRE FSM is not to be called when svp64 is disabled
 406         """
 407         comb = m.d.comb
 408         sync = m.d.sync
 409         pdecode2 = self.pdecode2
 410         rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
 411         predmode = rm_dec.predmode
 412         srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
 413         cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
 414         # get src/dst step, so we can skip already used mask bits
 415         cur_state = self.cur_state
 416         srcstep = cur_state.svstate.srcstep
 417         dststep = cur_state.svstate.dststep
 418         cur_vl = cur_state.svstate.vl
 419
 420         # decode predicates
 421         sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
 422         dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
 423         sidx, scrinvert = get_predcr(m, srcpred, 's')
 424         didx, dcrinvert = get_predcr(m, dstpred, 'd')
 425
 426         # store fetched masks, for either intpred or crpred
 427         # when src/dst step is not zero, the skipped mask bits need to be
 428         # shifted-out, before actually storing them in src/dest mask
 429         new_srcmask = Signal(64, reset_less=True)
 430         new_dstmask = Signal(64, reset_less=True)
 431
 432         with m.FSM(name="fetch_predicate"):
 433
 434             with m.State("FETCH_PRED_IDLE"):
 435                 comb += pred_insn_ready_o.eq(1)
 436                 with m.If(pred_insn_valid_i):
 437                     with m.If(predmode == SVP64PredMode.INT):
 438                         # skip fetching destination mask register, when zero
 439                         with m.If(dall1s):
 440                             sync += new_dstmask.eq(-1)
 441                             # directly go to fetch source mask register
 442                             # guaranteed not to be zero (otherwise predmode
 443                             # would be SVP64PredMode.ALWAYS, not INT)
 444                             comb += int_pred.addr.eq(sregread)
 445                             comb += int_pred.ren.eq(1)
 446                             m.next = "INT_SRC_READ"
 447                         # fetch destination predicate register
 448                         with m.Else():
 449                             comb += int_pred.addr.eq(dregread)
 450                             comb += int_pred.ren.eq(1)
 451                             m.next = "INT_DST_READ"
 452                     with m.Elif(predmode == SVP64PredMode.CR):
 453                         # go fetch masks from the CR register file
 454                         sync += new_srcmask.eq(0)
 455                         sync += new_dstmask.eq(0)
 456                         m.next = "CR_READ"
 457                     with m.Else():
 458                         sync += self.srcmask.eq(-1)
 459                         sync += self.dstmask.eq(-1)
 460                         m.next = "FETCH_PRED_DONE"
 461
 462             with m.State("INT_DST_READ"):
 463                 # store destination mask
 464                 inv = Repl(dinvert, 64)
 465                 with m.If(dunary):
 466                     # set selected mask bit for 1<<r3 mode
 467                     dst_shift = Signal(range(64))
 468                     comb += dst_shift.eq(self.int_pred.data_o & 0b111111)
 469                     sync += new_dstmask.eq(1 << dst_shift)
 470                 with m.Else():
 471                     # invert mask if requested
 472                     sync += new_dstmask.eq(self.int_pred.data_o ^ inv)
 473                 # skip fetching source mask register, when zero
 474                 with m.If(sall1s):
 475                     sync += new_srcmask.eq(-1)
 476                     m.next = "FETCH_PRED_SHIFT_MASK"
 477                 # fetch source predicate register
 478                 with m.Else():
 479                     comb += int_pred.addr.eq(sregread)
 480                     comb += int_pred.ren.eq(1)
 481                     m.next = "INT_SRC_READ"
 482
 483             with m.State("INT_SRC_READ"):
 484                 # store source mask
 485                 inv = Repl(sinvert, 64)
 486                 with m.If(sunary):
 487                     # set selected mask bit for 1<<r3 mode
 488                     src_shift = Signal(range(64))
 489                     comb += src_shift.eq(self.int_pred.data_o & 0b111111)
 490                     sync += new_srcmask.eq(1 << src_shift)
 491                 with m.Else():
 492                     # invert mask if requested
 493                     sync += new_srcmask.eq(self.int_pred.data_o ^ inv)
 494                 m.next = "FETCH_PRED_SHIFT_MASK"
 495
 496             # fetch masks from the CR register file
 497             # implements the following loop:
 498             # idx, inv = get_predcr(mask)
 499             # mask = 0
 500             # for cr_idx in range(vl):
 501             #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
 502             #     if cr[idx] ^ inv:
 503             #         mask |= 1 << cr_idx
 504             # return mask
 505             with m.State("CR_READ"):
 506                 # CR index to be read, which will be ready by the next cycle
 507                 cr_idx = Signal.like(cur_vl, reset_less=True)
 508                 # submit the read operation to the regfile
 509                 with m.If(cr_idx != cur_vl):
 510                     # the CR read port is unary ...
 511                     # ren = 1 << cr_idx
 512                     # ... in MSB0 convention ...
 513                     # ren = 1 << (7 - cr_idx)
 514                     # ... and with an offset:
 515                     # ren = 1 << (7 - off - cr_idx)
 516                     idx = SVP64CROffs.CRPred + cr_idx
 517                     comb += cr_pred.ren.eq(1 << (7 - idx))
 518                     # signal data valid in the next cycle
 519                     cr_read = Signal(reset_less=True)
 520                     sync += cr_read.eq(1)
 521                     # load the next index
 522                     sync += cr_idx.eq(cr_idx + 1)
 523                 with m.Else():
 524                     # exit on loop end
 525                     sync += cr_read.eq(0)
 526                     sync += cr_idx.eq(0)
 527                     m.next = "FETCH_PRED_SHIFT_MASK"
 528                 with m.If(cr_read):
 529                     # compensate for the one cycle delay on the regfile
 530                     cur_cr_idx = Signal.like(cur_vl)
 531                     comb += cur_cr_idx.eq(cr_idx - 1)
 532                     # read the CR field, select the appropriate bit
 533                     cr_field = Signal(4)
 534                     scr_bit = Signal()
 535                     dcr_bit = Signal()
 536                     comb += cr_field.eq(cr_pred.data_o)
 537                     comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
 538                     comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
 539                     # set the corresponding mask bit
 540                     bit_to_set = Signal.like(self.srcmask)
 541                     comb += bit_to_set.eq(1 << cur_cr_idx)
 542                     with m.If(scr_bit):
 543                         sync += new_srcmask.eq(new_srcmask | bit_to_set)
 544                     with m.If(dcr_bit):
 545                         sync += new_dstmask.eq(new_dstmask | bit_to_set)
 546
 547             with m.State("FETCH_PRED_SHIFT_MASK"):
 548                 # shift-out skipped mask bits
 549                 sync += self.srcmask.eq(new_srcmask >> srcstep)
 550                 sync += self.dstmask.eq(new_dstmask >> dststep)
 551                 m.next = "FETCH_PRED_DONE"
 552
 553             with m.State("FETCH_PRED_DONE"):
 554                 comb += pred_mask_valid_o.eq(1)
 555                 with m.If(pred_mask_ready_i):
 556                     m.next = "FETCH_PRED_IDLE"
 557
 558     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 559                   dbg, core_rst, is_svp64_mode,
 560                   fetch_pc_ready_o, fetch_pc_valid_i,
 561                   fetch_insn_valid_o, fetch_insn_ready_i,
 562                   pred_insn_valid_i, pred_insn_ready_o,
 563                   pred_mask_valid_o, pred_mask_ready_i,
 564                   exec_insn_valid_i, exec_insn_ready_o,
 565                   exec_pc_valid_o, exec_pc_ready_i):
 566         """issue FSM
 567
 568         decode / issue FSM.  this interacts with the "fetch" FSM
 569         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 570         (outgoing). also interacts with the "execute" FSM
 571         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 572         (incoming).
 573         SVP64 RM prefixes have already been set up by the
 574         "fetch" phase, so execute is fairly straightforward.
 575         """
 576
 577         comb = m.d.comb
 578         sync = m.d.sync
 579         pdecode2 = self.pdecode2
 580         cur_state = self.cur_state
 581
 582         # temporaries
 583         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 584
 585         # for updating svstate (things like srcstep etc.)
 586         update_svstate = Signal() # set this (below) if updating
 587         new_svstate = SVSTATERec("new_svstate")
 588         comb += new_svstate.eq(cur_state.svstate)
 589
 590         # precalculate srcstep+1 and dststep+1
 591         cur_srcstep = cur_state.svstate.srcstep
 592         cur_dststep = cur_state.svstate.dststep
 593         next_srcstep = Signal.like(cur_srcstep)
 594         next_dststep = Signal.like(cur_dststep)
 595         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 596         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 597
 598         # note if an exception happened.  in a pipelined or OoO design
 599         # this needs to be accompanied by "shadowing" (or stalling)
 600         el = []
 601         for exc in core.fus.excs.values():
 602             el.append(exc.happened)
 603         exc_happened = Signal()
 604         if len(el) > 0: # at least one exception
 605             comb += exc_happened.eq(Cat(*el).bool())
 606
 607         with m.FSM(name="issue_fsm"):
 608
 609             # sync with the "fetch" phase which is reading the instruction
 610             # at this point, there is no instruction running, that
 611             # could inadvertently update the PC.
 612             with m.State("ISSUE_START"):
 613                 # wait on "core stop" release, before next fetch
 614                 # need to do this here, in case we are in a VL==0 loop
 615                 with m.If(~dbg.core_stop_o & ~core_rst):
 616                     comb += fetch_pc_valid_i.eq(1) # tell fetch to start
 617                     with m.If(fetch_pc_ready_o):   # fetch acknowledged us
 618                         m.next = "INSN_WAIT"
 619                 with m.Else():
 620                     # tell core it's stopped, and acknowledge debug handshake
 621                     comb += dbg.core_stopped_i.eq(1)
 622                     # while stopped, allow updating the PC and SVSTATE
 623                     with m.If(self.pc_i.ok):
 624                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 625                         comb += self.state_w_pc.data_i.eq(self.pc_i.data)
 626                         sync += pc_changed.eq(1)
 627                     with m.If(self.svstate_i.ok):
 628                         comb += new_svstate.eq(self.svstate_i.data)
 629                         comb += update_svstate.eq(1)
 630                         sync += sv_changed.eq(1)
 631
 632             # wait for an instruction to arrive from Fetch
 633             with m.State("INSN_WAIT"):
 634                 comb += fetch_insn_ready_i.eq(1)
 635                 with m.If(fetch_insn_valid_o):
 636                     # loop into ISSUE_START if it's a SVP64 instruction
 637                     # and VL == 0.  this because VL==0 is a for-loop
 638                     # from 0 to 0 i.e. always, always a NOP.
 639                     cur_vl = cur_state.svstate.vl
 640                     with m.If(is_svp64_mode & (cur_vl == 0)):
 641                         # update the PC before fetching the next instruction
 642                         # since we are in a VL==0 loop, no instruction was
 643                         # executed that we could be overwriting
 644                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 645                         comb += self.state_w_pc.data_i.eq(nia)
 646                         comb += self.insn_done.eq(1)
 647                         m.next = "ISSUE_START"
 648                     with m.Else():
 649                         if self.svp64_en:
 650                             m.next = "PRED_START"  # start fetching predicate
 651                         else:
 652                             m.next = "DECODE_SV"  # skip predication
 653
 654             with m.State("PRED_START"):
 655                 comb += pred_insn_valid_i.eq(1)  # tell fetch_pred to start
 656                 with m.If(pred_insn_ready_o):  # fetch_pred acknowledged us
 657                     m.next = "MASK_WAIT"
 658
 659             with m.State("MASK_WAIT"):
 660                 comb += pred_mask_ready_i.eq(1) # ready to receive the masks
 661                 with m.If(pred_mask_valid_o): # predication masks are ready
 662                     m.next = "PRED_SKIP"
 663
 664             # skip zeros in predicate
 665             with m.State("PRED_SKIP"):
 666                 with m.If(~is_svp64_mode):
 667                     m.next = "DECODE_SV"  # nothing to do
 668                 with m.Else():
 669                     if self.svp64_en:
 670                         pred_src_zero = pdecode2.rm_dec.pred_sz
 671                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 672
 673                         # new srcstep, after skipping zeros
 674                         skip_srcstep = Signal.like(cur_srcstep)
 675                         # value to be added to the current srcstep
 676                         src_delta = Signal.like(cur_srcstep)
 677                         # add leading zeros to srcstep, if not in zero mode
 678                         with m.If(~pred_src_zero):
 679                             # priority encoder (count leading zeros)
 680                             # append guard bit, in case the mask is all zeros
 681                             pri_enc_src = PriorityEncoder(65)
 682                             m.submodules.pri_enc_src = pri_enc_src
 683                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 684                                                          Const(1, 1)))
 685                             comb += src_delta.eq(pri_enc_src.o)
 686                         # apply delta to srcstep
 687                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 688                         # shift-out all leading zeros from the mask
 689                         # plus the leading "one" bit
 690                         # TODO count leading zeros and shift-out the zero
 691                         #      bits, in the same step, in hardware
 692                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 693
 694                         # same as above, but for dststep
 695                         skip_dststep = Signal.like(cur_dststep)
 696                         dst_delta = Signal.like(cur_dststep)
 697                         with m.If(~pred_dst_zero):
 698                             pri_enc_dst = PriorityEncoder(65)
 699                             m.submodules.pri_enc_dst = pri_enc_dst
 700                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 701                                                          Const(1, 1)))
 702                             comb += dst_delta.eq(pri_enc_dst.o)
 703                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 704                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 705
 706                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 707                         with m.If((skip_srcstep >= cur_vl) |
 708                                   (skip_dststep >= cur_vl)):
 709                             # end of VL loop. Update PC and reset src/dst step
 710                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 711                             comb += self.state_w_pc.data_i.eq(nia)
 712                             comb += new_svstate.srcstep.eq(0)
 713                             comb += new_svstate.dststep.eq(0)
 714                             comb += update_svstate.eq(1)
 715                             # synchronize with the simulator
 716                             comb += self.insn_done.eq(1)
 717                             # go back to Issue
 718                             m.next = "ISSUE_START"
 719                         with m.Else():
 720                             # update new src/dst step
 721                             comb += new_svstate.srcstep.eq(skip_srcstep)
 722                             comb += new_svstate.dststep.eq(skip_dststep)
 723                             comb += update_svstate.eq(1)
 724                             # proceed to Decode
 725                             m.next = "DECODE_SV"
 726
 727                         # pass predicate mask bits through to satellite decoders
 728                         # TODO: for SIMD this will be *multiple* bits
 729                         sync += core.sv_pred_sm.eq(self.srcmask[0])
 730                         sync += core.sv_pred_dm.eq(self.dstmask[0])
 731
 732             # after src/dst step have been updated, we are ready
 733             # to decode the instruction
 734             with m.State("DECODE_SV"):
 735                 # decode the instruction
 736                 sync += core.e.eq(pdecode2.e)
 737                 sync += core.state.eq(cur_state)
 738                 sync += core.raw_insn_i.eq(dec_opcode_i)
 739                 sync += core.bigendian_i.eq(self.core_bigendian_i)
 740                 if self.svp64_en:
 741                     sync += core.sv_rm.eq(pdecode2.sv_rm)
 742                     # set RA_OR_ZERO detection in satellite decoders
 743                     sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
 744
 745                 m.next = "INSN_EXECUTE"  # move to "execute"
 746
 747             # handshake with execution FSM, move to "wait" once acknowledged
 748             with m.State("INSN_EXECUTE"):
 749                 comb += exec_insn_valid_i.eq(1) # trigger execute
 750                 with m.If(exec_insn_ready_o):   # execute acknowledged us
 751                     m.next = "EXECUTE_WAIT"
 752
 753             with m.State("EXECUTE_WAIT"):
 754                 # wait on "core stop" release, at instruction end
 755                 # need to do this here, in case we are in a VL>1 loop
 756                 with m.If(~dbg.core_stop_o & ~core_rst):
 757                     comb += exec_pc_ready_i.eq(1)
 758                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 759                     #with m.If(exec_pc_valid_o & exc_happened):
 760                     #    probably something like this:
 761                     #    sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0")
 762                     # TODO: the exception info needs to be blatted
 763                     # into pdecode.ldst_exc, and the instruction "re-run".
 764                     # when ldst_exc.happened is set, the PowerDecoder2
 765                     # reacts very differently: it re-writes the instruction
 766                     # with a "trap" (calls PowerDecoder2.trap()) which
 767                     # will *overwrite* whatever was requested and jump the
 768                     # PC to the exception address, as well as alter MSR.
 769                     # nothing else needs to be done other than to note
 770                     # the change of PC and MSR (and, later, SVSTATE)
 771                     #with m.Elif(exec_pc_valid_o):
 772                     with m.If(exec_pc_valid_o): # replace with Elif (above)
 773
 774                         # was this the last loop iteration?
 775                         is_last = Signal()
 776                         cur_vl = cur_state.svstate.vl
 777                         comb += is_last.eq(next_srcstep == cur_vl)
 778
 779                         # if either PC or SVSTATE were changed by the previous
 780                         # instruction, go directly back to Fetch, without
 781                         # updating either PC or SVSTATE
 782                         with m.If(pc_changed | sv_changed):
 783                             m.next = "ISSUE_START"
 784
 785                         # also return to Fetch, when no output was a vector
 786                         # (regardless of SRCSTEP and VL), or when the last
 787                         # instruction was really the last one of the VL loop
 788                         with m.Elif((~pdecode2.loop_continue) | is_last):
 789                             # before going back to fetch, update the PC state
 790                             # register with the NIA.
 791                             # ok here we are not reading the branch unit.
 792                             # TODO: this just blithely overwrites whatever
 793                             #       pipeline updated the PC
 794                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 795                             comb += self.state_w_pc.data_i.eq(nia)
 796                             # reset SRCSTEP before returning to Fetch
 797                             if self.svp64_en:
 798                                 with m.If(pdecode2.loop_continue):
 799                                     comb += new_svstate.srcstep.eq(0)
 800                                     comb += new_svstate.dststep.eq(0)
 801                                     comb += update_svstate.eq(1)
 802                             else:
 803                                 comb += new_svstate.srcstep.eq(0)
 804                                 comb += new_svstate.dststep.eq(0)
 805                                 comb += update_svstate.eq(1)
 806                             m.next = "ISSUE_START"
 807
 808                         # returning to Execute? then, first update SRCSTEP
 809                         with m.Else():
 810                             comb += new_svstate.srcstep.eq(next_srcstep)
 811                             comb += new_svstate.dststep.eq(next_dststep)
 812                             comb += update_svstate.eq(1)
 813                             # return to mask skip loop
 814                             m.next = "PRED_SKIP"
 815
 816                 with m.Else():
 817                     comb += dbg.core_stopped_i.eq(1)
 818                     # while stopped, allow updating the PC and SVSTATE
 819                     with m.If(self.pc_i.ok):
 820                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 821                         comb += self.state_w_pc.data_i.eq(self.pc_i.data)
 822                         sync += pc_changed.eq(1)
 823                     with m.If(self.svstate_i.ok):
 824                         comb += new_svstate.eq(self.svstate_i.data)
 825                         comb += update_svstate.eq(1)
 826                         sync += sv_changed.eq(1)
 827
 828         # check if svstate needs updating: if so, write it to State Regfile
 829         with m.If(update_svstate):
 830             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 831             comb += self.state_w_sv.data_i.eq(new_svstate)
 832             sync += cur_state.svstate.eq(new_svstate) # for next clock
 833
 834     def execute_fsm(self, m, core, pc_changed, sv_changed,
 835                     exec_insn_valid_i, exec_insn_ready_o,
 836                     exec_pc_valid_o, exec_pc_ready_i):
 837         """execute FSM
 838
 839         execute FSM. this interacts with the "issue" FSM
 840         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 841         (outgoing). SVP64 RM prefixes have already been set up by the
 842         "issue" phase, so execute is fairly straightforward.
 843         """
 844
 845         comb = m.d.comb
 846         sync = m.d.sync
 847         pdecode2 = self.pdecode2
 848
 849         # temporaries
 850         core_busy_o = core.busy_o                 # core is busy
 851         core_ivalid_i = core.ivalid_i             # instruction is valid
 852         core_issue_i = core.issue_i               # instruction is issued
 853         insn_type = core.e.do.insn_type           # instruction MicroOp type
 854
 855         with m.FSM(name="exec_fsm"):
 856
 857             # waiting for instruction bus (stays there until not busy)
 858             with m.State("INSN_START"):
 859                 comb += exec_insn_ready_o.eq(1)
 860                 with m.If(exec_insn_valid_i):
 861                     comb += core_ivalid_i.eq(1)  # instruction is valid
 862                     comb += core_issue_i.eq(1)  # and issued
 863                     sync += sv_changed.eq(0)
 864                     sync += pc_changed.eq(0)
 865                     m.next = "INSN_ACTIVE"  # move to "wait completion"
 866
 867             # instruction started: must wait till it finishes
 868             with m.State("INSN_ACTIVE"):
 869                 with m.If(insn_type != MicrOp.OP_NOP):
 870                     comb += core_ivalid_i.eq(1) # instruction is valid
 871                 # note changes to PC and SVSTATE
 872                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 873                     sync += sv_changed.eq(1)
 874                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 875                     sync += pc_changed.eq(1)
 876                 with m.If(~core_busy_o): # instruction done!
 877                     comb += exec_pc_valid_o.eq(1)
 878                     with m.If(exec_pc_ready_i):
 879                         comb += self.insn_done.eq(1)
 880                         m.next = "INSN_START"  # back to fetch
 881
 882     def setup_peripherals(self, m):
 883         comb, sync = m.d.comb, m.d.sync
 884
 885         m.submodules.core = core = DomainRenamer("coresync")(self.core)
 886         m.submodules.imem = imem = self.imem
 887         m.submodules.dbg = dbg = self.dbg
 888         if self.jtag_en:
 889             m.submodules.jtag = jtag = self.jtag
 890             # TODO: UART2GDB mux, here, from external pin
 891             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 892             sync += dbg.dmi.connect_to(jtag.dmi)
 893
 894         cur_state = self.cur_state
 895
 896         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 897         if self.sram4x4k:
 898             for i, sram in enumerate(self.sram4k):
 899                 m.submodules["sram4k_%d" % i] = sram
 900                 comb += sram.enable.eq(self.wb_sram_en)
 901
 902         # XICS interrupt handler
 903         if self.xics:
 904             m.submodules.xics_icp = icp = self.xics_icp
 905             m.submodules.xics_ics = ics = self.xics_ics
 906             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 907             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
 908
 909         # GPIO test peripheral
 910         if self.gpio:
 911             m.submodules.simple_gpio = simple_gpio = self.simple_gpio
 912
 913         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 914         # XXX causes litex ECP5 test to get wrong idea about input and output
 915         # (but works with verilator sim *sigh*)
 916         #if self.gpio and self.xics:
 917         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 918
 919         # instruction decoder
 920         pdecode = create_pdecode()
 921         m.submodules.dec2 = pdecode2 = self.pdecode2
 922         if self.svp64_en:
 923             m.submodules.svp64 = svp64 = self.svp64
 924
 925         # convenience
 926         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 927         intrf = self.core.regs.rf['int']
 928
 929         # clock delay power-on reset
 930         cd_por  = ClockDomain(reset_less=True)
 931         cd_sync = ClockDomain()
 932         core_sync = ClockDomain("coresync")
 933         m.domains += cd_por, cd_sync, core_sync
 934
 935         ti_rst = Signal(reset_less=True)
 936         delay = Signal(range(4), reset=3)
 937         with m.If(delay != 0):
 938             m.d.por += delay.eq(delay - 1)
 939         comb += cd_por.clk.eq(ClockSignal())
 940
 941         # power-on reset delay
 942         core_rst = ResetSignal("coresync")
 943         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 944         comb += core_rst.eq(ti_rst)
 945
 946         # busy/halted signals from core
 947         comb += self.busy_o.eq(core.busy_o)
 948         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 949
 950         # temporary hack: says "go" immediately for both address gen and ST
 951         l0 = core.l0
 952         ldst = core.fus.fus['ldst0']
 953         st_go_edge = rising_edge(m, ldst.st.rel_o)
 954         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
 955         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
 956
 957         return core_rst
 958
 959     def elaborate(self, platform):
 960         m = Module()
 961         # convenience
 962         comb, sync = m.d.comb, m.d.sync
 963         cur_state = self.cur_state
 964         pdecode2 = self.pdecode2
 965         dbg = self.dbg
 966         core = self.core
 967
 968         # set up peripherals and core
 969         core_rst = self.setup_peripherals(m)
 970
 971         # reset current state if core reset requested
 972         with m.If(core_rst):
 973             m.d.sync += self.cur_state.eq(0)
 974
 975         # PC and instruction from I-Memory
 976         comb += self.pc_o.eq(cur_state.pc)
 977         pc_changed = Signal() # note write to PC
 978         sv_changed = Signal() # note write to SVSTATE
 979
 980         # read state either from incoming override or from regfile
 981         # TODO: really should be doing MSR in the same way
 982         pc = state_get(m, core_rst, self.pc_i,
 983                             "pc",                  # read PC
 984                             self.state_r_pc, StateRegs.PC)
 985         svstate = state_get(m, core_rst, self.svstate_i,
 986                             "svstate",   # read SVSTATE
 987                             self.state_r_sv, StateRegs.SVSTATE)
 988
 989         # don't write pc every cycle
 990         comb += self.state_w_pc.wen.eq(0)
 991         comb += self.state_w_pc.data_i.eq(0)
 992
 993         # don't read msr every cycle
 994         comb += self.state_r_msr.ren.eq(0)
 995
 996         # address of the next instruction, in the absence of a branch
 997         # depends on the instruction size
 998         nia = Signal(64)
 999
1000         # connect up debug signals
1001         # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
1002         comb += dbg.terminate_i.eq(core.core_terminate_o)
1003         comb += dbg.state.pc.eq(pc)
1004         comb += dbg.state.svstate.eq(svstate)
1005         comb += dbg.state.msr.eq(cur_state.msr)
1006
1007         # pass the prefix mode from Fetch to Issue, so the latter can loop
1008         # on VL==0
1009         is_svp64_mode = Signal()
1010
1011         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1012         # issue, decode/execute, now joined by "Predicate fetch/calculate".
1013         # these are the handshake signals between each
1014
1015         # fetch FSM can run as soon as the PC is valid
1016         fetch_pc_valid_i = Signal() # Execute tells Fetch "start next read"
1017         fetch_pc_ready_o = Signal() # Fetch Tells SVSTATE "proceed"
1018
1019         # fetch FSM hands over the instruction to be decoded / issued
1020         fetch_insn_valid_o = Signal()
1021         fetch_insn_ready_i = Signal()
1022
1023         # predicate fetch FSM decodes and fetches the predicate
1024         pred_insn_valid_i = Signal()
1025         pred_insn_ready_o = Signal()
1026
1027         # predicate fetch FSM delivers the masks
1028         pred_mask_valid_o = Signal()
1029         pred_mask_ready_i = Signal()
1030
1031         # issue FSM delivers the instruction to the be executed
1032         exec_insn_valid_i = Signal()
1033         exec_insn_ready_o = Signal()
1034
1035         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1036         exec_pc_valid_o = Signal()
1037         exec_pc_ready_i = Signal()
1038
1039         # the FSMs here are perhaps unusual in that they detect conditions
1040         # then "hold" information, combinatorially, for the core
1041         # (as opposed to using sync - which would be on a clock's delay)
1042         # this includes the actual opcode, valid flags and so on.
1043
1044         # Fetch, then predicate fetch, then Issue, then Execute.
1045         # Issue is where the VL for-loop # lives.  the ready/valid
1046         # signalling is used to communicate between the four.
1047
1048         self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
1049                        fetch_pc_ready_o, fetch_pc_valid_i,
1050                        fetch_insn_valid_o, fetch_insn_ready_i)
1051
1052         self.issue_fsm(m, core, pc_changed, sv_changed, nia,
1053                        dbg, core_rst, is_svp64_mode,
1054                        fetch_pc_ready_o, fetch_pc_valid_i,
1055                        fetch_insn_valid_o, fetch_insn_ready_i,
1056                        pred_insn_valid_i, pred_insn_ready_o,
1057                        pred_mask_valid_o, pred_mask_ready_i,
1058                        exec_insn_valid_i, exec_insn_ready_o,
1059                        exec_pc_valid_o, exec_pc_ready_i)
1060
1061         if self.svp64_en:
1062             self.fetch_predicate_fsm(m,
1063                                      pred_insn_valid_i, pred_insn_ready_o,
1064                                      pred_mask_valid_o, pred_mask_ready_i)
1065
1066         self.execute_fsm(m, core, pc_changed, sv_changed,
1067                          exec_insn_valid_i, exec_insn_ready_o,
1068                          exec_pc_valid_o, exec_pc_ready_i)
1069
1070         # whatever was done above, over-ride it if core reset is held
1071         with m.If(core_rst):
1072             sync += nia.eq(0)
1073
1074         # this bit doesn't have to be in the FSM: connect up to read
1075         # regfiles on demand from DMI
1076         self.do_dmi(m, dbg)
1077
1078         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
1079         # (which uses that in PowerDecoder2 to raise 0x900 exception)
1080         self.tb_dec_fsm(m, cur_state.dec)
1081
1082         return m
1083
1084     def do_dmi(self, m, dbg):
1085         """deals with DMI debug requests
1086
1087         currently only provides read requests for the INT regfile, CR and XER
1088         it will later also deal with *writing* to these regfiles.
1089         """
1090         comb = m.d.comb
1091         sync = m.d.sync
1092         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1093         intrf = self.core.regs.rf['int']
1094
1095         with m.If(d_reg.req): # request for regfile access being made
1096             # TODO: error-check this
1097             # XXX should this be combinatorial?  sync better?
1098             if intrf.unary:
1099                 comb += self.int_r.ren.eq(1<<d_reg.addr)
1100             else:
1101                 comb += self.int_r.addr.eq(d_reg.addr)
1102                 comb += self.int_r.ren.eq(1)
1103         d_reg_delay  = Signal()
1104         sync += d_reg_delay.eq(d_reg.req)
1105         with m.If(d_reg_delay):
1106             # data arrives one clock later
1107             comb += d_reg.data.eq(self.int_r.data_o)
1108             comb += d_reg.ack.eq(1)
1109
1110         # sigh same thing for CR debug
1111         with m.If(d_cr.req): # request for regfile access being made
1112             comb += self.cr_r.ren.eq(0b11111111) # enable all
1113         d_cr_delay  = Signal()
1114         sync += d_cr_delay.eq(d_cr.req)
1115         with m.If(d_cr_delay):
1116             # data arrives one clock later
1117             comb += d_cr.data.eq(self.cr_r.data_o)
1118             comb += d_cr.ack.eq(1)
1119
1120         # aaand XER...
1121         with m.If(d_xer.req): # request for regfile access being made
1122             comb += self.xer_r.ren.eq(0b111111) # enable all
1123         d_xer_delay  = Signal()
1124         sync += d_xer_delay.eq(d_xer.req)
1125         with m.If(d_xer_delay):
1126             # data arrives one clock later
1127             comb += d_xer.data.eq(self.xer_r.data_o)
1128             comb += d_xer.ack.eq(1)
1129
1130     def tb_dec_fsm(self, m, spr_dec):
1131         """tb_dec_fsm
1132
1133         this is a FSM for updating either dec or tb.  it runs alternately
1134         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1135         value to DEC, however the regfile has "passthrough" on it so this
1136         *should* be ok.
1137
1138         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1139         """
1140
1141         comb, sync = m.d.comb, m.d.sync
1142         fast_rf = self.core.regs.rf['fast']
1143         fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1144         fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1145
1146         with m.FSM() as fsm:
1147
1148             # initiates read of current DEC
1149             with m.State("DEC_READ"):
1150                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1151                 comb += fast_r_dectb.ren.eq(1)
1152                 m.next = "DEC_WRITE"
1153
1154             # waits for DEC read to arrive (1 cycle), updates with new value
1155             with m.State("DEC_WRITE"):
1156                 new_dec = Signal(64)
1157                 # TODO: MSR.LPCR 32-bit decrement mode
1158                 comb += new_dec.eq(fast_r_dectb.data_o - 1)
1159                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1160                 comb += fast_w_dectb.wen.eq(1)
1161                 comb += fast_w_dectb.data_i.eq(new_dec)
1162                 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1163                 m.next = "TB_READ"
1164
1165             # initiates read of current TB
1166             with m.State("TB_READ"):
1167                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1168                 comb += fast_r_dectb.ren.eq(1)
1169                 m.next = "TB_WRITE"
1170
1171             # waits for read TB to arrive, initiates write of current TB
1172             with m.State("TB_WRITE"):
1173                 new_tb = Signal(64)
1174                 comb += new_tb.eq(fast_r_dectb.data_o + 1)
1175                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1176                 comb += fast_w_dectb.wen.eq(1)
1177                 comb += fast_w_dectb.data_i.eq(new_tb)
1178                 m.next = "DEC_READ"
1179
1180         return m
1181
1182     def __iter__(self):
1183         yield from self.pc_i.ports()
1184         yield self.pc_o
1185         yield self.memerr_o
1186         yield from self.core.ports()
1187         yield from self.imem.ports()
1188         yield self.core_bigendian_i
1189         yield self.busy_o
1190
1191     def ports(self):
1192         return list(self)
1193
1194     def external_ports(self):
1195         ports = self.pc_i.ports()
1196         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1197                 ]
1198
1199         if self.jtag_en:
1200             ports += list(self.jtag.external_ports())
1201         else:
1202             # don't add DMI if JTAG is enabled
1203             ports += list(self.dbg.dmi.ports())
1204
1205         ports += list(self.imem.ibus.fields.values())
1206         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1207
1208         if self.sram4x4k:
1209             for sram in self.sram4k:
1210                 ports += list(sram.bus.fields.values())
1211
1212         if self.xics:
1213             ports += list(self.xics_icp.bus.fields.values())
1214             ports += list(self.xics_ics.bus.fields.values())
1215             ports.append(self.int_level_i)
1216
1217         if self.gpio:
1218             ports += list(self.simple_gpio.bus.fields.values())
1219             ports.append(self.gpio_o)
1220
1221         return ports
1222
1223     def ports(self):
1224         return list(self)
1225
1226
1227 class TestIssuer(Elaboratable):
1228     def __init__(self, pspec):
1229         self.ti = TestIssuerInternal(pspec)
1230
1231         self.pll = DummyPLL(instance=True)
1232
1233         # PLL direct clock or not
1234         self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1235         if self.pll_en:
1236             self.pll_test_o = Signal(reset_less=True)
1237             self.pll_vco_o = Signal(reset_less=True)
1238             self.clk_sel_i = Signal(reset_less=True)
1239
1240     def elaborate(self, platform):
1241         m = Module()
1242         comb = m.d.comb
1243
1244         # TestIssuer runs at direct clock
1245         m.submodules.ti = ti = self.ti
1246         cd_int = ClockDomain("coresync")
1247
1248         if self.pll_en:
1249             # ClockSelect runs at PLL output internal clock rate
1250             m.submodules.wrappll = pll = self.pll
1251
1252             # add clock domains from PLL
1253             cd_pll = ClockDomain("pllclk")
1254             m.domains += cd_pll
1255
1256             # PLL clock established.  has the side-effect of running clklsel
1257             # at the PLL's speed (see DomainRenamer("pllclk") above)
1258             pllclk = ClockSignal("pllclk")
1259             comb += pllclk.eq(pll.clk_pll_o)
1260
1261             # wire up external 24mhz to PLL
1262             comb += pll.clk_24_i.eq(ClockSignal())
1263
1264             # output 18 mhz PLL test signal, and analog oscillator out
1265             comb += self.pll_test_o.eq(pll.pll_test_o)
1266             comb += self.pll_vco_o.eq(pll.pll_vco_o)
1267
1268             # input to pll clock selection
1269             comb += pll.clk_sel_i.eq(self.clk_sel_i)
1270
1271             # now wire up ResetSignals.  don't mind them being in this domain
1272             pll_rst = ResetSignal("pllclk")
1273             comb += pll_rst.eq(ResetSignal())
1274
1275         # internal clock is set to selector clock-out.  has the side-effect of
1276         # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1277         intclk = ClockSignal("coresync")
1278         if self.pll_en:
1279             comb += intclk.eq(pll.clk_pll_o)
1280         else:
1281             comb += intclk.eq(ClockSignal())
1282
1283         return m
1284
1285     def ports(self):
1286         return list(self.ti.ports()) + list(self.pll.ports()) + \
1287                [ClockSignal(), ResetSignal()]
1288
1289     def external_ports(self):
1290         ports = self.ti.external_ports()
1291         ports.append(ClockSignal())
1292         ports.append(ResetSignal())
1293         if self.pll_en:
1294             ports.append(self.clk_sel_i)
1295             ports.append(self.pll_test_o)
1296             ports.append(self.pll_vco_o)
1297         return ports
1298
1299
1300 if __name__ == '__main__':
1301     units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1302              'spr': 1,
1303              'div': 1,
1304              'mul': 1,
1305              'shiftrot': 1
1306             }
1307     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
1308                          imem_ifacetype='bare_wb',
1309                          addr_wid=48,
1310                          mask_wid=8,
1311                          reg_wid=64,
1312                          units=units)
1313     dut = TestIssuer(pspec)
1314     vl = main(dut, ports=dut.ports(), name="test_issuer")
1315
1316     if len(sys.argv) == 1:
1317         vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
1318         with open("test_issuer.il", "w") as f:
1319             f.write(vl)