src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmigen.lib.coding import PriorityEncoder
  25
  26 from openpower.decoder.power_decoder import create_pdecode
  27 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  28 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  29 from openpower.decoder.decode2execute1 import Data
  30 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  31                                      SVP64PredMode)
  32 from openpower.state import CoreState
  33 from openpower.consts import (CR, SVP64CROffs)
  34 from soc.experiment.testmem import TestMemory # test only for instructions
  35 from soc.regfile.regfiles import StateRegs, FastRegs
  36 from soc.simple.core import NonProductionCore
  37 from soc.config.test.test_loadstore import TestMemPspec
  38 from soc.config.ifetch import ConfigFetchUnit
  39 from soc.debug.dmi import CoreDebug, DMIInterface
  40 from soc.debug.jtag import JTAG
  41 from soc.config.pinouts import get_pinspecs
  42 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  43 from soc.bus.simple_gpio import SimpleGPIO
  44 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  45 from soc.clock.select import ClockSelect
  46 from soc.clock.dummypll import DummyPLL
  47 from openpower.sv.svstate import SVSTATERec
  48
  49
  50 from nmutil.util import rising_edge
  51
  52 def get_insn(f_instr_o, pc):
  53     if f_instr_o.width == 32:
  54         return f_instr_o
  55     else:
  56         # 64-bit: bit 2 of pc decides which word to select
  57         return f_instr_o.word_select(pc[2], 32)
  58
  59 # gets state input or reads from state regfile
  60 def state_get(m, core_rst, state_i, name, regfile, regnum):
  61     comb = m.d.comb
  62     sync = m.d.sync
  63     # read the PC
  64     res = Signal(64, reset_less=True, name=name)
  65     res_ok_delay = Signal(name="%s_ok_delay" % name)
  66     with m.If(~core_rst):
  67         sync += res_ok_delay.eq(~state_i.ok)
  68         with m.If(state_i.ok):
  69             # incoming override (start from pc_i)
  70             comb += res.eq(state_i.data)
  71         with m.Else():
  72             # otherwise read StateRegs regfile for PC...
  73             comb += regfile.ren.eq(1<<regnum)
  74         # ... but on a 1-clock delay
  75         with m.If(res_ok_delay):
  76             comb += res.eq(regfile.data_o)
  77     return res
  78
  79 def get_predint(m, mask, name):
  80     """decode SVP64 predicate integer mask field to reg number and invert
  81     this is identical to the equivalent function in ISACaller except that
  82     it doesn't read the INT directly, it just decodes "what needs to be done"
  83     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  84
  85     * all1s is set to indicate that no mask is to be applied.
  86     * regread indicates the GPR register number to be read
  87     * invert is set to indicate that the register value is to be inverted
  88     * unary indicates that the contents of the register is to be shifted 1<<r3
  89     """
  90     comb = m.d.comb
  91     regread = Signal(5, name=name+"regread")
  92     invert = Signal(name=name+"invert")
  93     unary = Signal(name=name+"unary")
  94     all1s = Signal(name=name+"all1s")
  95     with m.Switch(mask):
  96         with m.Case(SVP64PredInt.ALWAYS.value):
  97             comb += all1s.eq(1)      # use 0b1111 (all ones)
  98         with m.Case(SVP64PredInt.R3_UNARY.value):
  99             comb += regread.eq(3)
 100             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 101         with m.Case(SVP64PredInt.R3.value):
 102             comb += regread.eq(3)
 103         with m.Case(SVP64PredInt.R3_N.value):
 104             comb += regread.eq(3)
 105             comb += invert.eq(1)
 106         with m.Case(SVP64PredInt.R10.value):
 107             comb += regread.eq(10)
 108         with m.Case(SVP64PredInt.R10_N.value):
 109             comb += regread.eq(10)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R30.value):
 112             comb += regread.eq(30)
 113         with m.Case(SVP64PredInt.R30_N.value):
 114             comb += regread.eq(30)
 115             comb += invert.eq(1)
 116     return regread, invert, unary, all1s
 117
 118 def get_predcr(m, mask, name):
 119     """decode SVP64 predicate CR to reg number field and invert status
 120     this is identical to _get_predcr in ISACaller
 121     """
 122     comb = m.d.comb
 123     idx = Signal(2, name=name+"idx")
 124     invert = Signal(name=name+"crinvert")
 125     with m.Switch(mask):
 126         with m.Case(SVP64PredCR.LT.value):
 127             comb += idx.eq(CR.LT)
 128             comb += invert.eq(0)
 129         with m.Case(SVP64PredCR.GE.value):
 130             comb += idx.eq(CR.LT)
 131             comb += invert.eq(1)
 132         with m.Case(SVP64PredCR.GT.value):
 133             comb += idx.eq(CR.GT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.LE.value):
 136             comb += idx.eq(CR.GT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.EQ.value):
 139             comb += idx.eq(CR.EQ)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.NE.value):
 142             comb += idx.eq(CR.EQ)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.SO.value):
 145             comb += idx.eq(CR.SO)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NS.value):
 148             comb += idx.eq(CR.SO)
 149             comb += invert.eq(1)
 150     return idx, invert
 151
 152
 153 class TestIssuerInternal(Elaboratable):
 154     """TestIssuer - reads instructions from TestMemory and issues them
 155
 156     efficiency and speed is not the main goal here: functional correctness
 157     and code clarity is.  optimisations (which almost 100% interfere with
 158     easy understanding) come later.
 159     """
 160     def __init__(self, pspec):
 161
 162         # test is SVP64 is to be enabled
 163         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 164
 165         # and if regfiles are reduced
 166         self.regreduce_en = (hasattr(pspec, "regreduce") and
 167                                             (pspec.regreduce == True))
 168
 169         # JTAG interface.  add this right at the start because if it's
 170         # added it *modifies* the pspec, by adding enable/disable signals
 171         # for parts of the rest of the core
 172         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 173         self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
 174         #self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 175         if self.jtag_en:
 176             # XXX MUST keep this up-to-date with litex, and
 177             # soc-cocotb-sim, and err.. all needs sorting out, argh
 178             subset = ['uart',
 179                       'mtwi',
 180                       'eint', 'gpio', 'mspi0',
 181                       # 'mspi1', - disabled for now
 182                       # 'pwm', 'sd0', - disabled for now
 183                        'sdr']
 184             self.jtag = JTAG(get_pinspecs(subset=subset),
 185                              domain=self.dbg_domain)
 186             # add signals to pspec to enable/disable icache and dcache
 187             # (or data and intstruction wishbone if icache/dcache not included)
 188             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 189             # TODO: do we actually care if these are not domain-synchronised?
 190             # honestly probably not.
 191             pspec.wb_icache_en = self.jtag.wb_icache_en
 192             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 193             self.wb_sram_en = self.jtag.wb_sram_en
 194         else:
 195             self.wb_sram_en = Const(1)
 196
 197         # add 4k sram blocks?
 198         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 199                          pspec.sram4x4kblock == True)
 200         if self.sram4x4k:
 201             self.sram4k = []
 202             for i in range(4):
 203                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 204                                                     #features={'err'}
 205                                                     ))
 206
 207         # add interrupt controller?
 208         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 209         if self.xics:
 210             self.xics_icp = XICS_ICP()
 211             self.xics_ics = XICS_ICS()
 212             self.int_level_i = self.xics_ics.int_level_i
 213
 214         # add GPIO peripheral?
 215         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 216         if self.gpio:
 217             self.simple_gpio = SimpleGPIO()
 218             self.gpio_o = self.simple_gpio.gpio_o
 219
 220         # main instruction core.  suitable for prototyping / demo only
 221         self.core = core = NonProductionCore(pspec)
 222         self.core_rst = ResetSignal("coresync")
 223
 224         # instruction decoder.  goes into Trap Record
 225         pdecode = create_pdecode()
 226         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
 227         self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
 228                                      opkls=IssuerDecode2ToOperand,
 229                                      svp64_en=self.svp64_en,
 230                                      regreduce_en=self.regreduce_en)
 231         if self.svp64_en:
 232             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 233
 234         # Test Instruction memory
 235         self.imem = ConfigFetchUnit(pspec).fu
 236
 237         # DMI interface
 238         self.dbg = CoreDebug()
 239
 240         # instruction go/monitor
 241         self.pc_o = Signal(64, reset_less=True)
 242         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
 243         self.svstate_i = Data(32, "svstate_i") # ditto
 244         self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
 245         self.busy_o = Signal(reset_less=True)
 246         self.memerr_o = Signal(reset_less=True)
 247
 248         # STATE regfile read /write ports for PC, MSR, SVSTATE
 249         staterf = self.core.regs.rf['state']
 250         self.state_r_pc = staterf.r_ports['cia'] # PC rd
 251         self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
 252         self.state_r_msr = staterf.r_ports['msr'] # MSR rd
 253         self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
 254         self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
 255
 256         # DMI interface access
 257         intrf = self.core.regs.rf['int']
 258         crrf = self.core.regs.rf['cr']
 259         xerrf = self.core.regs.rf['xer']
 260         self.int_r = intrf.r_ports['dmi'] # INT read
 261         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
 262         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 263
 264         if self.svp64_en:
 265             # for predication
 266             self.int_pred = intrf.r_ports['pred'] # INT predicate read
 267             self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 268
 269         # hack method of keeping an eye on whether branch/trap set the PC
 270         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 271         self.state_nia.wen.name = 'state_nia_wen'
 272
 273         # pulse to synchronize the simulator at instruction end
 274         self.insn_done = Signal()
 275
 276         if self.svp64_en:
 277             # store copies of predicate masks
 278             self.srcmask = Signal(64)
 279             self.dstmask = Signal(64)
 280
 281     def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
 282                         fetch_pc_ready_o, fetch_pc_valid_i,
 283                         fetch_insn_valid_o, fetch_insn_ready_i):
 284         """fetch FSM
 285
 286         this FSM performs fetch of raw instruction data, partial-decodes
 287         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 288         read a 2nd 32-bit quantity if that occurs.
 289         """
 290         comb = m.d.comb
 291         sync = m.d.sync
 292         pdecode2 = self.pdecode2
 293         cur_state = self.cur_state
 294         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 295
 296         msr_read = Signal(reset=1)
 297
 298         with m.FSM(name='fetch_fsm'):
 299
 300             # waiting (zzz)
 301             with m.State("IDLE"):
 302                 comb += fetch_pc_ready_o.eq(1)
 303                 with m.If(fetch_pc_valid_i):
 304                     # instruction allowed to go: start by reading the PC
 305                     # capture the PC and also drop it into Insn Memory
 306                     # we have joined a pair of combinatorial memory
 307                     # lookups together.  this is Generally Bad.
 308                     comb += self.imem.a_pc_i.eq(pc)
 309                     comb += self.imem.a_valid_i.eq(1)
 310                     comb += self.imem.f_valid_i.eq(1)
 311                     sync += cur_state.pc.eq(pc)
 312                     sync += cur_state.svstate.eq(svstate) # and svstate
 313
 314                     # initiate read of MSR. arrives one clock later
 315                     comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
 316                     sync += msr_read.eq(0)
 317
 318                     m.next = "INSN_READ"  # move to "wait for bus" phase
 319
 320             # dummy pause to find out why simulation is not keeping up
 321             with m.State("INSN_READ"):
 322                 # one cycle later, msr/sv read arrives.  valid only once.
 323                 with m.If(~msr_read):
 324                     sync += msr_read.eq(1) # yeah don't read it again
 325                     sync += cur_state.msr.eq(self.state_r_msr.data_o)
 326                 with m.If(self.imem.f_busy_o): # zzz...
 327                     # busy: stay in wait-read
 328                     comb += self.imem.a_valid_i.eq(1)
 329                     comb += self.imem.f_valid_i.eq(1)
 330                 with m.Else():
 331                     # not busy: instruction fetched
 332                     insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 333                     if self.svp64_en:
 334                         svp64 = self.svp64
 335                         # decode the SVP64 prefix, if any
 336                         comb += svp64.raw_opcode_in.eq(insn)
 337                         comb += svp64.bigendian.eq(self.core_bigendian_i)
 338                         # pass the decoded prefix (if any) to PowerDecoder2
 339                         sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 340                         sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 341                         # remember whether this is a prefixed instruction, so
 342                         # the FSM can readily loop when VL==0
 343                         sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 344                         # calculate the address of the following instruction
 345                         insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 346                         sync += nia.eq(cur_state.pc + insn_size)
 347                         with m.If(~svp64.is_svp64_mode):
 348                             # with no prefix, store the instruction
 349                             # and hand it directly to the next FSM
 350                             sync += dec_opcode_i.eq(insn)
 351                             m.next = "INSN_READY"
 352                         with m.Else():
 353                             # fetch the rest of the instruction from memory
 354                             comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 355                             comb += self.imem.a_valid_i.eq(1)
 356                             comb += self.imem.f_valid_i.eq(1)
 357                             m.next = "INSN_READ2"
 358                     else:
 359                         # not SVP64 - 32-bit only
 360                         sync += nia.eq(cur_state.pc + 4)
 361                         sync += dec_opcode_i.eq(insn)
 362                         m.next = "INSN_READY"
 363
 364             with m.State("INSN_READ2"):
 365                 with m.If(self.imem.f_busy_o):  # zzz...
 366                     # busy: stay in wait-read
 367                     comb += self.imem.a_valid_i.eq(1)
 368                     comb += self.imem.f_valid_i.eq(1)
 369                 with m.Else():
 370                     # not busy: instruction fetched
 371                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 372                     sync += dec_opcode_i.eq(insn)
 373                     m.next = "INSN_READY"
 374                     # TODO: probably can start looking at pdecode2.rm_dec
 375                     # here or maybe even in INSN_READ state, if svp64_mode
 376                     # detected, in order to trigger - and wait for - the
 377                     # predicate reading.
 378                     if self.svp64_en:
 379                         pmode = pdecode2.rm_dec.predmode
 380                     """
 381                     if pmode != SVP64PredMode.ALWAYS.value:
 382                         fire predicate loading FSM and wait before
 383                         moving to INSN_READY
 384                     else:
 385                         sync += self.srcmask.eq(-1) # set to all 1s
 386                         sync += self.dstmask.eq(-1) # set to all 1s
 387                         m.next = "INSN_READY"
 388                     """
 389
 390             with m.State("INSN_READY"):
 391                 # hand over the instruction, to be decoded
 392                 comb += fetch_insn_valid_o.eq(1)
 393                 with m.If(fetch_insn_ready_i):
 394                     m.next = "IDLE"
 395
 396     def fetch_predicate_fsm(self, m,
 397                             pred_insn_valid_i, pred_insn_ready_o,
 398                             pred_mask_valid_o, pred_mask_ready_i):
 399         """fetch_predicate_fsm - obtains (constructs in the case of CR)
 400            src/dest predicate masks
 401
 402         https://bugs.libre-soc.org/show_bug.cgi?id=617
 403         the predicates can be read here, by using IntRegs r_ports['pred']
 404         or CRRegs r_ports['pred'].  in the case of CRs it will have to
 405         be done through multiple reads, extracting one relevant at a time.
 406         later, a faster way would be to use the 32-bit-wide CR port but
 407         this is more complex decoding, here.  equivalent code used in
 408         ISACaller is "from openpower.decoder.isa.caller import get_predcr"
 409
 410         note: this ENTIRE FSM is not to be called when svp64 is disabled
 411         """
 412         comb = m.d.comb
 413         sync = m.d.sync
 414         pdecode2 = self.pdecode2
 415         rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
 416         predmode = rm_dec.predmode
 417         srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
 418         cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
 419         # get src/dst step, so we can skip already used mask bits
 420         cur_state = self.cur_state
 421         srcstep = cur_state.svstate.srcstep
 422         dststep = cur_state.svstate.dststep
 423         cur_vl = cur_state.svstate.vl
 424
 425         # decode predicates
 426         sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
 427         dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
 428         sidx, scrinvert = get_predcr(m, srcpred, 's')
 429         didx, dcrinvert = get_predcr(m, dstpred, 'd')
 430
 431         # store fetched masks, for either intpred or crpred
 432         # when src/dst step is not zero, the skipped mask bits need to be
 433         # shifted-out, before actually storing them in src/dest mask
 434         new_srcmask = Signal(64, reset_less=True)
 435         new_dstmask = Signal(64, reset_less=True)
 436
 437         with m.FSM(name="fetch_predicate"):
 438
 439             with m.State("FETCH_PRED_IDLE"):
 440                 comb += pred_insn_ready_o.eq(1)
 441                 with m.If(pred_insn_valid_i):
 442                     with m.If(predmode == SVP64PredMode.INT):
 443                         # skip fetching destination mask register, when zero
 444                         with m.If(dall1s):
 445                             sync += new_dstmask.eq(-1)
 446                             # directly go to fetch source mask register
 447                             # guaranteed not to be zero (otherwise predmode
 448                             # would be SVP64PredMode.ALWAYS, not INT)
 449                             comb += int_pred.addr.eq(sregread)
 450                             comb += int_pred.ren.eq(1)
 451                             m.next = "INT_SRC_READ"
 452                         # fetch destination predicate register
 453                         with m.Else():
 454                             comb += int_pred.addr.eq(dregread)
 455                             comb += int_pred.ren.eq(1)
 456                             m.next = "INT_DST_READ"
 457                     with m.Elif(predmode == SVP64PredMode.CR):
 458                         # go fetch masks from the CR register file
 459                         sync += new_srcmask.eq(0)
 460                         sync += new_dstmask.eq(0)
 461                         m.next = "CR_READ"
 462                     with m.Else():
 463                         sync += self.srcmask.eq(-1)
 464                         sync += self.dstmask.eq(-1)
 465                         m.next = "FETCH_PRED_DONE"
 466
 467             with m.State("INT_DST_READ"):
 468                 # store destination mask
 469                 inv = Repl(dinvert, 64)
 470                 with m.If(dunary):
 471                     # set selected mask bit for 1<<r3 mode
 472                     dst_shift = Signal(range(64))
 473                     comb += dst_shift.eq(self.int_pred.data_o & 0b111111)
 474                     sync += new_dstmask.eq(1 << dst_shift)
 475                 with m.Else():
 476                     # invert mask if requested
 477                     sync += new_dstmask.eq(self.int_pred.data_o ^ inv)
 478                 # skip fetching source mask register, when zero
 479                 with m.If(sall1s):
 480                     sync += new_srcmask.eq(-1)
 481                     m.next = "FETCH_PRED_SHIFT_MASK"
 482                 # fetch source predicate register
 483                 with m.Else():
 484                     comb += int_pred.addr.eq(sregread)
 485                     comb += int_pred.ren.eq(1)
 486                     m.next = "INT_SRC_READ"
 487
 488             with m.State("INT_SRC_READ"):
 489                 # store source mask
 490                 inv = Repl(sinvert, 64)
 491                 with m.If(sunary):
 492                     # set selected mask bit for 1<<r3 mode
 493                     src_shift = Signal(range(64))
 494                     comb += src_shift.eq(self.int_pred.data_o & 0b111111)
 495                     sync += new_srcmask.eq(1 << src_shift)
 496                 with m.Else():
 497                     # invert mask if requested
 498                     sync += new_srcmask.eq(self.int_pred.data_o ^ inv)
 499                 m.next = "FETCH_PRED_SHIFT_MASK"
 500
 501             # fetch masks from the CR register file
 502             # implements the following loop:
 503             # idx, inv = get_predcr(mask)
 504             # mask = 0
 505             # for cr_idx in range(vl):
 506             #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
 507             #     if cr[idx] ^ inv:
 508             #         mask |= 1 << cr_idx
 509             # return mask
 510             with m.State("CR_READ"):
 511                 # CR index to be read, which will be ready by the next cycle
 512                 cr_idx = Signal.like(cur_vl, reset_less=True)
 513                 # submit the read operation to the regfile
 514                 with m.If(cr_idx != cur_vl):
 515                     # the CR read port is unary ...
 516                     # ren = 1 << cr_idx
 517                     # ... in MSB0 convention ...
 518                     # ren = 1 << (7 - cr_idx)
 519                     # ... and with an offset:
 520                     # ren = 1 << (7 - off - cr_idx)
 521                     idx = SVP64CROffs.CRPred + cr_idx
 522                     comb += cr_pred.ren.eq(1 << (7 - idx))
 523                     # signal data valid in the next cycle
 524                     cr_read = Signal(reset_less=True)
 525                     sync += cr_read.eq(1)
 526                     # load the next index
 527                     sync += cr_idx.eq(cr_idx + 1)
 528                 with m.Else():
 529                     # exit on loop end
 530                     sync += cr_read.eq(0)
 531                     sync += cr_idx.eq(0)
 532                     m.next = "FETCH_PRED_SHIFT_MASK"
 533                 with m.If(cr_read):
 534                     # compensate for the one cycle delay on the regfile
 535                     cur_cr_idx = Signal.like(cur_vl)
 536                     comb += cur_cr_idx.eq(cr_idx - 1)
 537                     # read the CR field, select the appropriate bit
 538                     cr_field = Signal(4)
 539                     scr_bit = Signal()
 540                     dcr_bit = Signal()
 541                     comb += cr_field.eq(cr_pred.data_o)
 542                     comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
 543                     comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
 544                     # set the corresponding mask bit
 545                     bit_to_set = Signal.like(self.srcmask)
 546                     comb += bit_to_set.eq(1 << cur_cr_idx)
 547                     with m.If(scr_bit):
 548                         sync += new_srcmask.eq(new_srcmask | bit_to_set)
 549                     with m.If(dcr_bit):
 550                         sync += new_dstmask.eq(new_dstmask | bit_to_set)
 551
 552             with m.State("FETCH_PRED_SHIFT_MASK"):
 553                 # shift-out skipped mask bits
 554                 sync += self.srcmask.eq(new_srcmask >> srcstep)
 555                 sync += self.dstmask.eq(new_dstmask >> dststep)
 556                 m.next = "FETCH_PRED_DONE"
 557
 558             with m.State("FETCH_PRED_DONE"):
 559                 comb += pred_mask_valid_o.eq(1)
 560                 with m.If(pred_mask_ready_i):
 561                     m.next = "FETCH_PRED_IDLE"
 562
 563     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 564                   dbg, core_rst, is_svp64_mode,
 565                   fetch_pc_ready_o, fetch_pc_valid_i,
 566                   fetch_insn_valid_o, fetch_insn_ready_i,
 567                   pred_insn_valid_i, pred_insn_ready_o,
 568                   pred_mask_valid_o, pred_mask_ready_i,
 569                   exec_insn_valid_i, exec_insn_ready_o,
 570                   exec_pc_valid_o, exec_pc_ready_i):
 571         """issue FSM
 572
 573         decode / issue FSM.  this interacts with the "fetch" FSM
 574         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 575         (outgoing). also interacts with the "execute" FSM
 576         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 577         (incoming).
 578         SVP64 RM prefixes have already been set up by the
 579         "fetch" phase, so execute is fairly straightforward.
 580         """
 581
 582         comb = m.d.comb
 583         sync = m.d.sync
 584         pdecode2 = self.pdecode2
 585         cur_state = self.cur_state
 586
 587         # temporaries
 588         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 589
 590         # for updating svstate (things like srcstep etc.)
 591         update_svstate = Signal() # set this (below) if updating
 592         new_svstate = SVSTATERec("new_svstate")
 593         comb += new_svstate.eq(cur_state.svstate)
 594
 595         # precalculate srcstep+1 and dststep+1
 596         cur_srcstep = cur_state.svstate.srcstep
 597         cur_dststep = cur_state.svstate.dststep
 598         next_srcstep = Signal.like(cur_srcstep)
 599         next_dststep = Signal.like(cur_dststep)
 600         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 601         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 602
 603         # note if an exception happened.  in a pipelined or OoO design
 604         # this needs to be accompanied by "shadowing" (or stalling)
 605         el = []
 606         for exc in core.fus.excs.values():
 607             el.append(exc.happened)
 608         exc_happened = Signal()
 609         if len(el) > 0: # at least one exception
 610             comb += exc_happened.eq(Cat(*el).bool())
 611
 612         with m.FSM(name="issue_fsm"):
 613
 614             # sync with the "fetch" phase which is reading the instruction
 615             # at this point, there is no instruction running, that
 616             # could inadvertently update the PC.
 617             with m.State("ISSUE_START"):
 618                 # wait on "core stop" release, before next fetch
 619                 # need to do this here, in case we are in a VL==0 loop
 620                 with m.If(~dbg.core_stop_o & ~core_rst):
 621                     comb += fetch_pc_valid_i.eq(1) # tell fetch to start
 622                     with m.If(fetch_pc_ready_o):   # fetch acknowledged us
 623                         m.next = "INSN_WAIT"
 624                 with m.Else():
 625                     # tell core it's stopped, and acknowledge debug handshake
 626                     comb += dbg.core_stopped_i.eq(1)
 627                     # while stopped, allow updating the PC and SVSTATE
 628                     with m.If(self.pc_i.ok):
 629                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 630                         comb += self.state_w_pc.data_i.eq(self.pc_i.data)
 631                         sync += pc_changed.eq(1)
 632                     with m.If(self.svstate_i.ok):
 633                         comb += new_svstate.eq(self.svstate_i.data)
 634                         comb += update_svstate.eq(1)
 635                         sync += sv_changed.eq(1)
 636
 637             # wait for an instruction to arrive from Fetch
 638             with m.State("INSN_WAIT"):
 639                 comb += fetch_insn_ready_i.eq(1)
 640                 with m.If(fetch_insn_valid_o):
 641                     # loop into ISSUE_START if it's a SVP64 instruction
 642                     # and VL == 0.  this because VL==0 is a for-loop
 643                     # from 0 to 0 i.e. always, always a NOP.
 644                     cur_vl = cur_state.svstate.vl
 645                     with m.If(is_svp64_mode & (cur_vl == 0)):
 646                         # update the PC before fetching the next instruction
 647                         # since we are in a VL==0 loop, no instruction was
 648                         # executed that we could be overwriting
 649                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 650                         comb += self.state_w_pc.data_i.eq(nia)
 651                         comb += self.insn_done.eq(1)
 652                         m.next = "ISSUE_START"
 653                     with m.Else():
 654                         if self.svp64_en:
 655                             m.next = "PRED_START"  # start fetching predicate
 656                         else:
 657                             m.next = "DECODE_SV"  # skip predication
 658
 659             with m.State("PRED_START"):
 660                 comb += pred_insn_valid_i.eq(1)  # tell fetch_pred to start
 661                 with m.If(pred_insn_ready_o):  # fetch_pred acknowledged us
 662                     m.next = "MASK_WAIT"
 663
 664             with m.State("MASK_WAIT"):
 665                 comb += pred_mask_ready_i.eq(1) # ready to receive the masks
 666                 with m.If(pred_mask_valid_o): # predication masks are ready
 667                     m.next = "PRED_SKIP"
 668
 669             # skip zeros in predicate
 670             with m.State("PRED_SKIP"):
 671                 with m.If(~is_svp64_mode):
 672                     m.next = "DECODE_SV"  # nothing to do
 673                 with m.Else():
 674                     if self.svp64_en:
 675                         pred_src_zero = pdecode2.rm_dec.pred_sz
 676                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 677
 678                         # new srcstep, after skipping zeros
 679                         skip_srcstep = Signal.like(cur_srcstep)
 680                         # value to be added to the current srcstep
 681                         src_delta = Signal.like(cur_srcstep)
 682                         # add leading zeros to srcstep, if not in zero mode
 683                         with m.If(~pred_src_zero):
 684                             # priority encoder (count leading zeros)
 685                             # append guard bit, in case the mask is all zeros
 686                             pri_enc_src = PriorityEncoder(65)
 687                             m.submodules.pri_enc_src = pri_enc_src
 688                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 689                                                          Const(1, 1)))
 690                             comb += src_delta.eq(pri_enc_src.o)
 691                         # apply delta to srcstep
 692                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 693                         # shift-out all leading zeros from the mask
 694                         # plus the leading "one" bit
 695                         # TODO count leading zeros and shift-out the zero
 696                         #      bits, in the same step, in hardware
 697                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 698
 699                         # same as above, but for dststep
 700                         skip_dststep = Signal.like(cur_dststep)
 701                         dst_delta = Signal.like(cur_dststep)
 702                         with m.If(~pred_dst_zero):
 703                             pri_enc_dst = PriorityEncoder(65)
 704                             m.submodules.pri_enc_dst = pri_enc_dst
 705                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 706                                                          Const(1, 1)))
 707                             comb += dst_delta.eq(pri_enc_dst.o)
 708                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 709                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 710
 711                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 712                         with m.If((skip_srcstep >= cur_vl) |
 713                                   (skip_dststep >= cur_vl)):
 714                             # end of VL loop. Update PC and reset src/dst step
 715                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 716                             comb += self.state_w_pc.data_i.eq(nia)
 717                             comb += new_svstate.srcstep.eq(0)
 718                             comb += new_svstate.dststep.eq(0)
 719                             comb += update_svstate.eq(1)
 720                             # synchronize with the simulator
 721                             comb += self.insn_done.eq(1)
 722                             # go back to Issue
 723                             m.next = "ISSUE_START"
 724                         with m.Else():
 725                             # update new src/dst step
 726                             comb += new_svstate.srcstep.eq(skip_srcstep)
 727                             comb += new_svstate.dststep.eq(skip_dststep)
 728                             comb += update_svstate.eq(1)
 729                             # proceed to Decode
 730                             m.next = "DECODE_SV"
 731
 732                         # pass predicate mask bits through to satellite decoders
 733                         # TODO: for SIMD this will be *multiple* bits
 734                         sync += core.sv_pred_sm.eq(self.srcmask[0])
 735                         sync += core.sv_pred_dm.eq(self.dstmask[0])
 736
 737             # after src/dst step have been updated, we are ready
 738             # to decode the instruction
 739             with m.State("DECODE_SV"):
 740                 # decode the instruction
 741                 sync += core.e.eq(pdecode2.e)
 742                 sync += core.state.eq(cur_state)
 743                 sync += core.raw_insn_i.eq(dec_opcode_i)
 744                 sync += core.bigendian_i.eq(self.core_bigendian_i)
 745                 if self.svp64_en:
 746                     sync += core.sv_rm.eq(pdecode2.sv_rm)
 747                     # set RA_OR_ZERO detection in satellite decoders
 748                     sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
 749                     # and svp64 detection
 750                     sync += core.is_svp64_mode.eq(is_svp64_mode)
 751                     # and svp64 bit-rev'd ldst mode
 752                     ldst_dec = pdecode2.use_svp64_ldst_dec
 753                     sync += core.use_svp64_ldst_dec.eq(ldst_dec)
 754
 755                 m.next = "INSN_EXECUTE"  # move to "execute"
 756
 757             # handshake with execution FSM, move to "wait" once acknowledged
 758             with m.State("INSN_EXECUTE"):
 759                 comb += exec_insn_valid_i.eq(1) # trigger execute
 760                 with m.If(exec_insn_ready_o):   # execute acknowledged us
 761                     m.next = "EXECUTE_WAIT"
 762
 763             with m.State("EXECUTE_WAIT"):
 764                 # wait on "core stop" release, at instruction end
 765                 # need to do this here, in case we are in a VL>1 loop
 766                 with m.If(~dbg.core_stop_o & ~core_rst):
 767                     comb += exec_pc_ready_i.eq(1)
 768                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 769                     #with m.If(exec_pc_valid_o & exc_happened):
 770                     #    probably something like this:
 771                     #    sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0")
 772                     # TODO: the exception info needs to be blatted
 773                     # into pdecode.ldst_exc, and the instruction "re-run".
 774                     # when ldst_exc.happened is set, the PowerDecoder2
 775                     # reacts very differently: it re-writes the instruction
 776                     # with a "trap" (calls PowerDecoder2.trap()) which
 777                     # will *overwrite* whatever was requested and jump the
 778                     # PC to the exception address, as well as alter MSR.
 779                     # nothing else needs to be done other than to note
 780                     # the change of PC and MSR (and, later, SVSTATE)
 781                     #with m.Elif(exec_pc_valid_o):
 782                     with m.If(exec_pc_valid_o): # replace with Elif (above)
 783
 784                         # was this the last loop iteration?
 785                         is_last = Signal()
 786                         cur_vl = cur_state.svstate.vl
 787                         comb += is_last.eq(next_srcstep == cur_vl)
 788
 789                         # if either PC or SVSTATE were changed by the previous
 790                         # instruction, go directly back to Fetch, without
 791                         # updating either PC or SVSTATE
 792                         with m.If(pc_changed | sv_changed):
 793                             m.next = "ISSUE_START"
 794
 795                         # also return to Fetch, when no output was a vector
 796                         # (regardless of SRCSTEP and VL), or when the last
 797                         # instruction was really the last one of the VL loop
 798                         with m.Elif((~pdecode2.loop_continue) | is_last):
 799                             # before going back to fetch, update the PC state
 800                             # register with the NIA.
 801                             # ok here we are not reading the branch unit.
 802                             # TODO: this just blithely overwrites whatever
 803                             #       pipeline updated the PC
 804                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 805                             comb += self.state_w_pc.data_i.eq(nia)
 806                             # reset SRCSTEP before returning to Fetch
 807                             if self.svp64_en:
 808                                 with m.If(pdecode2.loop_continue):
 809                                     comb += new_svstate.srcstep.eq(0)
 810                                     comb += new_svstate.dststep.eq(0)
 811                                     comb += update_svstate.eq(1)
 812                             else:
 813                                 comb += new_svstate.srcstep.eq(0)
 814                                 comb += new_svstate.dststep.eq(0)
 815                                 comb += update_svstate.eq(1)
 816                             m.next = "ISSUE_START"
 817
 818                         # returning to Execute? then, first update SRCSTEP
 819                         with m.Else():
 820                             comb += new_svstate.srcstep.eq(next_srcstep)
 821                             comb += new_svstate.dststep.eq(next_dststep)
 822                             comb += update_svstate.eq(1)
 823                             # return to mask skip loop
 824                             m.next = "PRED_SKIP"
 825
 826                 with m.Else():
 827                     comb += dbg.core_stopped_i.eq(1)
 828                     # while stopped, allow updating the PC and SVSTATE
 829                     with m.If(self.pc_i.ok):
 830                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 831                         comb += self.state_w_pc.data_i.eq(self.pc_i.data)
 832                         sync += pc_changed.eq(1)
 833                     with m.If(self.svstate_i.ok):
 834                         comb += new_svstate.eq(self.svstate_i.data)
 835                         comb += update_svstate.eq(1)
 836                         sync += sv_changed.eq(1)
 837
 838         # check if svstate needs updating: if so, write it to State Regfile
 839         with m.If(update_svstate):
 840             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 841             comb += self.state_w_sv.data_i.eq(new_svstate)
 842             sync += cur_state.svstate.eq(new_svstate) # for next clock
 843
 844     def execute_fsm(self, m, core, pc_changed, sv_changed,
 845                     exec_insn_valid_i, exec_insn_ready_o,
 846                     exec_pc_valid_o, exec_pc_ready_i):
 847         """execute FSM
 848
 849         execute FSM. this interacts with the "issue" FSM
 850         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 851         (outgoing). SVP64 RM prefixes have already been set up by the
 852         "issue" phase, so execute is fairly straightforward.
 853         """
 854
 855         comb = m.d.comb
 856         sync = m.d.sync
 857         pdecode2 = self.pdecode2
 858
 859         # temporaries
 860         core_busy_o = core.busy_o                 # core is busy
 861         core_ivalid_i = core.ivalid_i             # instruction is valid
 862         core_issue_i = core.issue_i               # instruction is issued
 863         insn_type = core.e.do.insn_type           # instruction MicroOp type
 864
 865         with m.FSM(name="exec_fsm"):
 866
 867             # waiting for instruction bus (stays there until not busy)
 868             with m.State("INSN_START"):
 869                 comb += exec_insn_ready_o.eq(1)
 870                 with m.If(exec_insn_valid_i):
 871                     comb += core_ivalid_i.eq(1)  # instruction is valid
 872                     comb += core_issue_i.eq(1)  # and issued
 873                     sync += sv_changed.eq(0)
 874                     sync += pc_changed.eq(0)
 875                     m.next = "INSN_ACTIVE"  # move to "wait completion"
 876
 877             # instruction started: must wait till it finishes
 878             with m.State("INSN_ACTIVE"):
 879                 with m.If(insn_type != MicrOp.OP_NOP):
 880                     comb += core_ivalid_i.eq(1) # instruction is valid
 881                 # note changes to PC and SVSTATE
 882                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 883                     sync += sv_changed.eq(1)
 884                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 885                     sync += pc_changed.eq(1)
 886                 with m.If(~core_busy_o): # instruction done!
 887                     comb += exec_pc_valid_o.eq(1)
 888                     with m.If(exec_pc_ready_i):
 889                         comb += self.insn_done.eq(1)
 890                         m.next = "INSN_START"  # back to fetch
 891
 892     def setup_peripherals(self, m):
 893         comb, sync = m.d.comb, m.d.sync
 894
 895         # okaaaay so the debug module must be in coresync clock domain
 896         # but NOT its reset signal. to cope with this, set every single
 897         # submodule explicitly in coresync domain, debug and JTAG
 898         # in their own one but using *external* reset.
 899         csd = DomainRenamer("coresync")
 900         dbd = DomainRenamer(self.dbg_domain)
 901
 902         m.submodules.core = core = csd(self.core)
 903         m.submodules.imem = imem = csd(self.imem)
 904         m.submodules.dbg = dbg = dbd(self.dbg)
 905         if self.jtag_en:
 906             m.submodules.jtag = jtag = dbd(self.jtag)
 907             # TODO: UART2GDB mux, here, from external pin
 908             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 909             sync += dbg.dmi.connect_to(jtag.dmi)
 910
 911         cur_state = self.cur_state
 912
 913         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 914         if self.sram4x4k:
 915             for i, sram in enumerate(self.sram4k):
 916                 m.submodules["sram4k_%d" % i] = csd(sram)
 917                 comb += sram.enable.eq(self.wb_sram_en)
 918
 919         # XICS interrupt handler
 920         if self.xics:
 921             m.submodules.xics_icp = icp = csd(self.xics_icp)
 922             m.submodules.xics_ics = ics = csd(self.xics_ics)
 923             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 924             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
 925
 926         # GPIO test peripheral
 927         if self.gpio:
 928             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 929
 930         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 931         # XXX causes litex ECP5 test to get wrong idea about input and output
 932         # (but works with verilator sim *sigh*)
 933         #if self.gpio and self.xics:
 934         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 935
 936         # instruction decoder
 937         pdecode = create_pdecode()
 938         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 939         if self.svp64_en:
 940             m.submodules.svp64 = svp64 = csd(self.svp64)
 941
 942         # convenience
 943         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 944         intrf = self.core.regs.rf['int']
 945
 946         # clock delay power-on reset
 947         cd_por  = ClockDomain(reset_less=True)
 948         cd_sync = ClockDomain()
 949         core_sync = ClockDomain("coresync")
 950         m.domains += cd_por, cd_sync, core_sync
 951         if self.dbg_domain != "sync":
 952             dbg_sync = ClockDomain(self.dbg_domain)
 953             m.domains += dbg_sync
 954
 955         ti_rst = Signal(reset_less=True)
 956         delay = Signal(range(4), reset=3)
 957         with m.If(delay != 0):
 958             m.d.por += delay.eq(delay - 1)
 959         comb += cd_por.clk.eq(ClockSignal())
 960
 961         # power-on reset delay
 962         core_rst = ResetSignal("coresync")
 963         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 964         comb += core_rst.eq(ti_rst)
 965
 966         # debug clock is same as coresync, but reset is *main external*
 967         if self.dbg_domain != "sync":
 968             dbg_rst = ResetSignal(self.dbg_domain)
 969             comb += dbg_rst.eq(ResetSignal())
 970
 971         # busy/halted signals from core
 972         comb += self.busy_o.eq(core.busy_o)
 973         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 974
 975         # temporary hack: says "go" immediately for both address gen and ST
 976         l0 = core.l0
 977         ldst = core.fus.fus['ldst0']
 978         st_go_edge = rising_edge(m, ldst.st.rel_o)
 979         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
 980         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
 981
 982     def elaborate(self, platform):
 983         m = Module()
 984         # convenience
 985         comb, sync = m.d.comb, m.d.sync
 986         cur_state = self.cur_state
 987         pdecode2 = self.pdecode2
 988         dbg = self.dbg
 989         core = self.core
 990
 991         # set up peripherals and core
 992         core_rst = self.core_rst
 993         self.setup_peripherals(m)
 994
 995         # reset current state if core reset requested
 996         with m.If(core_rst):
 997             m.d.sync += self.cur_state.eq(0)
 998
 999         # PC and instruction from I-Memory
1000         comb += self.pc_o.eq(cur_state.pc)
1001         pc_changed = Signal() # note write to PC
1002         sv_changed = Signal() # note write to SVSTATE
1003
1004         # read state either from incoming override or from regfile
1005         # TODO: really should be doing MSR in the same way
1006         pc = state_get(m, core_rst, self.pc_i,
1007                             "pc",                  # read PC
1008                             self.state_r_pc, StateRegs.PC)
1009         svstate = state_get(m, core_rst, self.svstate_i,
1010                             "svstate",   # read SVSTATE
1011                             self.state_r_sv, StateRegs.SVSTATE)
1012
1013         # don't write pc every cycle
1014         comb += self.state_w_pc.wen.eq(0)
1015         comb += self.state_w_pc.data_i.eq(0)
1016
1017         # don't read msr every cycle
1018         comb += self.state_r_msr.ren.eq(0)
1019
1020         # address of the next instruction, in the absence of a branch
1021         # depends on the instruction size
1022         nia = Signal(64)
1023
1024         # connect up debug signals
1025         # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
1026         comb += dbg.terminate_i.eq(core.core_terminate_o)
1027         comb += dbg.state.pc.eq(pc)
1028         comb += dbg.state.svstate.eq(svstate)
1029         comb += dbg.state.msr.eq(cur_state.msr)
1030
1031         # pass the prefix mode from Fetch to Issue, so the latter can loop
1032         # on VL==0
1033         is_svp64_mode = Signal()
1034
1035         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1036         # issue, decode/execute, now joined by "Predicate fetch/calculate".
1037         # these are the handshake signals between each
1038
1039         # fetch FSM can run as soon as the PC is valid
1040         fetch_pc_valid_i = Signal() # Execute tells Fetch "start next read"
1041         fetch_pc_ready_o = Signal() # Fetch Tells SVSTATE "proceed"
1042
1043         # fetch FSM hands over the instruction to be decoded / issued
1044         fetch_insn_valid_o = Signal()
1045         fetch_insn_ready_i = Signal()
1046
1047         # predicate fetch FSM decodes and fetches the predicate
1048         pred_insn_valid_i = Signal()
1049         pred_insn_ready_o = Signal()
1050
1051         # predicate fetch FSM delivers the masks
1052         pred_mask_valid_o = Signal()
1053         pred_mask_ready_i = Signal()
1054
1055         # issue FSM delivers the instruction to the be executed
1056         exec_insn_valid_i = Signal()
1057         exec_insn_ready_o = Signal()
1058
1059         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1060         exec_pc_valid_o = Signal()
1061         exec_pc_ready_i = Signal()
1062
1063         # the FSMs here are perhaps unusual in that they detect conditions
1064         # then "hold" information, combinatorially, for the core
1065         # (as opposed to using sync - which would be on a clock's delay)
1066         # this includes the actual opcode, valid flags and so on.
1067
1068         # Fetch, then predicate fetch, then Issue, then Execute.
1069         # Issue is where the VL for-loop # lives.  the ready/valid
1070         # signalling is used to communicate between the four.
1071
1072         self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
1073                        fetch_pc_ready_o, fetch_pc_valid_i,
1074                        fetch_insn_valid_o, fetch_insn_ready_i)
1075
1076         self.issue_fsm(m, core, pc_changed, sv_changed, nia,
1077                        dbg, core_rst, is_svp64_mode,
1078                        fetch_pc_ready_o, fetch_pc_valid_i,
1079                        fetch_insn_valid_o, fetch_insn_ready_i,
1080                        pred_insn_valid_i, pred_insn_ready_o,
1081                        pred_mask_valid_o, pred_mask_ready_i,
1082                        exec_insn_valid_i, exec_insn_ready_o,
1083                        exec_pc_valid_o, exec_pc_ready_i)
1084
1085         if self.svp64_en:
1086             self.fetch_predicate_fsm(m,
1087                                      pred_insn_valid_i, pred_insn_ready_o,
1088                                      pred_mask_valid_o, pred_mask_ready_i)
1089
1090         self.execute_fsm(m, core, pc_changed, sv_changed,
1091                          exec_insn_valid_i, exec_insn_ready_o,
1092                          exec_pc_valid_o, exec_pc_ready_i)
1093
1094         # whatever was done above, over-ride it if core reset is held
1095         with m.If(core_rst):
1096             sync += nia.eq(0)
1097
1098         # this bit doesn't have to be in the FSM: connect up to read
1099         # regfiles on demand from DMI
1100         self.do_dmi(m, dbg)
1101
1102         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
1103         # (which uses that in PowerDecoder2 to raise 0x900 exception)
1104         self.tb_dec_fsm(m, cur_state.dec)
1105
1106         return m
1107
1108     def do_dmi(self, m, dbg):
1109         """deals with DMI debug requests
1110
1111         currently only provides read requests for the INT regfile, CR and XER
1112         it will later also deal with *writing* to these regfiles.
1113         """
1114         comb = m.d.comb
1115         sync = m.d.sync
1116         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1117         intrf = self.core.regs.rf['int']
1118
1119         with m.If(d_reg.req): # request for regfile access being made
1120             # TODO: error-check this
1121             # XXX should this be combinatorial?  sync better?
1122             if intrf.unary:
1123                 comb += self.int_r.ren.eq(1<<d_reg.addr)
1124             else:
1125                 comb += self.int_r.addr.eq(d_reg.addr)
1126                 comb += self.int_r.ren.eq(1)
1127         d_reg_delay  = Signal()
1128         sync += d_reg_delay.eq(d_reg.req)
1129         with m.If(d_reg_delay):
1130             # data arrives one clock later
1131             comb += d_reg.data.eq(self.int_r.data_o)
1132             comb += d_reg.ack.eq(1)
1133
1134         # sigh same thing for CR debug
1135         with m.If(d_cr.req): # request for regfile access being made
1136             comb += self.cr_r.ren.eq(0b11111111) # enable all
1137         d_cr_delay  = Signal()
1138         sync += d_cr_delay.eq(d_cr.req)
1139         with m.If(d_cr_delay):
1140             # data arrives one clock later
1141             comb += d_cr.data.eq(self.cr_r.data_o)
1142             comb += d_cr.ack.eq(1)
1143
1144         # aaand XER...
1145         with m.If(d_xer.req): # request for regfile access being made
1146             comb += self.xer_r.ren.eq(0b111111) # enable all
1147         d_xer_delay  = Signal()
1148         sync += d_xer_delay.eq(d_xer.req)
1149         with m.If(d_xer_delay):
1150             # data arrives one clock later
1151             comb += d_xer.data.eq(self.xer_r.data_o)
1152             comb += d_xer.ack.eq(1)
1153
1154     def tb_dec_fsm(self, m, spr_dec):
1155         """tb_dec_fsm
1156
1157         this is a FSM for updating either dec or tb.  it runs alternately
1158         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1159         value to DEC, however the regfile has "passthrough" on it so this
1160         *should* be ok.
1161
1162         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1163         """
1164
1165         comb, sync = m.d.comb, m.d.sync
1166         fast_rf = self.core.regs.rf['fast']
1167         fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1168         fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1169
1170         with m.FSM() as fsm:
1171
1172             # initiates read of current DEC
1173             with m.State("DEC_READ"):
1174                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1175                 comb += fast_r_dectb.ren.eq(1)
1176                 m.next = "DEC_WRITE"
1177
1178             # waits for DEC read to arrive (1 cycle), updates with new value
1179             with m.State("DEC_WRITE"):
1180                 new_dec = Signal(64)
1181                 # TODO: MSR.LPCR 32-bit decrement mode
1182                 comb += new_dec.eq(fast_r_dectb.data_o - 1)
1183                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1184                 comb += fast_w_dectb.wen.eq(1)
1185                 comb += fast_w_dectb.data_i.eq(new_dec)
1186                 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1187                 m.next = "TB_READ"
1188
1189             # initiates read of current TB
1190             with m.State("TB_READ"):
1191                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1192                 comb += fast_r_dectb.ren.eq(1)
1193                 m.next = "TB_WRITE"
1194
1195             # waits for read TB to arrive, initiates write of current TB
1196             with m.State("TB_WRITE"):
1197                 new_tb = Signal(64)
1198                 comb += new_tb.eq(fast_r_dectb.data_o + 1)
1199                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1200                 comb += fast_w_dectb.wen.eq(1)
1201                 comb += fast_w_dectb.data_i.eq(new_tb)
1202                 m.next = "DEC_READ"
1203
1204         return m
1205
1206     def __iter__(self):
1207         yield from self.pc_i.ports()
1208         yield self.pc_o
1209         yield self.memerr_o
1210         yield from self.core.ports()
1211         yield from self.imem.ports()
1212         yield self.core_bigendian_i
1213         yield self.busy_o
1214
1215     def ports(self):
1216         return list(self)
1217
1218     def external_ports(self):
1219         ports = self.pc_i.ports()
1220         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1221                 ]
1222
1223         if self.jtag_en:
1224             ports += list(self.jtag.external_ports())
1225         else:
1226             # don't add DMI if JTAG is enabled
1227             ports += list(self.dbg.dmi.ports())
1228
1229         ports += list(self.imem.ibus.fields.values())
1230         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1231
1232         if self.sram4x4k:
1233             for sram in self.sram4k:
1234                 ports += list(sram.bus.fields.values())
1235
1236         if self.xics:
1237             ports += list(self.xics_icp.bus.fields.values())
1238             ports += list(self.xics_ics.bus.fields.values())
1239             ports.append(self.int_level_i)
1240
1241         if self.gpio:
1242             ports += list(self.simple_gpio.bus.fields.values())
1243             ports.append(self.gpio_o)
1244
1245         return ports
1246
1247     def ports(self):
1248         return list(self)
1249
1250
class TestIssuer(Elaboratable):
    """wraps TestIssuerInternal together with a DummyPLL

    the "coresync" clock (and the debug clock, when the debug domain is
    not "sync") is driven from the default domain's clock.  when
    pspec.use_pll is set, the PLL is added as a submodule and its test,
    VCO and clock-select signals are brought out, but the core clock is
    still taken from ref_clk (the PLL is bypassed).
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not (absent attribute means "not")
        self.pll_en = getattr(pspec, "use_pll", False)
        if not self.pll_en:
            return

        # PLL-only ports
        self.pll_test_o = Signal(reset_less=True)
        self.pll_vco_o = Signal(reset_less=True)
        self.clk_sel_i = Signal(2, reset_less=True)
        self.ref_clk =  ClockSignal() # can't rename it but that's ok
        self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """adds the issuer (and PLL, if enabled), and wires up clocks"""
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # the PLL output gets a clock domain of its own, "pllclk"
            m.domains += ClockDomain("pllclk")
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # (the external 24mhz reference is not wired to the PLL here)

            # PLL test signal and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # pllclk shares the main reset
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # XXX BYPASS PLL XXX
        # the core clock comes from ref_clk when the PLL is enabled, and
        # from the default domain's clock otherwise: never from the PLL
        intclk = ClockSignal("coresync")
        clk_source = self.ref_clk if self.pll_en else ClockSignal()
        comb += intclk.eq(clk_source)

        # debug clock runs at coresync internal clock
        dbg_domain = self.ti.dbg_domain
        if dbg_domain != 'sync':
            comb += ClockSignal(dbg_domain).eq(intclk)

        return m

    def ports(self):
        """issuer ports, PLL ports, then the main clock and reset"""
        return [*self.ti.ports(), *self.pll.ports(),
                ClockSignal(), ResetSignal()]

    def external_ports(self):
        """issuer external ports, main clock and reset, and (only when
        the PLL is enabled) the PLL-related signals
        """
        ports = self.ti.external_ports()
        ports.extend([ClockSignal(), ResetSignal()])
        if self.pll_en:
            ports.extend([self.clk_sel_i,
                          self.pll.clk_24_i,
                          self.pll_test_o,
                          self.pll_vco_o,
                          self.pllclk_clk,
                          self.ref_clk])
        return ports
1338
1339
if __name__ == '__main__':
    # every listed unit maps to 1 (presumably the number of each Function
    # Unit to instantiate -- confirm against NonProductionCore)
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # let the nmigen command-line driver act on any arguments given
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # with no command-line arguments, also write the design out as RTLIL
    no_cli_args = len(sys.argv) == 1
    if no_cli_args:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)