src/soc/experiment/score6600_multi.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5 from nmigen.back.pysim import Delay
   6
   7 from soc.regfile.regfile import RegFileArray, ortreereduce
   8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
   9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
  10 from soc.scoreboard.global_pending import GlobalPending
  11 from soc.scoreboard.group_picker import GroupPicker
  12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  15 from soc.scoreboard.memfu import MemFunctionUnits
  16
  17 from soc.experiment.compalu import ComputationUnitNoDelay
  18 from soc.experiment.compalu_multi import MultiCompUnit, go_record
  19 from soc.experiment.compldst_multi import LDSTCompUnit
  20 from soc.experiment.compldst_multi import CompLDSTOpSubset
  21 from soc.experiment.l0_cache import TstL0CacheBuffer
  22
  23 # for testing purposes
  24 from soc.config.test.test_loadstore import TestMemPspec
  25 from soc.experiment.alu_hier import ALUFunctionUnit, BranchALU
  26 from soc.fu.alu.alu_input_record import CompALUOpSubset
  27
  28 from openpower.decoder.power_enums import MicrOp, Function
  29 from openpower.decoder.power_decoder import (create_pdecode)
  30 from openpower.decoder.power_decoder2 import (PowerDecode2)
  31 from openpower.decoder.power_decoder2 import Decode2ToExecute1Type
  32
  33 from openpower.simulator.program import Program
  34
  35
  36 from nmutil.latch import SRLatch
  37 from nmutil.nmoperator import eq
  38
  39 from random import randint, seed
  40 from copy import deepcopy
  41 from math import log
  42
  43 from soc.experiment.sim import RegSim, MemSim
  44 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
  45
  46
  47 class CompUnitsBase(Elaboratable):
  48     """ Computation Unit Base class.
  49
  50         Amazingly, this class works recursively.  It's supposed to just
  51         look after some ALUs (that can handle the same operations),
  52         grouping them together, however it turns out that the same code
  53         can also group *groups* of Computation Units together as well.
  54
  55         Basically it was intended just to concatenate the ALU's issue,
  56         go_rd etc. signals together, which start out as bits and become
  57         sequences.  Turns out that the same trick works just as well
  58         on Computation Units!
  59
  60         So this class may be used recursively to present a top-level
  61         sequential concatenation of all the signals in and out of
  62         ALUs, whilst at the same time making it convenient to group
  63         ALUs together.
  64
  65         At the lower level, the intent is that groups of (identical)
  66         ALUs may be passed the same operation.  Even beyond that,
  67         the intent is that that group of (identical) ALUs actually
  68         share the *same pipeline* and as such become a "Concurrent
  69         Computation Unit" as defined by Mitch Alsup (see section
  70         11.4.9.3)
  71     """
  72
  73     def __init__(self, rwid, units, ldstmode=False):
  74         """ Inputs:
  75
  76             * :rwid:   bit width of register file(s) - both FP and INT
  77             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  78         """
  79         self.units = units
  80         self.ldstmode = ldstmode
  81         self.rwid = rwid
  82         self.rwid = rwid
  83         if units and isinstance(units[0], CompUnitsBase):
  84             self.n_units = 0
  85             for u in self.units:
  86                 self.n_units += u.n_units
  87         else:
  88             self.n_units = len(units)
  89
  90         n_units = self.n_units
  91
  92         # inputs
  93         self.issue_i = Signal(n_units, reset_less=True)
  94         self.rd0 = go_record(n_units, "rd0")
  95         self.rd1 = go_record(n_units, "rd1")
  96         self.go_rd_i = [self.rd0.go_i, self.rd1.go_i]  # XXX HACK!
  97         self.wr0 = go_record(n_units, "wr0")
  98         self.go_wr_i = [self.wr0.go_i]
  99         self.shadown_i = Signal(n_units, reset_less=True)
 100         self.go_die_i = Signal(n_units, reset_less=True)
 101         if ldstmode:
 102             self.go_ad_i = Signal(n_units, reset_less=True)
 103             self.go_st_i = Signal(n_units, reset_less=True)
 104
 105         # outputs
 106         self.busy_o = Signal(n_units, reset_less=True)
 107         self.rd_rel_o = [self.rd0.rel_o, self.rd1.rel_o]  # HACK!
 108         self.req_rel_o = self.wr0.rel_o
 109         self.done_o = Signal(n_units, reset_less=True)
 110         if ldstmode:
 111             self.ld_o = Signal(n_units, reset_less=True)  # op is LD
 112             self.st_o = Signal(n_units, reset_less=True)  # op is ST
 113             self.adr_rel_o = Signal(n_units, reset_less=True)
 114             self.sto_rel_o = Signal(n_units, reset_less=True)
 115             self.load_mem_o = Signal(n_units, reset_less=True)
 116             self.stwd_mem_o = Signal(n_units, reset_less=True)
 117             self.addr_o = Signal(rwid, reset_less=True)
 118
 119         # in/out register data (note: not register#, actual data)
 120         self.o_data = Signal(rwid, reset_less=True)
 121         self.src1_i = Signal(rwid, reset_less=True)
 122         self.src2_i = Signal(rwid, reset_less=True)
 123         # input operand
 124
 125     def elaborate(self, platform):
 126         m = Module()
 127         comb = m.d.comb
 128
 129         for i, alu in enumerate(self.units):
 130             setattr(m.submodules, "comp%d" % i, alu)
 131
 132         go_rd_l0 = []
 133         go_rd_l1 = []
 134         go_wr_l = []
 135         issue_l = []
 136         busy_l = []
 137         req_rel_l = []
 138         done_l = []
 139         rd_rel0_l = []
 140         rd_rel1_l = []
 141         shadow_l = []
 142         godie_l = []
 143         for alu in self.units:
 144             req_rel_l.append(alu.req_rel_o)
 145             done_l.append(alu.done_o)
 146             shadow_l.append(alu.shadown_i)
 147             godie_l.append(alu.go_die_i)
 148             print(alu, "rel", alu.req_rel_o, alu.rd_rel_o)
 149             rd_rel0_l.append(alu.rd_rel_o[0])
 150             rd_rel1_l.append(alu.rd_rel_o[1])
 151             go_wr_l.append(alu.go_wr_i)
 152             go_rd_l0.append(alu.go_rd_i[0])
 153             go_rd_l1.append(alu.go_rd_i[1])
 154             issue_l.append(alu.issue_i)
 155             busy_l.append(alu.busy_o)
 156         comb += self.rd0.rel_o.eq(Cat(*rd_rel0_l))
 157         comb += self.rd1.rel_o.eq(Cat(*rd_rel1_l))
 158         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 159         comb += self.done_o.eq(Cat(*done_l))
 160         comb += self.busy_o.eq(Cat(*busy_l))
 161         comb += Cat(*godie_l).eq(self.go_die_i)
 162         comb += Cat(*shadow_l).eq(self.shadown_i)
 163         comb += Cat(*go_wr_l).eq(self.wr0.go_i)  # XXX TODO
 164         comb += Cat(*go_rd_l0).eq(self.rd0.go_i)
 165         comb += Cat(*go_rd_l1).eq(self.rd1.go_i)
 166         comb += Cat(*issue_l).eq(self.issue_i)
 167
 168         # connect data register input/output
 169
 170         # merge (OR) all integer FU / ALU outputs to a single value
 171         # XXX NOTE: this only works because there is a single "port"
 172         # protected by a single go_wr.  multi-issue requires a bus
 173         # to be inserted here.
 174         if self.units:
 175             o_data = ortreereduce(self.units, "o_data")
 176             comb += self.o_data.eq(o_data)
 177             if self.ldstmode:
 178                 addr_o = ortreereduce(self.units, "addr_o")
 179                 comb += self.addr_o.eq(addr_o)
 180
 181         for i, alu in enumerate(self.units):
 182             comb += alu.src1_i.eq(self.src1_i)
 183             comb += alu.src2_i.eq(self.src2_i)
 184             # temporary: set read mask to 0b111111111
 185             if hasattr(alu, "rdmaskn"):
 186                 with m.If(alu.busy_o):
 187                     comb += alu.rdmaskn.eq(-1)
 188
 189         if not self.ldstmode:
 190             return m
 191
 192         ldmem_l = []
 193         stmem_l = []
 194         go_ad_l = []
 195         go_st_l = []
 196         ld_l = []
 197         st_l = []
 198         adr_rel_l = []
 199         sto_rel_l = []
 200         for alu in self.units:
 201             ld_l.append(alu.ld_o)
 202             st_l.append(alu.st_o)
 203             adr_rel_l.append(alu.adr_rel_o)
 204             sto_rel_l.append(alu.sto_rel_o)
 205             ldmem_l.append(alu.load_mem_o)
 206             stmem_l.append(alu.stwd_mem_o)
 207             go_ad_l.append(alu.go_ad_i)
 208             go_st_l.append(alu.go_st_i)
 209         comb += self.ld_o.eq(Cat(*ld_l))
 210         comb += self.st_o.eq(Cat(*st_l))
 211         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 212         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 213         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 214         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 215         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 216         comb += Cat(*go_st_l).eq(self.go_st_i)
 217
 218         return m
 219
 220
 221 class CompUnitLDSTs(CompUnitsBase):
 222
 223     def __init__(self, rwid, opwid, n_ldsts, l0):
 224         """ Inputs:
 225
 226             * :rwid:   bit width of register file(s) - both FP and INT
 227             * :opwid:  operand bit width
 228         """
 229         self.opwid = opwid
 230
 231         # inputs
 232         self.op = CompLDSTOpSubset("cul_i")
 233
 234         # LD/ST Units
 235         units = []
 236         for i in range(n_ldsts):
 237             pi = l0.l0.dports[i]
 238             units.append(LDSTCompUnit(pi, rwid, awid=48))
 239
 240         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 241
 242     def elaborate(self, platform):
 243         m = CompUnitsBase.elaborate(self, platform)
 244         comb = m.d.comb
 245
 246         # hand the same operation to all units
 247         for ldst in self.units:
 248             comb += ldst.oper_i.eq(self.op)
 249
 250         return m
 251
 252
 253 class CompUnitALUs(CompUnitsBase):
 254
 255     def __init__(self, rwid, opwid, n_alus):
 256         """ Inputs:
 257
 258             * :rwid:   bit width of register file(s) - both FP and INT
 259             * :opwid:  operand bit width
 260         """
 261         self.opwid = opwid
 262
 263         # inputs
 264         self.op = CompALUOpSubset("cua_i")
 265
 266         # Int ALUs
 267         alus = []
 268
 269         units = []
 270         for i in range(n_alus):
 271             fu = ALUFunctionUnit(i)
 272             units.append(fu)
 273             alus.append(fu.alu)
 274
 275         CompUnitsBase.__init__(self, rwid, units)
 276
 277     def elaborate(self, platform):
 278         m = CompUnitsBase.elaborate(self, platform)
 279         comb = m.d.comb
 280
 281         # hand the subset of operation to ALUs
 282         for alu in self.units:
 283             comb += alu.oper_i.eq(self.op)
 284
 285         return m
 286
 287
 288 class CompUnitBR(CompUnitsBase):
 289
 290     def __init__(self, rwid, opwid):
 291         """ Inputs:
 292
 293             * :rwid:   bit width of register file(s) - both FP and INT
 294             * :opwid:  operand bit width
 295
 296             Note: bgt unit is returned so that a shadow unit can be created
 297             for it
 298         """
 299         self.opwid = opwid
 300
 301         # inputs
 302         self.op = CompALUOpSubset("cua_i")  # TODO - CompALUBranchSubset
 303         self.oper_i = Signal(opwid, reset_less=True)
 304         self.imm_i = Signal(rwid, reset_less=True)
 305
 306         # Branch ALU and CU
 307         self.bgt = BranchALU(rwid)
 308         aluopwid = 3  # extra bit for immediate mode
 309         self.br1 = MultiCompUnit(rwid, self.bgt, CompALUOpSubset)
 310         CompUnitsBase.__init__(self, rwid, [self.br1])
 311
 312     def elaborate(self, platform):
 313         m = CompUnitsBase.elaborate(self, platform)
 314         comb = m.d.comb
 315
 316         # hand the same operation to all units
 317         for alu in self.units:
 318             # comb += alu.oper_i.eq(self.op) # TODO
 319             comb += alu.oper_i.eq(self.oper_i)
 320             #comb += alu.imm_i.eq(self.imm_i)
 321
 322         return m
 323
 324
 325 class FunctionUnits(Elaboratable):
 326
 327     def __init__(self, n_reg, n_int_alus, n_src, n_dst):
 328         self.n_src, self.n_dst = n_src, n_dst
 329         self.n_reg = n_reg
 330         self.n_int_alus = nf = n_int_alus
 331
 332         self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
 333         self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)
 334
 335         self.readable_o = Signal(n_int_alus, reset_less=True)
 336         self.writable_o = Signal(n_int_alus, reset_less=True)
 337
 338         # arrays
 339         src = []
 340         rsel = []
 341         rd = []
 342         for i in range(n_src):
 343             j = i + 1  # name numbering to match src1/src2
 344             src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
 345             rsel.append(Signal(n_reg, name="src%d_rsel_o" %
 346                                j, reset_less=True))
 347             rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
 348         dst = []
 349         dsel = []
 350         wr = []
 351         for i in range(n_dst):
 352             j = i + 1  # name numbering to match src1/src2
 353             dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
 354             dsel.append(Signal(n_reg, name="dst%d_rsel_o" %
 355                                j, reset_less=True))
 356             wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
 357         wpnd = []
 358         pend = []
 359         for i in range(nf):
 360             j = i + 1  # name numbering to match src1/src2
 361             pend.append(Signal(nf, name="rd_src%d_pend_o" %
 362                                j, reset_less=True))
 363             wpnd.append(Signal(nf, name="wr_dst%d_pend_o" %
 364                                j, reset_less=True))
 365
 366         self.dest_i = dst     # Dest in (top)
 367         self.src_i = src      # oper in (top)
 368
 369         # for Register File Select Lines (horizontal), per-reg
 370         self.dst_rsel_o = dsel  # dest reg (bot)
 371         self.src_rsel_o = rsel  # src reg (bot)
 372
 373         self.go_rd_i = rd
 374         self.go_wr_i = wr
 375
 376         self.go_die_i = Signal(n_int_alus, reset_less=True)
 377         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 378
 379         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 380
 381     def elaborate(self, platform):
 382         m = Module()
 383         comb = m.d.comb
 384         sync = m.d.sync
 385
 386         n_intfus = self.n_int_alus
 387
 388         # Integer FU-FU Dep Matrix
 389         intfudeps = FUFUDepMatrix(n_intfus, n_intfus, 2, 1)
 390         m.submodules.intfudeps = intfudeps
 391         # Integer FU-Reg Dep Matrix
 392         intregdeps = FURegDepMatrix(n_intfus, self.n_reg, 2, 1)
 393         m.submodules.intregdeps = intregdeps
 394
 395         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 396         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 397
 398         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 399         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 400
 401         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 402         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 403         self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid
 404
 405         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 406         comb += intfudeps.go_die_i.eq(self.go_die_i)
 407         comb += self.readable_o.eq(intfudeps.readable_o)
 408         comb += self.writable_o.eq(intfudeps.writable_o)
 409
 410         # Connect function issue / arrays, and dest/src1/src2
 411         for i in range(self.n_src):
 412             print(i, self.go_rd_i, intfudeps.go_rd_i)
 413             comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
 414             comb += intregdeps.src_i[i].eq(self.src_i[i])
 415             comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
 416             comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
 417         for i in range(self.n_dst):
 418             print(i, self.go_wr_i, intfudeps.go_wr_i)
 419             comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
 420             comb += intregdeps.dest_i[i].eq(self.dest_i[i])
 421             comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
 422             comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
 423         comb += intregdeps.go_die_i.eq(self.go_die_i)
 424         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 425
 426         return m
 427
 428
 429 class Scoreboard(Elaboratable):
 430     def __init__(self, rwid, n_regs):
 431         """ Inputs:
 432
 433             * :rwid:   bit width of register file(s) - both FP and INT
 434             * :n_regs: depth of register file(s) - number of FP and INT regs
 435         """
 436         self.rwid = rwid
 437         self.n_regs = n_regs
 438
 439         # Register Files
 440         self.intregs = RegFileArray(rwid, n_regs)
 441         self.fpregs = RegFileArray(rwid, n_regs)
 442
 443         # Memory (test for now)
 444         pspec = TestMemPspec(ldst_ifacetype='testpi',
 445                              addr_wid=48,
 446                              mask_wid=8,
 447                              reg_wid=64)
 448         dut = TstL0CacheBuffer(pspec)
 449         self.l0 = TstL0CacheBuffer(pspec)
 450
 451         # issue q needs to get at these
 452         self.aluissue = IssueUnitGroup(2)
 453         self.lsissue = IssueUnitGroup(2)
 454         self.brissue = IssueUnitGroup(1)
 455         # and these
 456         self.instr = Decode2ToExecute1Type("sc_instr")
 457         self.br_oper_i = Signal(4, reset_less=True)
 458         self.br_imm_i = Signal(rwid, reset_less=True)
 459         self.ls_oper_i = Signal(4, reset_less=True)
 460
 461         # inputs
 462         self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
 463         self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
 464         self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
 465         self.reg_enable_i = Signal(reset_less=True)  # enable reg decode
 466
 467         # outputs
 468         self.issue_o = Signal(reset_less=True)  # instruction was accepted
 469         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 470
 471         # for branch speculation experiment.  branch_direction = 0 if
 472         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 473         # branch_succ and branch_fail are requests to have the current
 474         # instruction be dependent on the branch unit "shadow" capability.
 475         self.branch_succ_i = Signal(reset_less=True)
 476         self.branch_fail_i = Signal(reset_less=True)
 477         self.branch_direction_o = Signal(2, reset_less=True)
 478
 479     def elaborate(self, platform):
 480         m = Module()
 481         comb = m.d.comb
 482         sync = m.d.sync
 483
 484         m.submodules.intregs = self.intregs
 485         m.submodules.fpregs = self.fpregs
 486         m.submodules.l0 = l0 = self.l0
 487
 488         # register ports
 489         int_dest = self.intregs.write_port("dest")
 490         int_src1 = self.intregs.read_port("src1")
 491         int_src2 = self.intregs.read_port("src2")
 492
 493         fp_dest = self.fpregs.write_port("dest")
 494         fp_src1 = self.fpregs.read_port("src1")
 495         fp_src2 = self.fpregs.read_port("src2")
 496
 497         # Int ALUs and BR ALUs
 498         n_int_alus = 5
 499         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 500         cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs
 501
 502         # LDST Comp Units
 503         n_ldsts = 2
 504         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, l0)
 505
 506         # Comp Units
 507         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 508         bgt = cub.bgt  # get at the branch computation unit
 509         br1 = cub.br1
 510
 511         # Int FUs
 512         fu_n_src = 2
 513         fu_n_dst = 1
 514         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
 515                                                      fu_n_src, fu_n_dst)
 516
 517         # Memory FUs
 518         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 519
 520         # Memory Priority Picker 1: one gateway per memory port
 521         # picks 1 reader and 1 writer to intreg
 522         mempick1 = GroupPicker(n_ldsts, 1, 1)
 523         m.submodules.mempick1 = mempick1
 524
 525         # Count of number of FUs
 526         n_intfus = n_int_alus
 527         n_fp_fus = 0  # for now
 528
 529         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 530         # picks 1 reader and 1 writer to intreg
 531         ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
 532         m.submodules.intpick1 = ipick1
 533
 534         # INT/FP Issue Unit
 535         regdecode = RegDecode(self.n_regs)
 536         m.submodules.regdecode = regdecode
 537         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 538         m.submodules.issueunit = issueunit
 539
 540         # Shadow Matrix.  currently n_intfus shadows, to be used for
 541         # write-after-write hazards.  NOTE: there is one extra for branches,
 542         # so the shadow width is increased by 1
 543         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 544         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 545
 546         # record previous instruction to cast shadow on current instruction
 547         prev_shadow = Signal(n_intfus)
 548
 549         # Branch Speculation recorder.  tracks the success/fail state as
 550         # each instruction is issued, so that when the branch occurs the
 551         # allow/cancel can be issued as appropriate.
 552         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 553
 554         # ---------
 555         # ok start wiring things together...
 556         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 557         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 558         # ---------
 559
 560         # ---------
 561         # Issue Unit is where it starts.  set up some in/outs for this module
 562         # ---------
 563         comb += [regdecode.dest_i.eq(self.int_dest_i),
 564                  regdecode.src1_i.eq(self.int_src1_i),
 565                  regdecode.src2_i.eq(self.int_src2_i),
 566                  regdecode.enable_i.eq(self.reg_enable_i),
 567                  self.issue_o.eq(issueunit.issue_o)
 568                  ]
 569
 570         # take these to outside (issue needs them)
 571         comb += cua.op.eq_from_execute1(self.instr.do)
 572         comb += cub.oper_i.eq(self.br_oper_i)
 573         comb += cub.imm_i.eq(self.br_imm_i)
 574         comb += cul.op.eq_from_execute1(self.instr.do)
 575
 576         # TODO: issueunit.f (FP)
 577
 578         # and int function issue / busy arrays, and dest/src1/src2
 579         comb += intfus.dest_i[0].eq(regdecode.dest_o)
 580         comb += intfus.src_i[0].eq(regdecode.src1_o)
 581         comb += intfus.src_i[1].eq(regdecode.src2_o)
 582
 583         fn_issue_o = issueunit.fn_issue_o
 584
 585         comb += intfus.fn_issue_i.eq(fn_issue_o)
 586         comb += issueunit.busy_i.eq(cu.busy_o)
 587         comb += self.busy_o.eq(cu.busy_o.bool())
 588
 589         # ---------
 590         # Memory Function Unit
 591         # ---------
 592         reset_b = Signal(cul.n_units, reset_less=True)
 593         # XXX was cul.go_wr_i not done.o
 594         # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
 595         sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
 596
 597         comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
 598         comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
 599         comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit
 600
 601         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 602         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 603         # issue_i.  multi-issue gets a bit more complex but not a lot.
 604         prior_ldsts = Signal(cul.n_units, reset_less=True)
 605         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 606         with m.If(self.ls_oper_i[3]):  # LD bit of operand
 607             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 608         with m.If(self.ls_oper_i[2]):  # ST bit of operand
 609             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 610
 611         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 612         # just immediately activate go_adr
 613         sync += cul.go_ad_i.eq(cul.adr_rel_o)
 614
 615         # connect up address data
 616         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 617         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 618
 619         # connect loadable / storable to go_ld/go_st.
 620         # XXX should only be done when the memory ld/st has actually happened!
 621         go_st_i = Signal(cul.n_units, reset_less=True)
 622         go_ld_i = Signal(cul.n_units, reset_less=True)
 623         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
 624                            cul.adr_rel_o & cul.ld_o)
 625         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
 626                            cul.sto_rel_o & cul.st_o)
 627         comb += memfus.go_ld_i.eq(go_ld_i)
 628         comb += memfus.go_st_i.eq(go_st_i)
 629         #comb += cul.go_wr_i.eq(go_ld_i)
 630         comb += cul.go_st_i.eq(go_st_i)
 631
 632         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 633         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 634         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 635
 636         # ---------
 637         # merge shadow matrices outputs
 638         # ---------
 639
 640         # these are explained in ShadowMatrix docstring, and are to be
 641         # connected to the FUReg and FUFU Matrices, to get them to reset
 642         anydie = Signal(n_intfus, reset_less=True)
 643         allshadown = Signal(n_intfus, reset_less=True)
 644         shreset = Signal(n_intfus, reset_less=True)
 645         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 646         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 647         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 648
 649         # ---------
 650         # connect fu-fu matrix
 651         # ---------
 652
 653         # Group Picker... done manually for now.
 654         go_rd_o = ipick1.go_rd_o
 655         delay_pick_l = []
 656         go_wr_o = ipick1.go_wr_o
 657         go_rd_i = intfus.go_rd_i
 658         go_wr_i = intfus.go_wr_i
 659         go_die_i = intfus.go_die_i
 660         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 661         for i in range(fu_n_src):
 662             comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
 663         for i in range(fu_n_dst):
 664             comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
 665         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die
 666
 667         # Connect Picker
 668         # ---------
 669         int_rd_o = intfus.readable_o
 670         rrel_o = cu.rd_rel_o
 671         rqrl_o = cu.req_rel_o
 672         for i in range(fu_n_src):
 673             # connect with a delay so that src data arrives at the right time
 674             pick = Signal(n_intfus, name="pick_%d" % i)
 675             delay_pick = Signal(n_intfus, name="dp_%d" % i)
 676             rp = Signal(n_intfus, name="rp_%d" % i)
 677             comb += pick[0:n_intfus].eq(rrel_o[i][0:n_intfus] & ~delay_pick)
 678             comb += ipick1.rd_rel_i[i][0:n_intfus].eq(pick[0:n_intfus])
 679             comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
 680             sync += delay_pick.eq(rp)
 681             comb += rp.eq(go_rd_o[i])
 682             delay_pick_l.append(delay_pick)
 683         int_wr_o = intfus.writable_o
 684         for i in range(fu_n_dst):
 685             # XXX FIXME: rqrl_o[i] here
 686             comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
 687             comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])
 688
 689         # ---------
 690         # Shadow Matrix
 691         # ---------
 692
 693         comb += shadows.issue_i.eq(fn_issue_o)
 694         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 695         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 696         # ---------
 697         # NOTE; this setup is for the instruction order preservation...
 698
 699         # connect shadows / go_dies to Computation Units
 700         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 701         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 702
 703         # ok connect first n_int_fu shadows to busy lines, to create an
 704         # instruction-order linked-list-like arrangement, using a bit-matrix
 705         # (instead of e.g. a ring buffer).
 706
 707         # when written, the shadow can be cancelled (and was good)
 708         for i in range(n_intfus):
 709             #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 710             # XXX experiment: use ~cu.busy_o instead.  *should* be good
 711             # because the comp unit is only free once completed
 712             comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])
 713
 714         # *previous* instruction shadows *current* instruction, and, obviously,
 715         # if the previous is completed (!busy) don't cast the shadow!
 716         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 717         for i in range(n_intfus):
 718             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 719
 720         # ---------
 721         # ... and this is for branch speculation.  it uses the extra bit
 722         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 723         # only needs to set shadow_i, s_fail_i and s_good_i
 724
 725         # issue captures shadow_i (if enabled)
 726         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 727
 728         bactive = Signal(reset_less=True)
 729         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 730
 731         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 732         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 733             comb += bshadow.issue_i.eq(fn_issue_o)
 734             for i in range(n_intfus):
 735                 with m.If(fn_issue_o & (Const(1 << i))):
 736                     comb += bshadow.shadow_i[i][0].eq(1)
 737
 738         # finally, we need an indicator to the test infrastructure as to
 739         # whether the branch succeeded or failed, plus, link up to the
 740         # "recorder" of whether the instruction was under shadow or not
 741
 742         with m.If(br1.issue_i):
 743             sync += bspec.active_i.eq(1)
 744         with m.If(self.branch_succ_i):
 745             comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 746         with m.If(self.branch_fail_i):
 747             comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 748
 749         # branch is active (TODO: a better signal: this is over-using the
 750         # go_write signal - actually the branch should not be "writing")
 751         with m.If(br1.go_wr_i):
 752             sync += self.branch_direction_o.eq(br1.o_data+Const(1, 2))
 753             sync += bspec.active_i.eq(0)
 754             comb += bspec.br_i.eq(1)
 755             # branch occurs if data == 1, failed if data == 0
 756             comb += bspec.br_ok_i.eq(br1.o_data == 1)
 757             for i in range(n_intfus):
 758                 # *expected* direction of the branch matched against *actual*
 759                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 760                 # ... or it didn't
 761                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 762
 763         # ---------
 764         # Connect Register File(s)
 765         # ---------
 766         comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
 767         comb += int_src1.ren.eq(intfus.src_rsel_o[0])
 768         comb += int_src2.ren.eq(intfus.src_rsel_o[1])
 769
 770         # connect ALUs to regfile
 771         comb += int_dest.i_data.eq(cu.o_data)
 772         comb += cu.src1_i.eq(int_src1.o_data)
 773         comb += cu.src2_i.eq(int_src2.o_data)
 774
 775         # connect ALU Computation Units
 776         for i in range(fu_n_src):
 777             comb += cu.go_rd_i[i][0:n_intfus].eq(delay_pick_l[i][0:n_intfus])
 778         for i in range(fu_n_dst):
 779             comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
 780         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 781
 782         return m
 783
 784     def __iter__(self):
 785         yield from self.intregs
 786         yield from self.fpregs
 787         yield self.int_dest_i
 788         yield self.int_src1_i
 789         yield self.int_src2_i
 790         yield self.issue_o
 791         yield self.branch_succ_i
 792         yield self.branch_fail_i
 793         yield self.branch_direction_o
 794
 795     def ports(self):
 796         return list(self)
 797
 798
 799 class IssueToScoreboard(Elaboratable):
 800
 801     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 802         self.qlen = qlen
 803         self.n_in = n_in
 804         self.n_out = n_out
 805         self.rwid = rwid
 806         self.opw = opwid
 807         self.n_regs = n_regs
 808
 809         mqbits = unsigned(int(log(qlen) / log(2))+2)
 810         self.p_add_i = Signal(mqbits)  # instructions to add (from i_data)
 811         self.p_o_ready = Signal()  # instructions were added
 812         self.i_data = Instruction._nq(n_in, "i_data")
 813
 814         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 815         self.qlen_o = Signal(mqbits, reset_less=True)
 816
 817     def elaborate(self, platform):
 818         m = Module()
 819         comb = m.d.comb
 820         sync = m.d.sync
 821
 822         iq = InstructionQ(self.rwid, self.opw, self.qlen,
 823                           self.n_in, self.n_out)
 824         sc = Scoreboard(self.rwid, self.n_regs)
 825         m.submodules.iq = iq
 826         m.submodules.sc = sc
 827
 828         # get at the regfile for testing
 829         self.intregs = sc.intregs
 830
 831         # and the "busy" signal and instruction queue length
 832         comb += self.busy_o.eq(sc.busy_o)
 833         comb += self.qlen_o.eq(iq.qlen_o)
 834
 835         # link up instruction queue
 836         comb += iq.p_add_i.eq(self.p_add_i)
 837         comb += self.p_o_ready.eq(iq.p_o_ready)
 838         for i in range(self.n_in):
 839             comb += eq(iq.i_data[i], self.i_data[i])
 840
 841         # take instruction and process it.  note that it's possible to
 842         # "inspect" the queue contents *without* actually removing the
 843         # items.  items are only removed when the
 844
 845         # in "waiting" state
 846         wait_issue_br = Signal()
 847         wait_issue_alu = Signal()
 848         wait_issue_ls = Signal()
 849
 850         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 851             # set instruction pop length to 1 if the unit accepted
 852             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 853                 with m.If(iq.qlen_o != 0):
 854                     comb += iq.n_sub_i.eq(1)
 855             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 856                 with m.If(iq.qlen_o != 0):
 857                     comb += iq.n_sub_i.eq(1)
 858             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 859                 with m.If(iq.qlen_o != 0):
 860                     comb += iq.n_sub_i.eq(1)
 861
 862         # see if some instruction(s) are here.  note that this is
 863         # "inspecting" the in-place queue.  note also that on the
 864         # cycle following "waiting" for fn_issue_o to be set, the
 865         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 866         with m.If(iq.qlen_o != 0):
 867             # get the operands and operation
 868             instr = iq.o_data[0]
 869             imm = instr.do.imm_data.data
 870             dest = instr.write_reg.data
 871             src1 = instr.read_reg1.data
 872             src2 = instr.read_reg2.data
 873             op = instr.do.insn_type
 874             fu = instr.do.fn_unit
 875             opi = instr.do.imm_data.ok  # immediate set
 876
 877             # set the src/dest regs
 878             comb += sc.int_dest_i.eq(dest)
 879             comb += sc.int_src1_i.eq(src1)
 880             comb += sc.int_src2_i.eq(src2)
 881             comb += sc.reg_enable_i.eq(1)  # enable the regfile
 882             comb += sc.instr.eq(instr)
 883
 884             # choose a Function-Unit-Group
 885             with m.If(fu == Function.ALU):  # alu
 886                 comb += sc.aluissue.insn_i.eq(1)  # enable alu issue
 887                 comb += wait_issue_alu.eq(1)
 888             with m.Elif(fu == Function.LDST):  # ld/st
 889                 comb += sc.lsissue.insn_i.eq(1)  # enable ldst issue
 890                 comb += wait_issue_ls.eq(1)
 891
 892             with m.Elif((op & (0x3 << 2)) != 0):  # branch
 893                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 894                 comb += sc.br_imm_i.eq(imm)
 895                 comb += sc.brissue.insn_i.eq(1)
 896                 comb += wait_issue_br.eq(1)
 897             # XXX TODO
 898             # these indicate that the instruction is to be made
 899             # shadow-dependent on
 900             # (either) branch success or branch fail
 901             # yield sc.branch_fail_i.eq(branch_fail)
 902             # yield sc.branch_succ_i.eq(branch_success)
 903
 904         return m
 905
 906     def __iter__(self):
 907         yield self.p_o_ready
 908         for o in self.i_data:
 909             yield from list(o)
 910         yield self.p_add_i
 911
 912     def ports(self):
 913         return list(self)
 914
 915
 916 def power_instr_q(dut, pdecode2, ins, code):
 917     instrs = [pdecode2.e]
 918
 919     sendlen = 1
 920     for idx, instr in enumerate(instrs):
 921         yield dut.i_data[idx].eq(instr)
 922         insn_type = yield instr.do.insn_type
 923         fn_unit = yield instr.do.fn_unit
 924         print("senddata ", idx, insn_type, fn_unit, instr)
 925     yield dut.p_add_i.eq(sendlen)
 926     yield
 927     o_p_ready = yield dut.p_o_ready
 928     while not o_p_ready:
 929         yield
 930         o_p_ready = yield dut.p_o_ready
 931
 932     yield dut.p_add_i.eq(0)
 933
 934
 935 def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
 936             branch_success, branch_fail):
 937     instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
 938                'imm_data': (imm, op_imm),
 939                'read_reg1': src1, 'read_reg2': src2}]
 940
 941     sendlen = 1
 942     for idx, instr in enumerate(instrs):
 943         imm, op_imm = instr['imm_data']
 944         reg1 = instr['read_reg1']
 945         reg2 = instr['read_reg2']
 946         dest = instr['write_reg']
 947         insn_type = instr['insn_type']
 948         fn_unit = instr['fn_unit']
 949         yield dut.i_data[idx].do.insn_type.eq(insn_type)
 950         yield dut.i_data[idx].do.fn_unit.eq(fn_unit)
 951         yield dut.i_data[idx].read_reg1.data.eq(reg1)
 952         yield dut.i_data[idx].read_reg1.ok.eq(1)  # XXX TODO
 953         yield dut.i_data[idx].read_reg2.data.eq(reg2)
 954         yield dut.i_data[idx].read_reg2.ok.eq(1)  # XXX TODO
 955         yield dut.i_data[idx].write_reg.data.eq(dest)
 956         yield dut.i_data[idx].write_reg.ok.eq(1)  # XXX TODO
 957         yield dut.i_data[idx].do.imm_data.data.eq(imm)
 958         yield dut.i_data[idx].do.imm_data.ok.eq(op_imm)
 959         #di = yield dut.i_data[idx]
 960         #print("senddata %d %x" % (idx, di))
 961     yield dut.p_add_i.eq(sendlen)
 962     yield
 963     o_p_ready = yield dut.p_o_ready
 964     while not o_p_ready:
 965         yield
 966         o_p_ready = yield dut.p_o_ready
 967
 968     yield dut.p_add_i.eq(0)
 969
 970
 971 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 972     yield from disable_issue(dut)
 973     yield dut.int_dest_i.eq(dest)
 974     yield dut.int_src1_i.eq(src1)
 975     yield dut.int_src2_i.eq(src2)
 976     if (op & (0x3 << 2)) != 0:  # branch
 977         yield dut.brissue.insn_i.eq(1)
 978         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 979         yield dut.br_imm_i.eq(imm)
 980         dut_issue = dut.brissue
 981     else:
 982         yield dut.aluissue.insn_i.eq(1)
 983         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 984         yield dut.alu_imm_i.eq(imm)
 985         dut_issue = dut.aluissue
 986     yield dut.reg_enable_i.eq(1)
 987
 988     # these indicate that the instruction is to be made shadow-dependent on
 989     # (either) branch success or branch fail
 990     yield dut.branch_fail_i.eq(branch_fail)
 991     yield dut.branch_succ_i.eq(branch_success)
 992
 993     yield
 994     yield from wait_for_issue(dut, dut_issue)
 995
 996
 997 def print_reg(dut, rnums):
 998     rs = []
 999     for rnum in rnums:
1000         reg = yield dut.intregs.regs[rnum].reg
1001         rs.append("%x" % reg)
1002     rnums = map(str, rnums)
1003     print("reg %s: %s" % (','.join(rnums), ','.join(rs)))
1004
1005
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of ``n_ops`` randomly generated instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when ``shadowing``
    is set a ``(0, 0)`` pair is added as a seventh element.  Register
    numbers lie in 1..dut.n_regs-1, the immediate in 1..2**dut.rwid-1 and
    the operation in 0..max_opnums.
    """
    top_reg = dut.n_regs-1
    ops = []
    for _ in range(n_ops):
        # NOTE: the order of these randint calls determines the sequence
        # generated for a given seed
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, (1 << dut.rwid)-1)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        # opi is 1 only when the draw from 0..2 comes out as zero
        opi = int(randint(0, 2) == 0)

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
1021
1022
def wait_for_busy_clear(dut):
    """Simulation process: clock until ``dut.busy_o`` reads as zero.

    Prints "busy" for every cycle spent waiting.
    """
    while (yield dut.busy_o):
        print("busy")
        yield
1030
1031
def disable_issue(dut):
    """Simulation process: clear ``insn_i`` on the ALU, branch and LD/ST
    issue units (in that order)."""
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1036
1037
def wait_for_issue(dut, dut_issue):
    """Simulation process: clock until ``dut_issue.fn_issue_o`` is non-zero.

    Prints "busy" for every cycle spent waiting.  Once the instruction has
    been issued, all issue requests are cleared and ``reg_enable_i`` is
    dropped.
    """
    issued = yield dut_issue.fn_issue_o
    while not issued:
        print("busy")
        # yield from print_reg(dut, [1,2,3])
        yield
        issued = yield dut_issue.fn_issue_o

    # accepted: withdraw the request so that it is not issued twice
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    # yield from print_reg(dut, [1,2,3])
1049
1050
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: issue a branch plus shadowed instructions.

    Instructions are tuples ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``.  An ``op`` of 4 or above is a branch, in which case
    ``dest`` is instead a pair ``(branch_ok, branch_fail)`` of instruction
    lists: these are appended to the issue queue marked as shadowed on
    branch success / branch failure respectively.  Once the direction of
    the branch is known (``dut.branch_direction_o``), queued instructions
    from the path not taken are skipped.

    Afterwards the same program is replayed on ``alusim`` and the results
    are compared with ``alusim.check``.

    NOTE(review): ``alusim.op`` is called here with four arguments whereas
    scoreboard_sim passes six - confirm against the RegSim in use.
    """

    iseed = 3

    for i in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            #val = 31+i*3
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # the issue loop below consumes insts: keep a copy for the replay
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts  # same list object: appends below extend the queue
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # direction already known: no need for a shadow any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate as its third argument.  the
            # instruction tuples used here do not carry one, so pass zero
            # (leaving it out shifts every argument along by one and
            # fails with a TypeError for the missing final argument)
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the software model, following the branch actually taken
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1163
1164
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: assemble a short program and queue it.

    Each assembled instruction is placed on ``instruction``, one clock is
    allowed to pass, and the decoder output is then pushed into the DUT's
    queue with power_instr_q.  Once the queue has drained and the DUT is
    no longer busy, the registers are compared against ``alusim``.

    ``m`` is accepted but not used here.
    """
    seed(0)

    # preload register N with the value N (XXX actually, not random at
    # all), in both the DUT and the software model
    for regnum in range(1, dut.n_regs):
        #val = randint(0, (1<<alusim.rwidth)-1)
        #val = 31+i*3
        yield dut.intregs.regs[regnum].reg.eq(regnum)
        alusim.setval(regnum, regnum)

    # the program to run
    asm = []
    if False:
        asm.extend(["addi 2, 0, 0x4321",
                    "addi 3, 0, 0x1234",
                    "add  1, 3, 2",
                    "add  4, 3, 5"
                    ])
    asm.extend(["lbzu 6, 7(2)",
                ])

    with Program(asm, bigendian=False) as program:
        binaries = program.generate_instructions()
        listing = program.assembly.splitlines()

        # issue instruction(s), wait for issue to be free before proceeding
        for ins, code in zip(binaries, listing):
            yield instruction.eq(ins)          # raw binary instr.
            yield  # Delay(1e-6)

            print("binary 0x{:X}".format(ins & 0xffffffff))
            print("assembly", code)

            #alusim.op(op, opi, imm, src1, src2, dest)
            yield from power_instr_q(dut, pdecode2, ins, code)

    # wait for the instruction queue to drain...
    while (yield dut.qlen_o) != 0:
        yield
    # ... allow a few more clocks ...
    for _ in range(4):
        yield
    # ... and for all instructions to stop before checking
    yield from wait_for_busy_clear(dut)

    # check status
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
1221
1222
def scoreboard_sim(dut, alusim):
    """Simulation process: queue hand-written instructions and check them.

    Active entries are tuples ``(src1, src2, dest, op, fn_unit, opi, imm,
    (br_ok, br_fail))``.  Each one is first executed on the software model
    ``alusim`` and then pushed into the DUT's queue with instr_q.  Once
    the queue has drained and the DUT is no longer busy, the registers
    are compared with ``alusim.check``.

    The ``if False:`` sections are disabled regression vectors.
    NOTE(review): many of them use tuples with fewer than eight elements,
    which the unpacking in the issue loop would reject if enabled.
    """
    seed(0)

    # preload register N with the value N, in both the DUT and the model
    for regnum in range(1, dut.n_regs):
        #val = randint(0, (1<<alusim.rwidth)-1)
        #val = 31+i*3
        yield dut.intregs.regs[regnum].reg.eq(regnum)
        alusim.setval(regnum, regnum)

    # create some instructions (some random, some regression tests)
    instrs = []
    no_shadow = (0, 0)

    if False:
        instrs = create_random_ops(dut, 15, True, 4)

    if False:  # LD/ST test (with immediate)
        instrs.extend([
            (1, 2, 0, 0x20, 1, 1, (0, 0)),  # LD
            #(1, 2, 0, 0x10, 1, 1, (0, 0)),
        ])

    if False:
        instrs.extend([(1, 2, 2, 1, 1, 20, (0, 0))])

    if False:
        instrs.extend([
            (7, 3, 2, 4, 0, 0, (0, 0)),
            (7, 6, 6, 2, 0, 0, (0, 0)),
            (1, 7, 2, 2, 0, 0, (0, 0)),
        ])

    # --- the currently enabled test: two adds, then a multiply that
    # --- has its immediate flag set (immediate value 7)
    instrs.extend([
        (2, 3, 3, MicrOp.OP_ADD, Function.ALU, 0, 0, no_shadow),
        (5, 3, 3, MicrOp.OP_ADD, Function.ALU, 0, 0, no_shadow),
    ])
    instrs.extend([
        (3, 5, 5, MicrOp.OP_MUL_L64, Function.ALU, 1, 7, no_shadow),
    ])

    if False:
        instrs.extend([
            (2, 3, 3, MicrOp.OP_ADD, Function.ALU, 0, 0, (0, 0)),
        ])

    if False:
        instrs.extend([
            (2, 3, 3, 0, 0, 0, (0, 0)),
            (5, 3, 3, 1, 0, 0, (0, 0)),
            (3, 5, 5, 2, 0, 0, (0, 0)),
            (5, 3, 3, 3, 0, 0, (0, 0)),
            (3, 5, 5, 0, 0, 0, (0, 0)),
        ])

    if False:
        instrs.extend([
            (3, 3, 4, 0, 0, 13979, (0, 0)),
            (6, 4, 1, 2, 0, 40976, (0, 0)),
            (1, 4, 7, 4, 1, 23652, (0, 0)),
        ])

    if False:
        instrs.extend([
            (5, 6, 2, 1),
            (2, 2, 4, 0),
            #(2, 2, 3, 1),
        ])

    if False:
        instrs.extend([(2, 1, 2, 3)])

    if False:
        instrs.extend([
            (2, 6, 2, 1),
            (2, 1, 2, 0),
        ])

    if False:
        instrs.extend([
            (1, 2, 7, 2),
            (7, 1, 5, 0),
            (4, 4, 1, 1),
        ])

    if False:
        instrs.extend([
            (5, 6, 2, 2),
            (1, 1, 4, 1),
            (6, 5, 3, 0),
        ])

    if False:
        # Write-after-Write Hazard
        instrs.extend([
            (3, 6, 7, 2),
            (4, 4, 7, 1),
        ])

    if False:
        # self-read/write-after-write followed by Read-after-Write
        instrs.extend([
            (1, 1, 1, 1),
            (1, 5, 3, 0),
        ])

    if False:
        # Read-after-Write followed by self-read-after-write
        instrs.extend([
            (5, 6, 1, 2),
            (1, 1, 1, 1),
        ])

    if False:
        # self-read-write sandwich
        instrs.extend([
            (5, 6, 1, 2),
            (1, 1, 1, 1),
            (1, 5, 3, 0),
        ])

    if False:
        # very weird failure
        instrs.extend([
            (5, 2, 5, 2),
            (2, 6, 3, 0),
            (4, 2, 2, 1),
        ])

    if False:
        v1 = 4
        yield dut.intregs.regs[5].reg.eq(v1)
        alusim.setval(5, v1)
        yield dut.intregs.regs[3].reg.eq(5)
        alusim.setval(3, 5)
        instrs.extend([
            (5, 3, 3, 4, (0, 0)),
            (4, 2, 1, 2, (0, 1)),
        ])

    if False:
        v1 = 6
        yield dut.intregs.regs[5].reg.eq(v1)
        alusim.setval(5, v1)
        yield dut.intregs.regs[3].reg.eq(5)
        alusim.setval(3, 5)
        instrs.extend([
            (5, 3, 3, 4, (0, 0)),
            (4, 2, 1, 2, (1, 0)),
        ])

    if False:
        instrs.extend([
            (4, 3, 5, 1, 0, (0, 0)),
            (5, 2, 3, 1, 0, (0, 0)),
            (7, 1, 5, 2, 0, (0, 0)),
            (5, 6, 6, 4, 0, (0, 0)),
            (7, 5, 2, 2, 0, (1, 0)),
            (1, 7, 5, 0, 0, (0, 1)),
            (1, 6, 1, 2, 0, (1, 0)),
            (1, 6, 7, 3, 0, (0, 0)),
            (6, 7, 7, 0, 0, (0, 0)),
        ])

    # issue instruction(s), wait for issue to be free before proceeding
    print("instructions", instrs)
    for num, entry in enumerate(instrs):
        print("issue instruction", num, entry)
        src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = entry

        print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
              (num, src1, src2, dest, op, fn_unit, opi, imm))
        # software model first, then the hardware
        alusim.op(op, opi, imm, src1, src2, dest)
        yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                           br_ok, br_fail)

    # wait for the instruction queue to drain...
    while (yield dut.qlen_o) != 0:
        yield
    # ... allow a few more clocks ...
    for _ in range(4):
        yield
    # ... and for all instructions to stop before checking
    yield from wait_for_busy_clear(dut)

    # check status
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
1383
1384
def test_scoreboard():
    """Build the issue-queue/scoreboard DUT, dump RTLIL, run the test.

    A top-level module holding a PowerDecode2 and the DUT is converted to
    RTLIL and written to ``test_scoreboard6600.il``; the DUT on its own is
    then simulated with scoreboard_sim, producing
    ``test_scoreboard6600.vcd``.
    """
    regwidth = 64
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=regwidth, opwid=8, n_regs=8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # created, but not used by the enabled test

    m = Module()
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
    m.submodules.sim = dut

    # feed the raw instruction to the decoder
    m.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    m.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    # write out the design (decoder included) as RTLIL
    il_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    # alternative: drive the DUT from the decoder
    #run_simulation(m, power_sim(m, dut, pdecode2, instruction, alusim),
    #               vcd_name='test_powerboard6600.vcd')

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')

    # alternative: the branch-shadowing test
    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1417
1418
# when run as a script (rather than imported), run the scoreboard test
if __name__ == "__main__":
    test_scoreboard()