src/soc/experiment/score6600_multi.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5 from nmigen.back.pysim import Delay
   6
   7 from soc.regfile.regfile import RegFileArray, treereduce
   8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
   9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
  10 from soc.scoreboard.global_pending import GlobalPending
  11 from soc.scoreboard.group_picker import GroupPicker
  12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  15 from soc.scoreboard.memfu import MemFunctionUnits
  16
  17 from soc.experiment.compalu import ComputationUnitNoDelay
  18 from soc.experiment.compalu_multi import ComputationUnitNoDelay as MultiCompUnit
  19 from soc.experiment.compldst import LDSTCompUnit
  20 from soc.experiment.testmem import TestMemory
  21
  22 from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
  23
  24 from soc.decoder.power_enums import InternalOp, Function
  25 from soc.decoder.power_decoder import (create_pdecode)
  26 from soc.decoder.power_decoder2 import (PowerDecode2)
  27 from soc.simulator.program import Program
  28
  29
  30 from nmutil.latch import SRLatch
  31 from nmutil.nmoperator import eq
  32
  33 from random import randint, seed
  34 from copy import deepcopy
  35 from math import log
  36
  37 from soc.experiment.sim import RegSim, MemSim
  38 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
  39
  40
  41 class CompUnitsBase(Elaboratable):
  42     """ Computation Unit Base class.
  43
  44         Amazingly, this class works recursively.  It's supposed to just
  45         look after some ALUs (that can handle the same operations),
  46         grouping them together, however it turns out that the same code
  47         can also group *groups* of Computation Units together as well.
  48
  49         Basically it was intended just to concatenate the ALU's issue,
  50         go_rd etc. signals together, which start out as bits and become
  51         sequences.  Turns out that the same trick works just as well
  52         on Computation Units!
  53
  54         So this class may be used recursively to present a top-level
  55         sequential concatenation of all the signals in and out of
  56         ALUs, whilst at the same time making it convenient to group
  57         ALUs together.
  58
  59         At the lower level, the intent is that groups of (identical)
  60         ALUs may be passed the same operation.  Even beyond that,
  61         the intent is that that group of (identical) ALUs actually
  62         share the *same pipeline* and as such become a "Concurrent
  63         Computation Unit" as defined by Mitch Alsup (see section
  64         11.4.9.3)
  65     """
  66
  67     def __init__(self, rwid, units, ldstmode=False):
  68         """ Inputs:
  69
  70             * :rwid:   bit width of register file(s) - both FP and INT
  71             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  72         """
  73         self.units = units
  74         self.ldstmode = ldstmode
  75         self.rwid = rwid
  76         self.rwid = rwid
  77         if units and isinstance(units[0], CompUnitsBase):
  78             self.n_units = 0
  79             for u in self.units:
  80                 self.n_units += u.n_units
  81         else:
  82             self.n_units = len(units)
  83
  84         n_units = self.n_units
  85
  86         # inputs
  87         self.issue_i = Signal(n_units, reset_less=True)
  88         self.go_rd0_i = Signal(n_units, reset_less=True)
  89         self.go_rd1_i = Signal(n_units, reset_less=True)
  90         self.go_rd_i = [self.go_rd0_i, self.go_rd1_i] # XXX HACK!
  91         self.go_wr_i = Signal(n_units, reset_less=True)
  92         self.shadown_i = Signal(n_units, reset_less=True)
  93         self.go_die_i = Signal(n_units, reset_less=True)
  94         if ldstmode:
  95             self.go_ad_i = Signal(n_units, reset_less=True)
  96             self.go_st_i = Signal(n_units, reset_less=True)
  97
  98         # outputs
  99         self.busy_o = Signal(n_units, reset_less=True)
 100         self.rd_rel0_o = Signal(n_units, reset_less=True)
 101         self.rd_rel1_o = Signal(n_units, reset_less=True)
 102         self.rd_rel_o = [self.rd_rel0_o, self.rd_rel1_o] # HACK!
 103         self.req_rel_o = Signal(n_units, reset_less=True)
 104         self.done_o = Signal(n_units, reset_less=True)
 105         if ldstmode:
 106             self.ld_o = Signal(n_units, reset_less=True)  # op is LD
 107             self.st_o = Signal(n_units, reset_less=True)  # op is ST
 108             self.adr_rel_o = Signal(n_units, reset_less=True)
 109             self.sto_rel_o = Signal(n_units, reset_less=True)
 110             self.load_mem_o = Signal(n_units, reset_less=True)
 111             self.stwd_mem_o = Signal(n_units, reset_less=True)
 112             self.addr_o = Signal(rwid, reset_less=True)
 113
 114         # in/out register data (note: not register#, actual data)
 115         self.data_o = Signal(rwid, reset_less=True)
 116         self.src1_i = Signal(rwid, reset_less=True)
 117         self.src2_i = Signal(rwid, reset_less=True)
 118         # input operand
 119
 120     def elaborate(self, platform):
 121         m = Module()
 122         comb = m.d.comb
 123
 124         for i, alu in enumerate(self.units):
 125             setattr(m.submodules, "comp%d" % i, alu)
 126
 127         go_rd_l0 = []
 128         go_rd_l1 = []
 129         go_wr_l = []
 130         issue_l = []
 131         busy_l = []
 132         req_rel_l = []
 133         done_l = []
 134         rd_rel0_l = []
 135         rd_rel1_l = []
 136         shadow_l = []
 137         godie_l = []
 138         for alu in self.units:
 139             req_rel_l.append(alu.req_rel_o)
 140             done_l.append(alu.done_o)
 141             shadow_l.append(alu.shadown_i)
 142             godie_l.append(alu.go_die_i)
 143             print (alu, alu.rd_rel_o)
 144             if isinstance(alu, LDSTCompUnit) or \
 145                isinstance(alu, CompUnitBR) or \
 146                isinstance(alu, ComputationUnitNoDelay):
 147                 dummy1 = Signal(64, reset_less=True)
 148                 dummy2 = Signal(64, reset_less=True)
 149                 dummy3 = Signal(64, reset_less=True)
 150                 go_wr_l.append(dummy1)
 151                 go_rd_l0.append(dummy2)
 152                 go_rd_l1.append(dummy3)
 153             else:
 154                 rd_rel0_l.append(alu.rd_rel_o[0])
 155                 rd_rel1_l.append(alu.rd_rel_o[1])
 156                 go_wr_l.append(alu.go_wr_i[0])
 157                 go_rd_l0.append(alu.go_rd_i[0])
 158                 go_rd_l1.append(alu.go_rd_i[1])
 159                 rd_rel0_l.append(Const(0, 64)) # FIXME
 160                 rd_rel1_l.append(Const(0, 64)) # FIXME
 161             issue_l.append(alu.issue_i)
 162             busy_l.append(alu.busy_o)
 163         comb += self.rd_rel0_o.eq(Cat(*rd_rel0_l))
 164         comb += self.rd_rel1_o.eq(Cat(*rd_rel1_l))
 165         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 166         comb += self.done_o.eq(Cat(*done_l))
 167         comb += self.busy_o.eq(Cat(*busy_l))
 168         comb += Cat(*godie_l).eq(self.go_die_i)
 169         comb += Cat(*shadow_l).eq(self.shadown_i)
 170         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 171         comb += Cat(*go_rd_l0).eq(self.go_rd0_i)
 172         comb += Cat(*go_rd_l1).eq(self.go_rd1_i)
 173         comb += Cat(*issue_l).eq(self.issue_i)
 174
 175         # connect data register input/output
 176
 177         # merge (OR) all integer FU / ALU outputs to a single value
 178         # XXX NOTE: this only works because there is a single "port"
 179         # protected by a single go_wr.  multi-issue requires a bus
 180         # to be inserted here.
 181         if self.units:
 182             data_o = treereduce(self.units, "data_o")
 183             comb += self.data_o.eq(data_o)
 184             if self.ldstmode:
 185                 addr_o = treereduce(self.units, "addr_o")
 186                 comb += self.addr_o.eq(addr_o)
 187
 188         for i, alu in enumerate(self.units):
 189             comb += alu.src1_i.eq(self.src1_i)
 190             comb += alu.src2_i.eq(self.src2_i)
 191
 192         if not self.ldstmode:
 193             return m
 194
 195         ldmem_l = []
 196         stmem_l = []
 197         go_ad_l = []
 198         go_st_l = []
 199         ld_l = []
 200         st_l = []
 201         adr_rel_l = []
 202         sto_rel_l = []
 203         for alu in self.units:
 204             ld_l.append(alu.ld_o)
 205             st_l.append(alu.st_o)
 206             adr_rel_l.append(alu.adr_rel_o)
 207             sto_rel_l.append(alu.sto_rel_o)
 208             ldmem_l.append(alu.load_mem_o)
 209             stmem_l.append(alu.stwd_mem_o)
 210             go_ad_l.append(alu.go_ad_i)
 211             go_st_l.append(alu.go_st_i)
 212         comb += self.ld_o.eq(Cat(*ld_l))
 213         comb += self.st_o.eq(Cat(*st_l))
 214         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 215         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 216         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 217         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 218         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 219         comb += Cat(*go_st_l).eq(self.go_st_i)
 220
 221         return m
 222
 223
 224 class CompUnitLDSTs(CompUnitsBase):
 225
 226     def __init__(self, rwid, opwid, n_ldsts, mem):
 227         """ Inputs:
 228
 229             * :rwid:   bit width of register file(s) - both FP and INT
 230             * :opwid:  operand bit width
 231         """
 232         self.opwid = opwid
 233
 234         # inputs
 235         self.oper_i = Signal(opwid, reset_less=True)
 236         self.imm_i = Signal(rwid, reset_less=True)
 237
 238         # Int ALUs
 239         self.alus = []
 240         for i in range(n_ldsts):
 241             self.alus.append(ALU(rwid))
 242
 243         units = []
 244         for alu in self.alus:
 245             aluopwid = 4  # see compldst.py for "internal" opcode
 246             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 247
 248         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 249
 250     def elaborate(self, platform):
 251         m = CompUnitsBase.elaborate(self, platform)
 252         comb = m.d.comb
 253
 254         # hand the same operation to all units, 4 lower bits though
 255         for alu in self.units:
 256             comb += alu.oper_i[0:4].eq(self.oper_i)
 257             comb += alu.imm_i.eq(self.imm_i)
 258             comb += alu.isalu_i.eq(0)
 259
 260         return m
 261
 262
 263 class CompUnitALUs(CompUnitsBase):
 264
 265     def __init__(self, rwid, opwid, n_alus):
 266         """ Inputs:
 267
 268             * :rwid:   bit width of register file(s) - both FP and INT
 269             * :opwid:  operand bit width
 270         """
 271         self.opwid = opwid
 272
 273         # inputs
 274         self.op = CompALUOpSubset("cua_i")
 275         self.oper_i = Signal(opwid, reset_less=True)
 276         self.imm_i = Signal(rwid, reset_less=True)
 277
 278         # Int ALUs
 279         alus = []
 280         for i in range(n_alus):
 281             alus.append(ALU(rwid))
 282
 283         units = []
 284         for alu in alus:
 285             aluopwid = 3  # extra bit for immediate mode
 286             units.append(MultiCompUnit(rwid, alu))
 287
 288         CompUnitsBase.__init__(self, rwid, units)
 289
 290     def elaborate(self, platform):
 291         m = CompUnitsBase.elaborate(self, platform)
 292         comb = m.d.comb
 293
 294         # hand the subset of operation to ALUs
 295         for alu in self.units:
 296             comb += alu.oper_i.eq(self.op)
 297             #comb += alu.oper_i[0:3].eq(self.oper_i)
 298             #comb += alu.imm_i.eq(self.imm_i)
 299
 300         return m
 301
 302
 303 class CompUnitBR(CompUnitsBase):
 304
 305     def __init__(self, rwid, opwid):
 306         """ Inputs:
 307
 308             * :rwid:   bit width of register file(s) - both FP and INT
 309             * :opwid:  operand bit width
 310
 311             Note: bgt unit is returned so that a shadow unit can be created
 312             for it
 313         """
 314         self.opwid = opwid
 315
 316         # inputs
 317         self.oper_i = Signal(opwid, reset_less=True)
 318         self.imm_i = Signal(rwid, reset_less=True)
 319
 320         # Branch ALU and CU
 321         self.bgt = BranchALU(rwid)
 322         aluopwid = 3  # extra bit for immediate mode
 323         self.br1 = ComputationUnitNoDelay(rwid, self.bgt)
 324         CompUnitsBase.__init__(self, rwid, [self.br1])
 325
 326     def elaborate(self, platform):
 327         m = CompUnitsBase.elaborate(self, platform)
 328         comb = m.d.comb
 329
 330         # hand the same operation to all units
 331         for alu in self.units:
 332             comb += alu.oper_i.eq(self.oper_i)
 333             #comb += alu.imm_i.eq(self.imm_i)
 334
 335         return m
 336
 337
 338 class FunctionUnits(Elaboratable):
 339
 340     def __init__(self, n_reg, n_int_alus, n_src, n_dst):
 341         self.n_src, self.n_dst = n_src, n_dst
 342         self.n_reg = n_reg
 343         self.n_int_alus = nf = n_int_alus
 344
 345         self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
 346         self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)
 347
 348         self.readable_o = Signal(n_int_alus, reset_less=True)
 349         self.writable_o = Signal(n_int_alus, reset_less=True)
 350
 351         # arrays
 352         src = []
 353         rsel = []
 354         rd = []
 355         for i in range(n_src):
 356             j = i + 1 # name numbering to match src1/src2
 357             src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
 358             rsel.append(Signal(n_reg, name="src%d_rsel_o" % j, reset_less=True))
 359             rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
 360         dst = []
 361         dsel = []
 362         wr = []
 363         for i in range(n_dst):
 364             j = i + 1 # name numbering to match src1/src2
 365             dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
 366             dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j, reset_less=True))
 367             wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
 368         wpnd = []
 369         pend = []
 370         for i in range(nf):
 371             j = i + 1 # name numbering to match src1/src2
 372             pend.append(Signal(nf, name="rd_src%d_pend_o" % j, reset_less=True))
 373             wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j, reset_less=True))
 374
 375         self.dest_i = Array(dst)     # Dest in (top)
 376         self.src_i = Array(src)      # oper in (top)
 377
 378         # for Register File Select Lines (horizontal), per-reg
 379         self.dst_rsel_o = Array(dsel) # dest reg (bot)
 380         self.src_rsel_o = Array(rsel)  # src reg (bot)
 381
 382         self.go_rd_i = Array(rd)
 383         self.go_wr_i = Array(wr)
 384
 385         self.go_die_i = Signal(n_int_alus, reset_less=True)
 386         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 387
 388         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 389
 390     def elaborate(self, platform):
 391         m = Module()
 392         comb = m.d.comb
 393         sync = m.d.sync
 394
 395         n_intfus = self.n_int_alus
 396
 397         # Integer FU-FU Dep Matrix
 398         intfudeps = FUFUDepMatrix(n_intfus, n_intfus, 2, 1)
 399         m.submodules.intfudeps = intfudeps
 400         # Integer FU-Reg Dep Matrix
 401         intregdeps = FURegDepMatrix(n_intfus, self.n_reg, 2, 1)
 402         m.submodules.intregdeps = intregdeps
 403
 404         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 405         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 406
 407         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 408         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 409
 410         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 411         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 412         self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid
 413
 414         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 415         comb += intfudeps.go_die_i.eq(self.go_die_i)
 416         comb += self.readable_o.eq(intfudeps.readable_o)
 417         comb += self.writable_o.eq(intfudeps.writable_o)
 418
 419         # Connect function issue / arrays, and dest/src1/src2
 420         for i in range(self.n_src):
 421             print (i, self.go_rd_i, intfudeps.go_rd_i)
 422             comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
 423             comb += intregdeps.src_i[i].eq(self.src_i[i])
 424             comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
 425             comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
 426         for i in range(self.n_dst):
 427             print (i, self.go_wr_i, intfudeps.go_wr_i)
 428             comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
 429             comb += intregdeps.dest_i[i].eq(self.dest_i[i])
 430             comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
 431             comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
 432         comb += intregdeps.go_die_i.eq(self.go_die_i)
 433         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 434
 435         return m
 436
 437
class Scoreboard(Elaboratable):
    """ Top-level wiring of the experiment: INT and FP register files, a
        test memory, ALU / LD-ST / Branch Computation Units, the Function
        Unit dependency matrices, memory Function Units, group pickers,
        the issue unit, two shadow matrices and a branch speculation
        recorder.

        The issue-unit groups (aluissue, lsissue, brissue) and the
        operation/immediate inputs are public attributes so that an
        external instruction issuer can drive them.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_op = CompALUOpSubset("alu")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Instantiate every sub-block and connect them together.

            Note for maintainers: several local variables below hold
            Signals created without an explicit name; nmigen derives the
            signal name from the variable name, so renaming them changes
            the names seen in generated output.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.mem = mem = self.mem

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested from the FP regfile but not connected to
        # anything further down in this method
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): 5 is hard-coded; presumably it must equal the total
        # unit count of cua + cul + cub below (2 + 2 + 1 if n_insns matches
        # the IssueUnitGroup sizes set in __init__) - confirm
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        # NOTE(review): n_ldsts sizes the memory FUs and picker, whereas the
        # LD/ST comp units themselves are sized from self.lsissue.n_insns
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)

        # Comp Units
        # (a group of groups: order here fixes the bit positions of each unit
        # in cu's concatenated signals - ALUs first, then LD/STs, then branch)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt  # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        fu_n_src = 2
        fu_n_dst = 1
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
                                                     fu_n_src, fu_n_dst)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        # (instantiated and added as a submodule; none of its ports are
        # connected in this method)
        mempick1 = GroupPicker(n_ldsts, 1, 1)
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
        m.submodules.intpick1 = ipick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): the sentence above looks stale - as written, the
        # code creates a second, separate matrix (bshadow) constructed with
        # a second argument of 1 rather than widening the first one
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq(self.alu_op)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i[0].eq(regdecode.dest_o)
        comb += intfus.src_i[0].eq(regdecode.src1_o)
        comb += intfus.src_i[1].eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # overall busy: true when any bit of cu.busy_o is set
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        # registered (one clock later) OR of the LD/ST group's go_st, go_wr
        # and go_die inputs, used as the memory FUs' address reset
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        # (hard-coded for exactly two LD/ST units)
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        # shadown is ANDed across both matrices, go_die is ORed
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = ipick1.go_rd_o
        go_wr_o = ipick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        for i in range(fu_n_src):
            comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
        for i in range(fu_n_dst):
            comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        int_rd_o = intfus.readable_o
        rrel_o = cu.rd_rel_o
        for i in range(fu_n_src):
            comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
            comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
        int_wr_o = intfus.writable_o
        for i in range(fu_n_dst):
            # the picker's request-release input is driven from cu.done_o
            # (not cu.req_rel_o)
            comb += ipick1.req_rel_i[i][0:n_intfus].eq(cu.done_o[0:n_intfus])
            comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
            # XXX experiment: use ~cu.busy_o instead.  *should* be good
            # because the comp unit is only free once completed
            comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): as with the instantiation above, the code here uses
        # the separate bshadow matrix (index [i][0]) rather than an extra
        # bit on "shadows" - the two lines above appear to be out of date

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch "active": the recorder is active or the branch unit is
        # being issued right now, and the branch unit's go_wr is not raised
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # select the unit whose issue bit i is set
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f is a 5-bit mask: it equals (1 << n_intfus) - 1 only while
        # n_int_alus stays at 5
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # direction output is branch data + 1, matching the 1/2 encoding
            # described in __init__ (0 = branch not met yet)
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            # this later sync assignment overrides the active_i.eq(1) above
            # if both conditions hold in the same cycle
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
        comb += int_src1.ren.eq(intfus.src_rsel_o[0])
        comb += int_src2.ren.eq(intfus.src_rsel_o[1])

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        for i in range(fu_n_src):
            comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
        # NOTE(review): cu.go_wr_i is a single Signal (unlike cu.go_rd_i,
        # which is a list of Signals), so go_wr_i[i] below selects bit i and
        # the [0:n_intfus] slice is then taken of that one bit - confirm
        # this is intended
        for i in range(fu_n_dst):
            comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m
 776
 777     def __iter__(self):
 778         yield from self.intregs
 779         yield from self.fpregs
 780         yield self.int_dest_i
 781         yield self.int_src1_i
 782         yield self.int_src2_i
 783         yield self.issue_o
 784         yield self.branch_succ_i
 785         yield self.branch_fail_i
 786         yield self.branch_direction_o
 787
 788     def ports(self):
 789         return list(self)
 790
 791
 792 class IssueToScoreboard(Elaboratable):
 793
 794     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 795         self.qlen = qlen
 796         self.n_in = n_in
 797         self.n_out = n_out
 798         self.rwid = rwid
 799         self.opw = opwid
 800         self.n_regs = n_regs
 801
 802         mqbits = unsigned(int(log(qlen) / log(2))+2)
 803         self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
 804         self.p_ready_o = Signal()  # instructions were added
 805         self.data_i = Instruction._nq(n_in, "data_i")
 806
 807         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 808         self.qlen_o = Signal(mqbits, reset_less=True)
 809
 810     def elaborate(self, platform):
 811         m = Module()
 812         comb = m.d.comb
 813         sync = m.d.sync
 814
 815         iq = InstructionQ(self.rwid, self.opw, self.qlen,
 816                           self.n_in, self.n_out)
 817         sc = Scoreboard(self.rwid, self.n_regs)
 818         m.submodules.iq = iq
 819         m.submodules.sc = sc
 820
 821         # get at the regfile for testing
 822         self.intregs = sc.intregs
 823
 824         # and the "busy" signal and instruction queue length
 825         comb += self.busy_o.eq(sc.busy_o)
 826         comb += self.qlen_o.eq(iq.qlen_o)
 827
 828         # link up instruction queue
 829         comb += iq.p_add_i.eq(self.p_add_i)
 830         comb += self.p_ready_o.eq(iq.p_ready_o)
 831         for i in range(self.n_in):
 832             comb += eq(iq.data_i[i], self.data_i[i])
 833
 834         # take instruction and process it.  note that it's possible to
 835         # "inspect" the queue contents *without* actually removing the
 836         # items.  items are only removed when the
 837
 838         # in "waiting" state
 839         wait_issue_br = Signal()
 840         wait_issue_alu = Signal()
 841         wait_issue_ls = Signal()
 842
 843         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 844             # set instruction pop length to 1 if the unit accepted
 845             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 846                 with m.If(iq.qlen_o != 0):
 847                     comb += iq.n_sub_i.eq(1)
 848             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 849                 with m.If(iq.qlen_o != 0):
 850                     comb += iq.n_sub_i.eq(1)
 851             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 852                 with m.If(iq.qlen_o != 0):
 853                     comb += iq.n_sub_i.eq(1)
 854
 855         # see if some instruction(s) are here.  note that this is
 856         # "inspecting" the in-place queue.  note also that on the
 857         # cycle following "waiting" for fn_issue_o to be set, the
 858         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 859         with m.If(iq.qlen_o != 0):
 860             # get the operands and operation
 861             instr = iq.data_o[0]
 862             imm = instr.imm_data.data
 863             dest = instr.write_reg.data
 864             src1 = instr.read_reg1.data
 865             src2 = instr.read_reg2.data
 866             op = instr.insn_type
 867             fu = instr.fn_unit
 868             opi = instr.imm_data.ok  # immediate set
 869
 870             # set the src/dest regs
 871             comb += sc.int_dest_i.eq(dest)
 872             comb += sc.int_src1_i.eq(src1)
 873             comb += sc.int_src2_i.eq(src2)
 874             comb += sc.reg_enable_i.eq(1)  # enable the regfile
 875
 876             # choose a Function-Unit-Group
 877             with m.If(fu == Function.ALU):  # alu
 878                 comb += sc.alu_op.eq_from_execute1(instr)
 879                 comb += sc.aluissue.insn_i.eq(1)
 880                 comb += wait_issue_alu.eq(1)
 881             with m.Elif((op & (0x3 << 2)) != 0):  # branch
 882                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 883                 comb += sc.br_imm_i.eq(imm)
 884                 comb += sc.brissue.insn_i.eq(1)
 885                 comb += wait_issue_br.eq(1)
 886             with m.Elif((op & (0x3 << 4)) != 0):  # ld/st
 887                 # see compldst.py
 888                 # bit 0: ADD/SUB
 889                 # bit 1: immed
 890                 # bit 4: LD
 891                 # bit 5: ST
 892                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 893                 comb += sc.ls_imm_i.eq(imm)
 894                 comb += sc.lsissue.insn_i.eq(1)
 895                 comb += wait_issue_ls.eq(1)
 896
 897             # XXX TODO
 898             # these indicate that the instruction is to be made
 899             # shadow-dependent on
 900             # (either) branch success or branch fail
 901             # yield sc.branch_fail_i.eq(branch_fail)
 902             # yield sc.branch_succ_i.eq(branch_success)
 903
 904         return m
 905
 906     def __iter__(self):
 907         yield self.p_ready_o
 908         for o in self.data_i:
 909             yield from list(o)
 910         yield self.p_add_i
 911
 912     def ports(self):
 913         return list(self)
 914
 915
 916 def power_instr_q(dut, pdecode2, ins, code):
 917     instrs = [pdecode2.e]
 918
 919     sendlen = 1
 920     for idx, instr in enumerate(instrs):
 921         yield dut.data_i[idx].eq(instr)
 922         insn_type = yield instr.insn_type
 923         fn_unit = yield instr.fn_unit
 924         print("senddata ", idx, insn_type, fn_unit, instr)
 925     yield dut.p_add_i.eq(sendlen)
 926     yield
 927     o_p_ready = yield dut.p_ready_o
 928     while not o_p_ready:
 929         yield
 930         o_p_ready = yield dut.p_ready_o
 931
 932     yield dut.p_add_i.eq(0)
 933
 934
 935 def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
 936             branch_success, branch_fail):
 937     instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
 938                 'imm_data': (imm, op_imm),
 939                'read_reg1': src1, 'read_reg2': src2}]
 940
 941     sendlen = 1
 942     for idx, instr in enumerate(instrs):
 943         imm, op_imm = instr['imm_data']
 944         reg1 = instr['read_reg1']
 945         reg2 = instr['read_reg2']
 946         dest = instr['write_reg']
 947         insn_type = instr['insn_type']
 948         fn_unit = instr['fn_unit']
 949         yield dut.data_i[idx].insn_type.eq(insn_type)
 950         yield dut.data_i[idx].fn_unit.eq(fn_unit)
 951         yield dut.data_i[idx].read_reg1.data.eq(reg1)
 952         yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO
 953         yield dut.data_i[idx].read_reg2.data.eq(reg2)
 954         yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO
 955         yield dut.data_i[idx].write_reg.data.eq(dest)
 956         yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO
 957         yield dut.data_i[idx].imm_data.data.eq(imm)
 958         yield dut.data_i[idx].imm_data.ok.eq(op_imm)
 959         di = yield dut.data_i[idx]
 960         print("senddata %d %x" % (idx, di))
 961     yield dut.p_add_i.eq(sendlen)
 962     yield
 963     o_p_ready = yield dut.p_ready_o
 964     while not o_p_ready:
 965         yield
 966         o_p_ready = yield dut.p_ready_o
 967
 968     yield dut.p_add_i.eq(0)
 969
 970
 971 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 972     yield from disable_issue(dut)
 973     yield dut.int_dest_i.eq(dest)
 974     yield dut.int_src1_i.eq(src1)
 975     yield dut.int_src2_i.eq(src2)
 976     if (op & (0x3 << 2)) != 0:  # branch
 977         yield dut.brissue.insn_i.eq(1)
 978         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 979         yield dut.br_imm_i.eq(imm)
 980         dut_issue = dut.brissue
 981     else:
 982         yield dut.aluissue.insn_i.eq(1)
 983         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 984         yield dut.alu_imm_i.eq(imm)
 985         dut_issue = dut.aluissue
 986     yield dut.reg_enable_i.eq(1)
 987
 988     # these indicate that the instruction is to be made shadow-dependent on
 989     # (either) branch success or branch fail
 990     yield dut.branch_fail_i.eq(branch_fail)
 991     yield dut.branch_succ_i.eq(branch_success)
 992
 993     yield
 994     yield from wait_for_issue(dut, dut_issue)
 995
 996
 997 def print_reg(dut, rnums):
 998     rs = []
 999     for rnum in rnums:
1000         reg = yield dut.intregs.regs[rnum].reg
1001         rs.append("%x" % reg)
1002     rnums = map(str, rnums)
1003     print("reg %s: %s" % (','.join(rnums), ','.join(rs)))
1004
1005
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of *n_ops* randomly generated instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when *shadowing*
    is true a trailing ``(0, 0)`` pair is appended to every tuple.
    Register numbers are drawn from 1..dut.n_regs-1, the immediate from
    1..2**dut.rwid-1 and the operation from 0..max_opnums (inclusive).
    """
    def rand_reg():
        # register 0 is never picked
        return randint(1, dut.n_regs-1)

    ops = []
    for _ in range(n_ops):
        # NB: the order of the random draws is kept fixed so that a given
        # seed always produces the same instruction stream
        src1 = rand_reg()
        src2 = rand_reg()
        imm = randint(1, (1 << dut.rwid)-1)
        dest = rand_reg()
        op = randint(0, max_opnums)
        # immediate flag: 1 only when the draw from 0..2 comes up zero
        opi = int(randint(0, 2) == 0)

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
1021
1022
def wait_for_busy_clear(dut):
    """Simulation process: step until ``dut.busy_o`` reads as zero.

    Prints "busy" once for every step spent waiting.
    """
    while (yield dut.busy_o):
        print("busy")
        yield
1030
1031
def disable_issue(dut):
    """Simulation process: clear the issue request (insn_i) of the ALU,
    branch and LD/ST issue units of *dut*, in that order.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1036
1037
def wait_for_issue(dut, dut_issue):
    """Simulation process: step until ``dut_issue.fn_issue_o`` is non-zero.

    Prints "busy" for every step spent waiting.  Once the issue has been
    seen, all issue requests are cleared and the regfile enable
    (``dut.reg_enable_i``) is dropped.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy")
        # yield from print_reg(dut, [1,2,3])
        yield

    # issued: withdraw the request(s) and disable the regfile again
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    # yield from print_reg(dut, [1,2,3])
1049
1050
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: exercise branch shadowing on the scoreboard.

    Registers 1..dut.n_regs-1 are loaded with random values (mirrored into
    *alusim*), a short instruction list containing a branch is issued via
    int_instr(), and once the DUT is no longer busy the same instructions
    are replayed on *alusim* and the two register states are compared.

    Instruction tuples here are ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``.  For a branch (op >= 4) the ``dest`` slot instead
    holds ``(branch_ok, branch_fail)``: two lists of instructions (or
    None) to run on the respective outcome.

    dut is expected to provide the attributes used by int_instr()
    (brissue, aluissue, int_dest_i, ...) plus branch_direction_o,
    intregs, n_regs and busy_o.
    """

    iseed = 3

    for _ in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for rnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops() returns 7-element tuples
            # when shadowing is set, which do not match the 5-element
            # layout unpacked below - update before re-enabling.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # independent copy for the software model: insts is consumed (and
        # extended) by the issue loop below
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        branch_direction = 0
        while insts:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without any shadow
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        insts.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        insts.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr() takes an immediate between op and src1.  the
            # instruction tuples used here carry none, so pass zero.
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the software model, following the branch direction
        # that the model itself computes
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): scoreboard_sim() calls alusim.op() with six
            # arguments (op, opi, imm, src1, src2, dest); confirm which
            # form RegSim.op actually accepts.
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1163
1164
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: run assembled instructions through the decoder
    and into the instruction queue, then check the register file.

    Every register 1..dut.n_regs-1 is preloaded with its own index (also
    recorded in *alusim*).  Each instruction produced by Program is
    driven onto *instruction*, given 1e-6 of Delay, and queued with
    power_instr_q().  When the queue has drained and the DUT is no longer
    busy, alusim.check() and alusim.dump() are run against the DUT.

    *m* is accepted but not used by this function.
    """

    seed(0)

    for _ in range(1):

        # preload the registers: not random at all, reg N holds N
        for rnum in range(1, dut.n_regs):
            val = rnum
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        # the program to run (alternatives that have been used here:
        # "addi 3, 0, 0x1234" and "addi 2, 0, 0x4321")
        lst = ["add  1, 3, 2"]
        with Program(lst) as program:
            gen = program.generate_instructions()
            asm_lines = program.assembly.splitlines()

            # issue instruction(s), wait for issue to be free before
            # proceeding
            for ins, code in zip(gen, asm_lines):
                yield instruction.eq(ins)          # raw binary instr.
                yield Delay(1e-6)

                print("binary 0x{:X}".format(ins & 0xffffffff))
                print("assembly", code)

                # NB: the software model is not updated here
                # (alusim.op(op, opi, imm, src1, src2, dest) is disabled)
                yield from power_instr_q(dut, pdecode2, ins, code)

        # wait for the queue to empty ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... give it four more steps ...
        for _ in range(4):
            yield
        # ... then wait for all units to go idle before checking
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1212
1213
def scoreboard_sim(dut, alusim):
    """Simulation process: queue a list of instructions and check results.

    Every register 1..dut.n_regs-1 is preloaded with its own index (also
    recorded in *alusim*).  Each instruction is applied to *alusim* and
    queued on *dut* with instr_q().  When the queue has drained and the
    DUT is no longer busy, alusim.check() and alusim.dump() are run.

    Instruction tuples must have the layout
    ``(src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))``.
    Many of the disabled ("if False") groups below use shorter layouts
    and would not unpack if simply re-enabled.
    """

    seed(0)

    for _ in range(1):

        # preload the registers: reg N holds N
        for rnum in range(1, dut.n_regs):
            val = rnum
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs += [(1, 2, 0, 0x20, 1, 1, (0, 0))]  # LD
            # ST variant: (1, 2, 0, 0x10, 1, 1, (0, 0))

        if False:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if False:
            instrs += [(7, 3, 2, 4, 0, 0, (0, 0)),
                       (7, 6, 6, 2, 0, 0, (0, 0)),
                       (1, 7, 2, 2, 0, 0, (0, 0))]

        if True:
            # the group that is currently enabled: two dependent ADDs
            instrs += [(2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                        0, 0, (0, 0)),
                       (5, 3, 3, InternalOp.OP_ADD, Function.ALU,
                        0, 0, (0, 0))]
        if False:
            instrs += [(3, 5, 5, InternalOp.OP_MUL_L64, Function.ALU,
                        1, 7, (0, 0))]
        if False:
            instrs += [(2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                        0, 0, (0, 0))]

        if False:
            instrs += [(2, 3, 3, 0, 0, 0, (0, 0)),
                       (5, 3, 3, 1, 0, 0, (0, 0)),
                       (3, 5, 5, 2, 0, 0, (0, 0)),
                       (5, 3, 3, 3, 0, 0, (0, 0)),
                       (3, 5, 5, 0, 0, 0, (0, 0))]

        if False:
            instrs += [(3, 3, 4, 0, 0, 13979, (0, 0)),
                       (6, 4, 1, 2, 0, 40976, (0, 0)),
                       (1, 4, 7, 4, 1, 23652, (0, 0))]

        if False:
            instrs += [(5, 6, 2, 1),
                       (2, 2, 4, 0)]
            # also tried: (2, 2, 3, 1)

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs += [(2, 6, 2, 1),
                       (2, 1, 2, 0)]

        if False:
            instrs += [(1, 2, 7, 2),
                       (7, 1, 5, 0),
                       (4, 4, 1, 1)]

        if False:
            instrs += [(5, 6, 2, 2),
                       (1, 1, 4, 1),
                       (6, 5, 3, 0)]

        if False:
            # Write-after-Write Hazard
            instrs += [(3, 6, 7, 2),
                       (4, 4, 7, 1)]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [(1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1)]

        if False:
            # self-read-write sandwich
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # very weird failure
            instrs += [(5, 2, 5, 2),
                       (2, 6, 3, 0),
                       (4, 2, 2, 1)]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (0, 1))]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (1, 0))]

        if False:
            instrs += [(4, 3, 5, 1, 0, (0, 0)),
                       (5, 2, 3, 1, 0, (0, 0)),
                       (7, 1, 5, 2, 0, (0, 0)),
                       (5, 6, 6, 4, 0, (0, 0)),
                       (7, 5, 2, 2, 0, (1, 0)),
                       (1, 7, 5, 0, 0, (0, 1)),
                       (1, 6, 1, 2, 0, (1, 0)),
                       (1, 6, 7, 3, 0, (0, 0)),
                       (6, 7, 7, 0, 0, (0, 0))]

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, entry in enumerate(instrs):
            print(idx, entry)
            src1, src2, dest, op, fn_unit, opi, imm, shadow = entry
            br_ok, br_fail = shadow

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (idx, src1, src2, dest, op, fn_unit, opi, imm))
            # software model first, then the hardware queue
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for the queue to empty ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... give it four more steps ...
        for _ in range(4):
            yield
        # ... then wait for all units to go idle before checking
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1373
1374
def test_scoreboard():
    """Build decoder + IssueToScoreboard, dump RTLIL and run power_sim.

    Writes "test_scoreboard6600.il" into the current directory and lets
    the simulator write "test_powerboard6600.vcd".
    """
    regwidth = 64
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=regwidth, opwid=8, n_regs=8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # created but not used below

    m = Module()
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)
    m.submodules.sim = dut

    # feed the raw instruction word into the decoder
    m.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    m.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    # dump the design as RTLIL
    il_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, sim_process, vcd_name='test_powerboard6600.vcd')

    # alternative test-benches (currently disabled):
    #run_simulation(dut, scoreboard_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1407
1408
# run the scoreboard test-bench when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()