src/soc/experiment/score6600_multi.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5 from nmigen.back.pysim import Delay
   6
   7 from soc.regfile.regfile import RegFileArray, treereduce
   8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
   9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
  10 from soc.scoreboard.global_pending import GlobalPending
  11 from soc.scoreboard.group_picker import GroupPicker
  12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  15 from soc.scoreboard.memfu import MemFunctionUnits
  16
  17 from soc.experiment.compalu import ComputationUnitNoDelay
  18 from soc.experiment.compalu_multi import ComputationUnitNoDelay as MultiCompUnit
  19 from soc.experiment.compldst import LDSTCompUnit
  20 from soc.experiment.testmem import TestMemory
  21
  22 from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
  23
  24 from soc.decoder.power_enums import InternalOp, Function
  25 from soc.decoder.power_decoder import (create_pdecode)
  26 from soc.decoder.power_decoder2 import (PowerDecode2)
  27 from soc.simulator.program import Program
  28
  29
  30 from nmutil.latch import SRLatch
  31 from nmutil.nmoperator import eq
  32
  33 from random import randint, seed
  34 from copy import deepcopy
  35 from math import log
  36
  37 from soc.experiment.sim import RegSim, MemSim
  38 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
  39
  40
  41 class CompUnitsBase(Elaboratable):
  42     """ Computation Unit Base class.
  43
  44         Amazingly, this class works recursively.  It's supposed to just
  45         look after some ALUs (that can handle the same operations),
  46         grouping them together, however it turns out that the same code
  47         can also group *groups* of Computation Units together as well.
  48
  49         Basically it was intended just to concatenate the ALU's issue,
  50         go_rd etc. signals together, which start out as bits and become
  51         sequences.  Turns out that the same trick works just as well
  52         on Computation Units!
  53
  54         So this class may be used recursively to present a top-level
  55         sequential concatenation of all the signals in and out of
  56         ALUs, whilst at the same time making it convenient to group
  57         ALUs together.
  58
  59         At the lower level, the intent is that groups of (identical)
  60         ALUs may be passed the same operation.  Even beyond that,
  61         the intent is that that group of (identical) ALUs actually
  62         share the *same pipeline* and as such become a "Concurrent
  63         Computation Unit" as defined by Mitch Alsup (see section
  64         11.4.9.3)
  65     """
  66
  67     def __init__(self, rwid, units, ldstmode=False):
  68         """ Inputs:
  69
  70             * :rwid:   bit width of register file(s) - both FP and INT
  71             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  72         """
  73         self.units = units
  74         self.ldstmode = ldstmode
  75         self.rwid = rwid
  76         self.rwid = rwid
  77         if units and isinstance(units[0], CompUnitsBase):
  78             self.n_units = 0
  79             for u in self.units:
  80                 self.n_units += u.n_units
  81         else:
  82             self.n_units = len(units)
  83
  84         n_units = self.n_units
  85
  86         # inputs
  87         self.issue_i = Signal(n_units, reset_less=True)
  88         self.go_rd0_i = Signal(n_units, reset_less=True)
  89         self.go_rd1_i = Signal(n_units, reset_less=True)
  90         self.go_rd_i = [self.go_rd0_i, self.go_rd1_i] # XXX HACK!
  91         self.go_wr_i = Signal(n_units, reset_less=True)
  92         self.shadown_i = Signal(n_units, reset_less=True)
  93         self.go_die_i = Signal(n_units, reset_less=True)
  94         if ldstmode:
  95             self.go_ad_i = Signal(n_units, reset_less=True)
  96             self.go_st_i = Signal(n_units, reset_less=True)
  97
  98         # outputs
  99         self.busy_o = Signal(n_units, reset_less=True)
 100         self.rd_rel0_o = Signal(n_units, reset_less=True)
 101         self.rd_rel1_o = Signal(n_units, reset_less=True)
 102         self.rd_rel_o = [self.rd_rel0_o, self.rd_rel1_o] # HACK!
 103         self.req_rel_o = Signal(n_units, reset_less=True)
 104         self.done_o = Signal(n_units, reset_less=True)
 105         if ldstmode:
 106             self.ld_o = Signal(n_units, reset_less=True)  # op is LD
 107             self.st_o = Signal(n_units, reset_less=True)  # op is ST
 108             self.adr_rel_o = Signal(n_units, reset_less=True)
 109             self.sto_rel_o = Signal(n_units, reset_less=True)
 110             self.load_mem_o = Signal(n_units, reset_less=True)
 111             self.stwd_mem_o = Signal(n_units, reset_less=True)
 112             self.addr_o = Signal(rwid, reset_less=True)
 113
 114         # in/out register data (note: not register#, actual data)
 115         self.data_o = Signal(rwid, reset_less=True)
 116         self.src1_i = Signal(rwid, reset_less=True)
 117         self.src2_i = Signal(rwid, reset_less=True)
 118         # input operand
 119
 120     def elaborate(self, platform):
 121         m = Module()
 122         comb = m.d.comb
 123
 124         for i, alu in enumerate(self.units):
 125             setattr(m.submodules, "comp%d" % i, alu)
 126
 127         go_rd_l0 = []
 128         go_rd_l1 = []
 129         go_wr_l = []
 130         issue_l = []
 131         busy_l = []
 132         req_rel_l = []
 133         done_l = []
 134         rd_rel0_l = []
 135         rd_rel1_l = []
 136         shadow_l = []
 137         godie_l = []
 138         for alu in self.units:
 139             req_rel_l.append(alu.req_rel_o)
 140             done_l.append(alu.done_o)
 141             shadow_l.append(alu.shadown_i)
 142             godie_l.append(alu.go_die_i)
 143             print (alu, "rel", alu.req_rel_o, alu.rd_rel_o)
 144             if isinstance(alu, LDSTCompUnit) or \
 145                isinstance(alu, ComputationUnitNoDelay):
 146                 if isinstance(alu, CompUnitsBase):
 147                     ulen = alu.n_units
 148                 else:
 149                     ulen = 1
 150                 rd_rel0_l.append(Const(0, 64)) # FIXME
 151                 rd_rel1_l.append(Const(0, 64)) # FIXME
 152                 dummy1 = Signal(ulen, reset_less=True)
 153                 dummy2 = Signal(ulen, reset_less=True)
 154                 dummy3 = Signal(ulen, reset_less=True)
 155                 dummy4 = Signal(ulen, reset_less=True)
 156                 dummy5 = Signal(ulen, reset_less=True)
 157                 go_wr_l.append(dummy1)
 158                 go_rd_l0.append(dummy2)
 159                 go_rd_l1.append(dummy3)
 160                 issue_l.append(dummy4)
 161                 busy_l.append(dummy5)
 162             else:
 163                 rd_rel0_l.append(alu.rd_rel_o[0])
 164                 rd_rel1_l.append(alu.rd_rel_o[1])
 165                 go_wr_l.append(alu.go_wr_i[0])
 166                 go_rd_l0.append(alu.go_rd_i[0])
 167                 go_rd_l1.append(alu.go_rd_i[1])
 168                 issue_l.append(alu.issue_i)
 169                 busy_l.append(alu.busy_o)
 170         comb += self.rd_rel0_o.eq(Cat(*rd_rel0_l))
 171         comb += self.rd_rel1_o.eq(Cat(*rd_rel1_l))
 172         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 173         comb += self.done_o.eq(Cat(*done_l))
 174         comb += self.busy_o.eq(Cat(*busy_l))
 175         comb += Cat(*godie_l).eq(self.go_die_i)
 176         comb += Cat(*shadow_l).eq(self.shadown_i)
 177         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 178         comb += Cat(*go_rd_l0).eq(self.go_rd0_i)
 179         comb += Cat(*go_rd_l1).eq(self.go_rd1_i)
 180         comb += Cat(*issue_l).eq(self.issue_i)
 181
 182         # connect data register input/output
 183
 184         # merge (OR) all integer FU / ALU outputs to a single value
 185         # XXX NOTE: this only works because there is a single "port"
 186         # protected by a single go_wr.  multi-issue requires a bus
 187         # to be inserted here.
 188         if self.units:
 189             data_o = treereduce(self.units, "data_o")
 190             comb += self.data_o.eq(data_o)
 191             if self.ldstmode:
 192                 addr_o = treereduce(self.units, "addr_o")
 193                 comb += self.addr_o.eq(addr_o)
 194
 195         for i, alu in enumerate(self.units):
 196             comb += alu.src1_i.eq(self.src1_i)
 197             comb += alu.src2_i.eq(self.src2_i)
 198
 199         if not self.ldstmode:
 200             return m
 201
 202         ldmem_l = []
 203         stmem_l = []
 204         go_ad_l = []
 205         go_st_l = []
 206         ld_l = []
 207         st_l = []
 208         adr_rel_l = []
 209         sto_rel_l = []
 210         for alu in self.units:
 211             ld_l.append(alu.ld_o)
 212             st_l.append(alu.st_o)
 213             adr_rel_l.append(alu.adr_rel_o)
 214             sto_rel_l.append(alu.sto_rel_o)
 215             ldmem_l.append(alu.load_mem_o)
 216             stmem_l.append(alu.stwd_mem_o)
 217             go_ad_l.append(alu.go_ad_i)
 218             go_st_l.append(alu.go_st_i)
 219         comb += self.ld_o.eq(Cat(*ld_l))
 220         comb += self.st_o.eq(Cat(*st_l))
 221         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 222         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 223         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 224         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 225         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 226         comb += Cat(*go_st_l).eq(self.go_st_i)
 227
 228         return m
 229
 230
 231 class CompUnitLDSTs(CompUnitsBase):
 232
 233     def __init__(self, rwid, opwid, n_ldsts, mem):
 234         """ Inputs:
 235
 236             * :rwid:   bit width of register file(s) - both FP and INT
 237             * :opwid:  operand bit width
 238         """
 239         self.opwid = opwid
 240
 241         # inputs
 242         self.oper_i = Signal(opwid, reset_less=True)
 243         self.imm_i = Signal(rwid, reset_less=True)
 244
 245         # Int ALUs
 246         self.alus = []
 247         for i in range(n_ldsts):
 248             self.alus.append(ALU(rwid))
 249
 250         units = []
 251         for alu in self.alus:
 252             aluopwid = 4  # see compldst.py for "internal" opcode
 253             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 254
 255         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 256
 257     def elaborate(self, platform):
 258         m = CompUnitsBase.elaborate(self, platform)
 259         comb = m.d.comb
 260
 261         # hand the same operation to all units, 4 lower bits though
 262         for alu in self.units:
 263             comb += alu.oper_i[0:4].eq(self.oper_i)
 264             comb += alu.imm_i.eq(self.imm_i)
 265             comb += alu.isalu_i.eq(0)
 266
 267         return m
 268
 269
 270 class CompUnitALUs(CompUnitsBase):
 271
 272     def __init__(self, rwid, opwid, n_alus):
 273         """ Inputs:
 274
 275             * :rwid:   bit width of register file(s) - both FP and INT
 276             * :opwid:  operand bit width
 277         """
 278         self.opwid = opwid
 279
 280         # inputs
 281         self.op = CompALUOpSubset("cua_i")
 282         self.oper_i = Signal(opwid, reset_less=True)
 283         self.imm_i = Signal(rwid, reset_less=True)
 284
 285         # Int ALUs
 286         alus = []
 287         for i in range(n_alus):
 288             alus.append(ALU(rwid))
 289
 290         units = []
 291         for alu in alus:
 292             aluopwid = 3  # extra bit for immediate mode
 293             units.append(MultiCompUnit(rwid, alu))
 294
 295         CompUnitsBase.__init__(self, rwid, units)
 296
 297     def elaborate(self, platform):
 298         m = CompUnitsBase.elaborate(self, platform)
 299         comb = m.d.comb
 300
 301         # hand the subset of operation to ALUs
 302         for alu in self.units:
 303             comb += alu.oper_i.eq(self.op)
 304             #comb += alu.oper_i[0:3].eq(self.oper_i)
 305             #comb += alu.imm_i.eq(self.imm_i)
 306
 307         return m
 308
 309
 310 class CompUnitBR(CompUnitsBase):
 311
 312     def __init__(self, rwid, opwid):
 313         """ Inputs:
 314
 315             * :rwid:   bit width of register file(s) - both FP and INT
 316             * :opwid:  operand bit width
 317
 318             Note: bgt unit is returned so that a shadow unit can be created
 319             for it
 320         """
 321         self.opwid = opwid
 322
 323         # inputs
 324         self.op = CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
 325         self.oper_i = Signal(opwid, reset_less=True)
 326         self.imm_i = Signal(rwid, reset_less=True)
 327
 328         # Branch ALU and CU
 329         self.bgt = BranchALU(rwid)
 330         aluopwid = 3  # extra bit for immediate mode
 331         self.br1 = MultiCompUnit(rwid, self.bgt)
 332         CompUnitsBase.__init__(self, rwid, [self.br1])
 333
 334     def elaborate(self, platform):
 335         m = CompUnitsBase.elaborate(self, platform)
 336         comb = m.d.comb
 337
 338         # hand the same operation to all units
 339         for alu in self.units:
 340             #comb += alu.oper_i.eq(self.op) # TODO
 341             comb += alu.oper_i.eq(self.oper_i)
 342             #comb += alu.imm_i.eq(self.imm_i)
 343
 344         return m
 345
 346
 347 class FunctionUnits(Elaboratable):
 348
 349     def __init__(self, n_reg, n_int_alus, n_src, n_dst):
 350         self.n_src, self.n_dst = n_src, n_dst
 351         self.n_reg = n_reg
 352         self.n_int_alus = nf = n_int_alus
 353
 354         self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
 355         self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)
 356
 357         self.readable_o = Signal(n_int_alus, reset_less=True)
 358         self.writable_o = Signal(n_int_alus, reset_less=True)
 359
 360         # arrays
 361         src = []
 362         rsel = []
 363         rd = []
 364         for i in range(n_src):
 365             j = i + 1 # name numbering to match src1/src2
 366             src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
 367             rsel.append(Signal(n_reg, name="src%d_rsel_o" % j, reset_less=True))
 368             rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
 369         dst = []
 370         dsel = []
 371         wr = []
 372         for i in range(n_dst):
 373             j = i + 1 # name numbering to match src1/src2
 374             dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
 375             dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j, reset_less=True))
 376             wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
 377         wpnd = []
 378         pend = []
 379         for i in range(nf):
 380             j = i + 1 # name numbering to match src1/src2
 381             pend.append(Signal(nf, name="rd_src%d_pend_o" % j, reset_less=True))
 382             wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j, reset_less=True))
 383
 384         self.dest_i = Array(dst)     # Dest in (top)
 385         self.src_i = Array(src)      # oper in (top)
 386
 387         # for Register File Select Lines (horizontal), per-reg
 388         self.dst_rsel_o = Array(dsel) # dest reg (bot)
 389         self.src_rsel_o = Array(rsel)  # src reg (bot)
 390
 391         self.go_rd_i = Array(rd)
 392         self.go_wr_i = Array(wr)
 393
 394         self.go_die_i = Signal(n_int_alus, reset_less=True)
 395         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 396
 397         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 398
 399     def elaborate(self, platform):
 400         m = Module()
 401         comb = m.d.comb
 402         sync = m.d.sync
 403
 404         n_intfus = self.n_int_alus
 405
 406         # Integer FU-FU Dep Matrix
 407         intfudeps = FUFUDepMatrix(n_intfus, n_intfus, 2, 1)
 408         m.submodules.intfudeps = intfudeps
 409         # Integer FU-Reg Dep Matrix
 410         intregdeps = FURegDepMatrix(n_intfus, self.n_reg, 2, 1)
 411         m.submodules.intregdeps = intregdeps
 412
 413         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 414         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 415
 416         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 417         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 418
 419         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 420         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 421         self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid
 422
 423         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 424         comb += intfudeps.go_die_i.eq(self.go_die_i)
 425         comb += self.readable_o.eq(intfudeps.readable_o)
 426         comb += self.writable_o.eq(intfudeps.writable_o)
 427
 428         # Connect function issue / arrays, and dest/src1/src2
 429         for i in range(self.n_src):
 430             print (i, self.go_rd_i, intfudeps.go_rd_i)
 431             comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
 432             comb += intregdeps.src_i[i].eq(self.src_i[i])
 433             comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
 434             comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
 435         for i in range(self.n_dst):
 436             print (i, self.go_wr_i, intfudeps.go_wr_i)
 437             comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
 438             comb += intregdeps.dest_i[i].eq(self.dest_i[i])
 439             comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
 440             comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
 441         comb += intregdeps.go_die_i.eq(self.go_die_i)
 442         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 443
 444         return m
 445
 446
 447 class Scoreboard(Elaboratable):
 448     def __init__(self, rwid, n_regs):
 449         """ Inputs:
 450
 451             * :rwid:   bit width of register file(s) - both FP and INT
 452             * :n_regs: depth of register file(s) - number of FP and INT regs
 453         """
 454         self.rwid = rwid
 455         self.n_regs = n_regs
 456
 457         # Register Files
 458         self.intregs = RegFileArray(rwid, n_regs)
 459         self.fpregs = RegFileArray(rwid, n_regs)
 460
 461         # Memory (test for now)
 462         self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long
 463
 464         # issue q needs to get at these
 465         self.aluissue = IssueUnitGroup(2)
 466         self.lsissue = IssueUnitGroup(2)
 467         self.brissue = IssueUnitGroup(1)
 468         # and these
 469         self.alu_op = CompALUOpSubset("alu")
 470         self.br_oper_i = Signal(4, reset_less=True)
 471         self.br_imm_i = Signal(rwid, reset_less=True)
 472         self.ls_oper_i = Signal(4, reset_less=True)
 473         self.ls_imm_i = Signal(rwid, reset_less=True)
 474
 475         # inputs
 476         self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
 477         self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
 478         self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
 479         self.reg_enable_i = Signal(reset_less=True)  # enable reg decode
 480
 481         # outputs
 482         self.issue_o = Signal(reset_less=True)  # instruction was accepted
 483         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 484
 485         # for branch speculation experiment.  branch_direction = 0 if
 486         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 487         # branch_succ and branch_fail are requests to have the current
 488         # instruction be dependent on the branch unit "shadow" capability.
 489         self.branch_succ_i = Signal(reset_less=True)
 490         self.branch_fail_i = Signal(reset_less=True)
 491         self.branch_direction_o = Signal(2, reset_less=True)
 492
 493     def elaborate(self, platform):
 494         m = Module()
 495         comb = m.d.comb
 496         sync = m.d.sync
 497
 498         m.submodules.intregs = self.intregs
 499         m.submodules.fpregs = self.fpregs
 500         m.submodules.mem = mem = self.mem
 501
 502         # register ports
 503         int_dest = self.intregs.write_port("dest")
 504         int_src1 = self.intregs.read_port("src1")
 505         int_src2 = self.intregs.read_port("src2")
 506
 507         fp_dest = self.fpregs.write_port("dest")
 508         fp_src1 = self.fpregs.read_port("src1")
 509         fp_src2 = self.fpregs.read_port("src2")
 510
 511         # Int ALUs and BR ALUs
 512         n_int_alus = 5
 513         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 514         cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs
 515
 516         # LDST Comp Units
 517         n_ldsts = 2
 518         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)
 519
 520         # Comp Units
 521         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 522         bgt = cub.bgt  # get at the branch computation unit
 523         br1 = cub.br1
 524
 525         # Int FUs
 526         fu_n_src = 2
 527         fu_n_dst = 1
 528         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
 529                                                      fu_n_src, fu_n_dst)
 530
 531         # Memory FUs
 532         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 533
 534         # Memory Priority Picker 1: one gateway per memory port
 535         # picks 1 reader and 1 writer to intreg
 536         mempick1 = GroupPicker(n_ldsts, 1, 1)
 537         m.submodules.mempick1 = mempick1
 538
 539         # Count of number of FUs
 540         n_intfus = n_int_alus
 541         n_fp_fus = 0  # for now
 542
 543         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 544         # picks 1 reader and 1 writer to intreg
 545         ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
 546         m.submodules.intpick1 = ipick1
 547
 548         # INT/FP Issue Unit
 549         regdecode = RegDecode(self.n_regs)
 550         m.submodules.regdecode = regdecode
 551         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 552         m.submodules.issueunit = issueunit
 553
 554         # Shadow Matrix.  currently n_intfus shadows, to be used for
 555         # write-after-write hazards.  NOTE: there is one extra for branches,
 556         # so the shadow width is increased by 1
 557         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 558         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 559
 560         # record previous instruction to cast shadow on current instruction
 561         prev_shadow = Signal(n_intfus)
 562
 563         # Branch Speculation recorder.  tracks the success/fail state as
 564         # each instruction is issued, so that when the branch occurs the
 565         # allow/cancel can be issued as appropriate.
 566         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 567
 568         # ---------
 569         # ok start wiring things together...
 570         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 571         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 572         # ---------
 573
 574         # ---------
 575         # Issue Unit is where it starts.  set up some in/outs for this module
 576         # ---------
 577         comb += [regdecode.dest_i.eq(self.int_dest_i),
 578                  regdecode.src1_i.eq(self.int_src1_i),
 579                  regdecode.src2_i.eq(self.int_src2_i),
 580                  regdecode.enable_i.eq(self.reg_enable_i),
 581                  self.issue_o.eq(issueunit.issue_o)
 582                  ]
 583
 584         # take these to outside (issue needs them)
 585         comb += cua.op.eq(self.alu_op)
 586         comb += cub.oper_i.eq(self.br_oper_i)
 587         comb += cub.imm_i.eq(self.br_imm_i)
 588         comb += cul.oper_i.eq(self.ls_oper_i)
 589         comb += cul.imm_i.eq(self.ls_imm_i)
 590
 591         # TODO: issueunit.f (FP)
 592
 593         # and int function issue / busy arrays, and dest/src1/src2
 594         comb += intfus.dest_i[0].eq(regdecode.dest_o)
 595         comb += intfus.src_i[0].eq(regdecode.src1_o)
 596         comb += intfus.src_i[1].eq(regdecode.src2_o)
 597
 598         fn_issue_o = issueunit.fn_issue_o
 599
 600         comb += intfus.fn_issue_i.eq(fn_issue_o)
 601         comb += issueunit.busy_i.eq(cu.busy_o)
 602         comb += self.busy_o.eq(cu.busy_o.bool())
 603
 604         # ---------
 605         # Memory Function Unit
 606         # ---------
 607         reset_b = Signal(cul.n_units, reset_less=True)
 608         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 609
 610         comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
 611         comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
 612         comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit
 613
 614         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 615         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 616         # issue_i.  multi-issue gets a bit more complex but not a lot.
 617         prior_ldsts = Signal(cul.n_units, reset_less=True)
 618         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 619         with m.If(self.ls_oper_i[3]):  # LD bit of operand
 620             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 621         with m.If(self.ls_oper_i[2]):  # ST bit of operand
 622             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 623
 624         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 625         # just immediately activate go_adr
 626         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 627
 628         # connect up address data
 629         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 630         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 631
 632         # connect loadable / storable to go_ld/go_st.
 633         # XXX should only be done when the memory ld/st has actually happened!
 634         go_st_i = Signal(cul.n_units, reset_less=True)
 635         go_ld_i = Signal(cul.n_units, reset_less=True)
 636         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
 637                            cul.adr_rel_o & cul.ld_o)
 638         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
 639                            cul.sto_rel_o & cul.st_o)
 640         comb += memfus.go_ld_i.eq(go_ld_i)
 641         comb += memfus.go_st_i.eq(go_st_i)
 642         #comb += cul.go_wr_i.eq(go_ld_i)
 643         comb += cul.go_st_i.eq(go_st_i)
 644
 645         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 646         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 647         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 648
 649         # ---------
 650         # merge shadow matrices outputs
 651         # ---------
 652
 653         # these are explained in ShadowMatrix docstring, and are to be
 654         # connected to the FUReg and FUFU Matrices, to get them to reset
 655         anydie = Signal(n_intfus, reset_less=True)
 656         allshadown = Signal(n_intfus, reset_less=True)
 657         shreset = Signal(n_intfus, reset_less=True)
 658         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 659         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 660         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 661
 662         # ---------
 663         # connect fu-fu matrix
 664         # ---------
 665
 666         # Group Picker... done manually for now.
 667         go_rd_o = ipick1.go_rd_o
 668         go_wr_o = ipick1.go_wr_o
 669         go_rd_i = intfus.go_rd_i
 670         go_wr_i = intfus.go_wr_i
 671         go_die_i = intfus.go_die_i
 672         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 673         for i in range(fu_n_src):
 674             comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
 675         for i in range(fu_n_dst):
 676             comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
 677         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die
 678
 679         # Connect Picker
 680         # ---------
 681         int_rd_o = intfus.readable_o
 682         rrel_o = cu.rd_rel_o
 683         rqrl_o = cu.req_rel_o
 684         for i in range(fu_n_src):
 685             comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
 686             comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
 687         int_wr_o = intfus.writable_o
 688         for i in range(fu_n_dst):
 689             # XXX FIXME: rqrl_o[i] here
 690             comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
 691             comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])
 692
 693         # ---------
 694         # Shadow Matrix
 695         # ---------
 696
 697         comb += shadows.issue_i.eq(fn_issue_o)
 698         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 699         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 700         # ---------
 701         # NOTE; this setup is for the instruction order preservation...
 702
 703         # connect shadows / go_dies to Computation Units
 704         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 705         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 706
 707         # ok connect first n_int_fu shadows to busy lines, to create an
 708         # instruction-order linked-list-like arrangement, using a bit-matrix
 709         # (instead of e.g. a ring buffer).
 710
 711         # when written, the shadow can be cancelled (and was good)
 712         for i in range(n_intfus):
 713             #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 714             # XXX experiment: use ~cu.busy_o instead.  *should* be good
 715             # because the comp unit is only free once completed
 716             comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])
 717
 718         # *previous* instruction shadows *current* instruction, and, obviously,
 719         # if the previous is completed (!busy) don't cast the shadow!
 720         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 721         for i in range(n_intfus):
 722             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 723
 724         # ---------
 725         # ... and this is for branch speculation.  it uses the extra bit
 726         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 727         # only needs to set shadow_i, s_fail_i and s_good_i
 728
 729         # issue captures shadow_i (if enabled)
 730         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 731
 732         bactive = Signal(reset_less=True)
 733         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 734
 735         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 736         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 737             comb += bshadow.issue_i.eq(fn_issue_o)
 738             for i in range(n_intfus):
 739                 with m.If(fn_issue_o & (Const(1 << i))):
 740                     comb += bshadow.shadow_i[i][0].eq(1)
 741
 742         # finally, we need an indicator to the test infrastructure as to
 743         # whether the branch succeeded or failed, plus, link up to the
 744         # "recorder" of whether the instruction was under shadow or not
 745
 746         with m.If(br1.issue_i):
 747             sync += bspec.active_i.eq(1)
 748         with m.If(self.branch_succ_i):
 749             comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 750         with m.If(self.branch_fail_i):
 751             comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 752
 753         # branch is active (TODO: a better signal: this is over-using the
 754         # go_write signal - actually the branch should not be "writing")
 755         with m.If(br1.go_wr_i):
 756             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 757             sync += bspec.active_i.eq(0)
 758             comb += bspec.br_i.eq(1)
 759             # branch occurs if data == 1, failed if data == 0
 760             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 761             for i in range(n_intfus):
 762                 # *expected* direction of the branch matched against *actual*
 763                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 764                 # ... or it didn't
 765                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 766
 767         # ---------
 768         # Connect Register File(s)
 769         # ---------
 770         comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
 771         comb += int_src1.ren.eq(intfus.src_rsel_o[0])
 772         comb += int_src2.ren.eq(intfus.src_rsel_o[1])
 773
 774         # connect ALUs to regfile
 775         comb += int_dest.data_i.eq(cu.data_o)
 776         comb += cu.src1_i.eq(int_src1.data_o)
 777         comb += cu.src2_i.eq(int_src2.data_o)
 778
 779         # connect ALU Computation Units
 780         for i in range(fu_n_src):
 781             comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
 782         for i in range(fu_n_dst):
 783             comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
 784         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 785
 786         return m
 787
 788     def __iter__(self):
 789         yield from self.intregs
 790         yield from self.fpregs
 791         yield self.int_dest_i
 792         yield self.int_src1_i
 793         yield self.int_src2_i
 794         yield self.issue_o
 795         yield self.branch_succ_i
 796         yield self.branch_fail_i
 797         yield self.branch_direction_o
 798
 799     def ports(self):
 800         return list(self)
 801
 802
 803 class IssueToScoreboard(Elaboratable):
 804
 805     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 806         self.qlen = qlen
 807         self.n_in = n_in
 808         self.n_out = n_out
 809         self.rwid = rwid
 810         self.opw = opwid
 811         self.n_regs = n_regs
 812
 813         mqbits = unsigned(int(log(qlen) / log(2))+2)
 814         self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
 815         self.p_ready_o = Signal()  # instructions were added
 816         self.data_i = Instruction._nq(n_in, "data_i")
 817
 818         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 819         self.qlen_o = Signal(mqbits, reset_less=True)
 820
 821     def elaborate(self, platform):
 822         m = Module()
 823         comb = m.d.comb
 824         sync = m.d.sync
 825
 826         iq = InstructionQ(self.rwid, self.opw, self.qlen,
 827                           self.n_in, self.n_out)
 828         sc = Scoreboard(self.rwid, self.n_regs)
 829         m.submodules.iq = iq
 830         m.submodules.sc = sc
 831
 832         # get at the regfile for testing
 833         self.intregs = sc.intregs
 834
 835         # and the "busy" signal and instruction queue length
 836         comb += self.busy_o.eq(sc.busy_o)
 837         comb += self.qlen_o.eq(iq.qlen_o)
 838
 839         # link up instruction queue
 840         comb += iq.p_add_i.eq(self.p_add_i)
 841         comb += self.p_ready_o.eq(iq.p_ready_o)
 842         for i in range(self.n_in):
 843             comb += eq(iq.data_i[i], self.data_i[i])
 844
 845         # take instruction and process it.  note that it's possible to
 846         # "inspect" the queue contents *without* actually removing the
 847         # items.  items are only removed when the
 848
 849         # in "waiting" state
 850         wait_issue_br = Signal()
 851         wait_issue_alu = Signal()
 852         wait_issue_ls = Signal()
 853
 854         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 855             # set instruction pop length to 1 if the unit accepted
 856             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 857                 with m.If(iq.qlen_o != 0):
 858                     comb += iq.n_sub_i.eq(1)
 859             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 860                 with m.If(iq.qlen_o != 0):
 861                     comb += iq.n_sub_i.eq(1)
 862             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 863                 with m.If(iq.qlen_o != 0):
 864                     comb += iq.n_sub_i.eq(1)
 865
 866         # see if some instruction(s) are here.  note that this is
 867         # "inspecting" the in-place queue.  note also that on the
 868         # cycle following "waiting" for fn_issue_o to be set, the
 869         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 870         with m.If(iq.qlen_o != 0):
 871             # get the operands and operation
 872             instr = iq.data_o[0]
 873             imm = instr.imm_data.data
 874             dest = instr.write_reg.data
 875             src1 = instr.read_reg1.data
 876             src2 = instr.read_reg2.data
 877             op = instr.insn_type
 878             fu = instr.fn_unit
 879             opi = instr.imm_data.ok  # immediate set
 880
 881             # set the src/dest regs
 882             comb += sc.int_dest_i.eq(dest)
 883             comb += sc.int_src1_i.eq(src1)
 884             comb += sc.int_src2_i.eq(src2)
 885             comb += sc.reg_enable_i.eq(1)  # enable the regfile
 886
 887             # choose a Function-Unit-Group
 888             with m.If(fu == Function.ALU):  # alu
 889                 comb += sc.alu_op.eq_from_execute1(instr)
 890                 comb += sc.aluissue.insn_i.eq(1)
 891                 comb += wait_issue_alu.eq(1)
 892             with m.Elif((op & (0x3 << 2)) != 0):  # branch
 893                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 894                 comb += sc.br_imm_i.eq(imm)
 895                 comb += sc.brissue.insn_i.eq(1)
 896                 comb += wait_issue_br.eq(1)
 897             with m.Elif((op & (0x3 << 4)) != 0):  # ld/st
 898                 # see compldst.py
 899                 # bit 0: ADD/SUB
 900                 # bit 1: immed
 901                 # bit 4: LD
 902                 # bit 5: ST
 903                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 904                 comb += sc.ls_imm_i.eq(imm)
 905                 comb += sc.lsissue.insn_i.eq(1)
 906                 comb += wait_issue_ls.eq(1)
 907
 908             # XXX TODO
 909             # these indicate that the instruction is to be made
 910             # shadow-dependent on
 911             # (either) branch success or branch fail
 912             # yield sc.branch_fail_i.eq(branch_fail)
 913             # yield sc.branch_succ_i.eq(branch_success)
 914
 915         return m
 916
 917     def __iter__(self):
 918         yield self.p_ready_o
 919         for o in self.data_i:
 920             yield from list(o)
 921         yield self.p_add_i
 922
 923     def ports(self):
 924         return list(self)
 925
 926
 927 def power_instr_q(dut, pdecode2, ins, code):
 928     instrs = [pdecode2.e]
 929
 930     sendlen = 1
 931     for idx, instr in enumerate(instrs):
 932         yield dut.data_i[idx].eq(instr)
 933         insn_type = yield instr.insn_type
 934         fn_unit = yield instr.fn_unit
 935         print("senddata ", idx, insn_type, fn_unit, instr)
 936     yield dut.p_add_i.eq(sendlen)
 937     yield
 938     o_p_ready = yield dut.p_ready_o
 939     while not o_p_ready:
 940         yield
 941         o_p_ready = yield dut.p_ready_o
 942
 943     yield dut.p_add_i.eq(0)
 944
 945
 946 def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
 947             branch_success, branch_fail):
 948     instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
 949                 'imm_data': (imm, op_imm),
 950                'read_reg1': src1, 'read_reg2': src2}]
 951
 952     sendlen = 1
 953     for idx, instr in enumerate(instrs):
 954         imm, op_imm = instr['imm_data']
 955         reg1 = instr['read_reg1']
 956         reg2 = instr['read_reg2']
 957         dest = instr['write_reg']
 958         insn_type = instr['insn_type']
 959         fn_unit = instr['fn_unit']
 960         yield dut.data_i[idx].insn_type.eq(insn_type)
 961         yield dut.data_i[idx].fn_unit.eq(fn_unit)
 962         yield dut.data_i[idx].read_reg1.data.eq(reg1)
 963         yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO
 964         yield dut.data_i[idx].read_reg2.data.eq(reg2)
 965         yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO
 966         yield dut.data_i[idx].write_reg.data.eq(dest)
 967         yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO
 968         yield dut.data_i[idx].imm_data.data.eq(imm)
 969         yield dut.data_i[idx].imm_data.ok.eq(op_imm)
 970         di = yield dut.data_i[idx]
 971         print("senddata %d %x" % (idx, di))
 972     yield dut.p_add_i.eq(sendlen)
 973     yield
 974     o_p_ready = yield dut.p_ready_o
 975     while not o_p_ready:
 976         yield
 977         o_p_ready = yield dut.p_ready_o
 978
 979     yield dut.p_add_i.eq(0)
 980
 981
 982 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 983     yield from disable_issue(dut)
 984     yield dut.int_dest_i.eq(dest)
 985     yield dut.int_src1_i.eq(src1)
 986     yield dut.int_src2_i.eq(src2)
 987     if (op & (0x3 << 2)) != 0:  # branch
 988         yield dut.brissue.insn_i.eq(1)
 989         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 990         yield dut.br_imm_i.eq(imm)
 991         dut_issue = dut.brissue
 992     else:
 993         yield dut.aluissue.insn_i.eq(1)
 994         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 995         yield dut.alu_imm_i.eq(imm)
 996         dut_issue = dut.aluissue
 997     yield dut.reg_enable_i.eq(1)
 998
 999     # these indicate that the instruction is to be made shadow-dependent on
1000     # (either) branch success or branch fail
1001     yield dut.branch_fail_i.eq(branch_fail)
1002     yield dut.branch_succ_i.eq(branch_success)
1003
1004     yield
1005     yield from wait_for_issue(dut, dut_issue)
1006
1007
def print_reg(dut, rnums):
    """Simulation process: print the given integer registers in hex.

    Reads ``dut.intregs.regs[n].reg`` for every n in ``rnums`` and prints
    one line listing the register numbers and their values.
    """
    hex_values = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        hex_values.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print("reg %s: %s" % (labels, ','.join(hex_values)))
1015
1016
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of ``n_ops`` randomly generated instruction tuples.

    Each entry is ``(src1, src2, dest, op, opi, imm)``; with ``shadowing``
    set, a trailing ``(0, 0)`` shadow pair is appended to every entry.
    Register numbers are drawn from 1..dut.n_regs-1, the immediate from
    1..2**dut.rwid-1 and the operation from 0..max_opnums.
    """
    ops = []
    for _ in range(n_ops):
        # the order of the draws fixes the sequence for a given seed
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        imm = randint(1, (1 << dut.rwid)-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)
        # immediate flag: 1 only when the 0..2 draw comes up zero
        opi = int(randint(0, 2) == 0)

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
1032
1033
def wait_for_busy_clear(dut):
    """Simulation process: clock until ``dut.busy_o`` reads as zero.

    Prints "busy" once for every clock spent waiting.
    """
    while (yield dut.busy_o):
        print("busy",)
        yield
1041
1042
def disable_issue(dut):
    """Simulation process: clear insn_i on the ALU, branch and LD/ST groups."""
    for group in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, group).insn_i.eq(0)
1047
1048
def wait_for_issue(dut, dut_issue):
    """Simulation process: clock until ``dut_issue.fn_issue_o`` is non-zero.

    Prints "busy" for every clock spent waiting.  As soon as the issue is
    seen, all issue groups are de-asserted and ``reg_enable_i`` is
    cleared (without a further clock in between).
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy",)
        # yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    # yield from print_reg(dut, [1,2,3])
1060
1061
def scoreboard_branch_sim(dut, alusim):
    """Simulation process exercising branch shadowing on the scoreboard.

    Instructions are 5-tuples ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``.  For a branch (``op >= 4``) the ``dest`` slot instead
    holds a pair of lists ``(branch_ok, branch_fail)``: the instructions
    to run if the branch succeeds / fails.  Both sets are appended to the
    issue list, marked as shadowed on branch success / failure; once the
    direction of the branch is known the ones on the wrong path are
    skipped.

    After issuing, the same instruction tree is replayed on ``alusim``
    and the DUT is checked against it.

    dut    - the design under test, driven directly through int_instr
    alusim - reference model providing rwidth, setval, op, check and dump
    """

    iseed = 3

    for _ in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops (with shadowing) returns
            # 7-field tuples, which the 5-field unpacking below would
            # reject: this disabled variant needs updating before use.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            # fixed regression: one ALU op, then a branch with a single
            # instruction on its "fail" path and nothing on "success"
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # the issue loop below consumes insts: keep a copy for the model
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        branch_direction = 0
        while insts:
            yield
            yield
            i += 1
            # way the branch went.  the scoreboard stores the branch
            # result plus one: 0 = not yet known, 1 = failed, 2 = succeeded
            branch_direction = yield dut.branch_direction_o
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch failed, this is on the success path
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch succeeded, this is on the fail path
            if branch_direction != 0:
                # direction already resolved: no shadow needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch
                # success", the other marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        insts.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        insts.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate as its third argument.  the
            # instruction tuples used here carry none, so pass zero
            # (leaving it out raised TypeError: one argument short)
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the instruction tree on the reference model, following
        # whichever side of each branch the model itself takes
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): scoreboard_sim calls alusim.op with six
            # arguments (op, opi, imm, src1, src2, dest) - confirm that
            # this four-argument form still matches the model's signature
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1174
1175
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: run a short Power ISA program through the DUT.

    Registers 1..n_regs-1 of the DUT and of ``alusim`` are preloaded with
    their own index.  Each assembled instruction is placed on the
    ``instruction`` signal and, one clock later, queued through
    power_instr_q.  Once the queue has drained and the DUT is idle,
    ``alusim.check`` and ``alusim.dump`` are run against the DUT.

    ``m`` is accepted but not used.  Note that the instructions are not
    applied to ``alusim`` here (the alusim.op call is commented out).
    """
    seed(0)

    # preload the registers (XXX actually, not random at all)
    for regnum in range(1, dut.n_regs):
        #val = randint(0, (1<<alusim.rwidth)-1)
        #val = 31+regnum*3
        yield dut.intregs.regs[regnum].reg.eq(regnum)
        alusim.setval(regnum, regnum)

    # the program to run
    asm = [#"addi 2, 0, 0x4321",
           #"addi 3, 0, 0x1234",
           "add  1, 3, 2",
           "add  4, 3, 5"]
    with Program(asm) as program:
        binaries = program.generate_instructions()
        listing = program.assembly.splitlines()

        # issue instruction(s), wait for issue to be free before proceeding
        for ins, code in zip(binaries, listing):
            yield instruction.eq(ins)  # raw binary instr.
            yield  # one clock (rather than Delay(1e-6))

            print("binary 0x{:X}".format(ins & 0xffffffff))
            print("assembly", code)

            #alusim.op(op, opi, imm, src1, src2, dest)
            yield from power_instr_q(dut, pdecode2, ins, code)

    # wait for all instructions to stop before checking: first for the
    # queue to empty, then four more clocks, then for "busy" to drop
    while (yield dut.qlen_o) != 0:
        yield
    for _ in range(4):
        yield
    yield from wait_for_busy_clear(dut)

    # check status
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
1225
1226
def scoreboard_sim(dut, alusim):
    """Simulation process: run hand-picked instructions through the queue.

    Registers 1..n_regs-1 of both the DUT and the ``alusim`` reference
    model are preloaded with their own index.  Every enabled instruction
    is applied to ``alusim`` and then queued via ``instr_q``; once the
    queue has drained and the DUT is no longer busy, ``alusim.check`` and
    ``alusim.dump`` are run against the DUT.

    Instructions are 8-field tuples::

        (src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))

    Only the ``if True:`` group below is active; the ``if False:`` groups
    are regression vectors kept for manual re-enabling.  NOTE(review):
    most of the disabled groups hold shorter tuples (4 to 7 fields) which
    would not unpack in the issue loop if re-enabled as they stand.
    """

    seed(0)

    for i in range(1):

        # preload the registers: register i holds the value i
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x20, 1, 1, (0, 0)))  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, 0, 0, (0, 0)))
            instrs.append((7, 6, 6, 2, 0, 0, (0, 0)))
            instrs.append((1, 7, 2, 2, 0, 0, (0, 0)))

        # the currently active test: two ALU adds which both write
        # register 3 (the second one also reads it)
        if True:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
            instrs.append((5, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
        if False:
            instrs.append((3, 5, 5, InternalOp.OP_MUL_L64, Function.ALU,
                           1, 7, (0, 0)))
        if False:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        # the next two groups override registers 5 and 3 (in both the
        # DUT and the model) before adding an op-4 instruction followed
        # by one shadowed instruction
        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            print (i, instr)
            # every instruction must have exactly these eight fields
            src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (i, src1, src2, dest, op, fn_unit, opi, imm))
            # update the reference model first, then queue on the DUT
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking: first for
        # the queue to empty...
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # ... then four more clocks, then for "busy" to drop
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1386
1387
def test_scoreboard():
    """Build decoder plus issue/scoreboard design, dump it and simulate it.

    The combined design is written out as RTLIL to
    "test_scoreboard6600.il" and then simulated with power_sim, which
    produces 'test_powerboard6600.vcd'.
    """
    regwidth = 64
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1, rwid=regwidth,
                            opwid=8, n_regs=8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # created, but not used by power_sim

    m = Module()
    instruction = Signal(32)  # raw opcode fed into the decoder

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)
    pdecode2 = PowerDecode2(pdecode)

    # decoder and design under test share one top-level module
    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    m.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    m.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    # convert first, so that no file is touched if conversion fails
    il_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, sim_process, vcd_name='test_powerboard6600.vcd')

    # alternative simulations (currently disabled):
    #run_simulation(dut, scoreboard_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1420
1421
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()