src/soc/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5
   6 from soc.regfile.regfile import RegFileArray, treereduce
   7 from soc.scoreboard.fu_fu_matrix import FUFUDepMatrix
   8 from soc.scoreboard.fu_reg_matrix import FURegDepMatrix
   9 from soc.scoreboard.global_pending import GlobalPending
  10 from soc.scoreboard.group_picker import GroupPicker
  11 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  12 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  13 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  14 from soc.scoreboard.memfu import MemFunctionUnits
  15
  16 from .compalu import ComputationUnitNoDelay
  17 from .compldst import LDSTCompUnit
  18 from .testmem import TestMemory
  19
  20 from .alu_hier import ALU, BranchALU
  21 from nmutil.latch import SRLatch
  22 from nmutil.nmoperator import eq
  23
  24 from random import randint, seed
  25 from copy import deepcopy
  26 from math import log
  27
  28 from soc.experiment.sim import RegSim, MemSim
  29 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
  30
  31
  32 class CompUnitsBase(Elaboratable):
  33     """ Computation Unit Base class.
  34
  35         Amazingly, this class works recursively.  It's supposed to just
  36         look after some ALUs (that can handle the same operations),
  37         grouping them together, however it turns out that the same code
  38         can also group *groups* of Computation Units together as well.
  39
  40         Basically it was intended just to concatenate the ALU's issue,
  41         go_rd etc. signals together, which start out as bits and become
  42         sequences.  Turns out that the same trick works just as well
  43         on Computation Units!
  44
  45         So this class may be used recursively to present a top-level
  46         sequential concatenation of all the signals in and out of
  47         ALUs, whilst at the same time making it convenient to group
  48         ALUs together.
  49
  50         At the lower level, the intent is that groups of (identical)
  51         ALUs may be passed the same operation.  Even beyond that,
  52         the intent is that that group of (identical) ALUs actually
  53         share the *same pipeline* and as such become a "Concurrent
  54         Computation Unit" as defined by Mitch Alsup (see section
  55         11.4.9.3)
  56     """
  57
  58     def __init__(self, rwid, units, ldstmode=False):
  59         """ Inputs:
  60
  61             * :rwid:   bit width of register file(s) - both FP and INT
  62             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  63         """
  64         self.units = units
  65         self.ldstmode = ldstmode
  66         self.rwid = rwid
  67         self.rwid = rwid
  68         if units and isinstance(units[0], CompUnitsBase):
  69             self.n_units = 0
  70             for u in self.units:
  71                 self.n_units += u.n_units
  72         else:
  73             self.n_units = len(units)
  74
  75         n_units = self.n_units
  76
  77         # inputs
  78         self.issue_i = Signal(n_units, reset_less=True)
  79         self.go_rd_i = Signal(n_units, reset_less=True)
  80         self.go_wr_i = Signal(n_units, reset_less=True)
  81         self.shadown_i = Signal(n_units, reset_less=True)
  82         self.go_die_i = Signal(n_units, reset_less=True)
  83         if ldstmode:
  84             self.go_ad_i = Signal(n_units, reset_less=True)
  85             self.go_st_i = Signal(n_units, reset_less=True)
  86
  87         # outputs
  88         self.busy_o = Signal(n_units, reset_less=True)
  89         self.rd_rel_o = Signal(n_units, reset_less=True)
  90         self.req_rel_o = Signal(n_units, reset_less=True)
  91         self.done_o = Signal(n_units, reset_less=True)
  92         if ldstmode:
  93             self.ld_o = Signal(n_units, reset_less=True)  # op is LD
  94             self.st_o = Signal(n_units, reset_less=True)  # op is ST
  95             self.adr_rel_o = Signal(n_units, reset_less=True)
  96             self.sto_rel_o = Signal(n_units, reset_less=True)
  97             self.load_mem_o = Signal(n_units, reset_less=True)
  98             self.stwd_mem_o = Signal(n_units, reset_less=True)
  99             self.addr_o = Signal(rwid, reset_less=True)
 100
 101         # in/out register data (note: not register#, actual data)
 102         self.data_o = Signal(rwid, reset_less=True)
 103         self.src1_i = Signal(rwid, reset_less=True)
 104         self.src2_i = Signal(rwid, reset_less=True)
 105         # input operand
 106
 107     def elaborate(self, platform):
 108         m = Module()
 109         comb = m.d.comb
 110
 111         for i, alu in enumerate(self.units):
 112             setattr(m.submodules, "comp%d" % i, alu)
 113
 114         go_rd_l = []
 115         go_wr_l = []
 116         issue_l = []
 117         busy_l = []
 118         req_rel_l = []
 119         done_l = []
 120         rd_rel_l = []
 121         shadow_l = []
 122         godie_l = []
 123         for alu in self.units:
 124             req_rel_l.append(alu.req_rel_o)
 125             done_l.append(alu.done_o)
 126             rd_rel_l.append(alu.rd_rel_o)
 127             shadow_l.append(alu.shadown_i)
 128             godie_l.append(alu.go_die_i)
 129             go_wr_l.append(alu.go_wr_i)
 130             go_rd_l.append(alu.go_rd_i)
 131             issue_l.append(alu.issue_i)
 132             busy_l.append(alu.busy_o)
 133         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 134         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 135         comb += self.done_o.eq(Cat(*done_l))
 136         comb += self.busy_o.eq(Cat(*busy_l))
 137         comb += Cat(*godie_l).eq(self.go_die_i)
 138         comb += Cat(*shadow_l).eq(self.shadown_i)
 139         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 140         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 141         comb += Cat(*issue_l).eq(self.issue_i)
 142
 143         # connect data register input/output
 144
 145         # merge (OR) all integer FU / ALU outputs to a single value
 146         if self.units:
 147             data_o = treereduce(self.units, "data_o")
 148             comb += self.data_o.eq(data_o)
 149             if self.ldstmode:
 150                 addr_o = treereduce(self.units, "addr_o")
 151                 comb += self.addr_o.eq(addr_o)
 152
 153         for i, alu in enumerate(self.units):
 154             comb += alu.src1_i.eq(self.src1_i)
 155             comb += alu.src2_i.eq(self.src2_i)
 156
 157         if not self.ldstmode:
 158             return m
 159
 160         ldmem_l = []
 161         stmem_l = []
 162         go_ad_l = []
 163         go_st_l = []
 164         ld_l = []
 165         st_l = []
 166         adr_rel_l = []
 167         sto_rel_l = []
 168         for alu in self.units:
 169             ld_l.append(alu.ld_o)
 170             st_l.append(alu.st_o)
 171             adr_rel_l.append(alu.adr_rel_o)
 172             sto_rel_l.append(alu.sto_rel_o)
 173             ldmem_l.append(alu.load_mem_o)
 174             stmem_l.append(alu.stwd_mem_o)
 175             go_ad_l.append(alu.go_ad_i)
 176             go_st_l.append(alu.go_st_i)
 177         comb += self.ld_o.eq(Cat(*ld_l))
 178         comb += self.st_o.eq(Cat(*st_l))
 179         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 180         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 181         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 182         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 183         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 184         comb += Cat(*go_st_l).eq(self.go_st_i)
 185
 186         return m
 187
 188
 189 class CompUnitLDSTs(CompUnitsBase):
 190
 191     def __init__(self, rwid, opwid, n_ldsts, mem):
 192         """ Inputs:
 193
 194             * :rwid:   bit width of register file(s) - both FP and INT
 195             * :opwid:  operand bit width
 196         """
 197         self.opwid = opwid
 198
 199         # inputs
 200         self.oper_i = Signal(opwid, reset_less=True)
 201         self.imm_i = Signal(rwid, reset_less=True)
 202
 203         # Int ALUs
 204         self.alus = []
 205         for i in range(n_ldsts):
 206             self.alus.append(ALU(rwid))
 207
 208         units = []
 209         for alu in self.alus:
 210             aluopwid = 4  # see compldst.py for "internal" opcode
 211             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 212
 213         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 214
 215     def elaborate(self, platform):
 216         m = CompUnitsBase.elaborate(self, platform)
 217         comb = m.d.comb
 218
 219         # hand the same operation to all units, 4 lower bits though
 220         for alu in self.units:
 221             comb += alu.oper_i[0:4].eq(self.oper_i)
 222             comb += alu.imm_i.eq(self.imm_i)
 223             comb += alu.isalu_i.eq(0)
 224
 225         return m
 226
 227
 228 class CompUnitALUs(CompUnitsBase):
 229
 230     def __init__(self, rwid, opwid, n_alus):
 231         """ Inputs:
 232
 233             * :rwid:   bit width of register file(s) - both FP and INT
 234             * :opwid:  operand bit width
 235         """
 236         self.opwid = opwid
 237
 238         # inputs
 239         self.oper_i = Signal(opwid, reset_less=True)
 240         self.imm_i = Signal(rwid, reset_less=True)
 241
 242         # Int ALUs
 243         alus = []
 244         for i in range(n_alus):
 245             alus.append(ALU(rwid))
 246
 247         units = []
 248         for alu in alus:
 249             aluopwid = 3  # extra bit for immediate mode
 250             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 251
 252         CompUnitsBase.__init__(self, rwid, units)
 253
 254     def elaborate(self, platform):
 255         m = CompUnitsBase.elaborate(self, platform)
 256         comb = m.d.comb
 257
 258         # hand the same operation to all units, only lower 3 bits though
 259         for alu in self.units:
 260             comb += alu.oper_i[0:3].eq(self.oper_i)
 261             comb += alu.imm_i.eq(self.imm_i)
 262
 263         return m
 264
 265
 266 class CompUnitBR(CompUnitsBase):
 267
 268     def __init__(self, rwid, opwid):
 269         """ Inputs:
 270
 271             * :rwid:   bit width of register file(s) - both FP and INT
 272             * :opwid:  operand bit width
 273
 274             Note: bgt unit is returned so that a shadow unit can be created
 275             for it
 276         """
 277         self.opwid = opwid
 278
 279         # inputs
 280         self.oper_i = Signal(opwid, reset_less=True)
 281         self.imm_i = Signal(rwid, reset_less=True)
 282
 283         # Branch ALU and CU
 284         self.bgt = BranchALU(rwid)
 285         aluopwid = 3  # extra bit for immediate mode
 286         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 287         CompUnitsBase.__init__(self, rwid, [self.br1])
 288
 289     def elaborate(self, platform):
 290         m = CompUnitsBase.elaborate(self, platform)
 291         comb = m.d.comb
 292
 293         # hand the same operation to all units
 294         for alu in self.units:
 295             comb += alu.oper_i.eq(self.oper_i)
 296             comb += alu.imm_i.eq(self.imm_i)
 297
 298         return m
 299
 300
 301 class FunctionUnits(Elaboratable):
 302
 303     def __init__(self, n_regs, n_int_alus):
 304         self.n_regs = n_regs
 305         self.n_int_alus = n_int_alus
 306
 307         self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
 308         self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
 309         self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in
 310
 311         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 312         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 313
 314         self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
 315         self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
 316         self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)
 317
 318         self.readable_o = Signal(n_int_alus, reset_less=True)
 319         self.writable_o = Signal(n_int_alus, reset_less=True)
 320
 321         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 322         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 323         self.go_die_i = Signal(n_int_alus, reset_less=True)
 324         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 325
 326         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 327
 328     def elaborate(self, platform):
 329         m = Module()
 330         comb = m.d.comb
 331         sync = m.d.sync
 332
 333         n_intfus = self.n_int_alus
 334
 335         # Integer FU-FU Dep Matrix
 336         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 337         m.submodules.intfudeps = intfudeps
 338         # Integer FU-Reg Dep Matrix
 339         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 340         m.submodules.intregdeps = intregdeps
 341
 342         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 343         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 344
 345         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 346         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 347
 348         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 349         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 350         self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid
 351
 352         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 353         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 354         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 355         comb += intfudeps.go_die_i.eq(self.go_die_i)
 356         comb += self.readable_o.eq(intfudeps.readable_o)
 357         comb += self.writable_o.eq(intfudeps.writable_o)
 358
 359         # Connect function issue / arrays, and dest/src1/src2
 360         comb += intregdeps.dest_i.eq(self.dest_i)
 361         comb += intregdeps.src_i[0].eq(self.src1_i)
 362         comb += intregdeps.src_i[1].eq(self.src2_i)
 363
 364         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 365         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 366         comb += intregdeps.go_die_i.eq(self.go_die_i)
 367         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 368
 369         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 370         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 371         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 372
 373         return m
 374
 375
 376 class Scoreboard(Elaboratable):
 377     def __init__(self, rwid, n_regs):
 378         """ Inputs:
 379
 380             * :rwid:   bit width of register file(s) - both FP and INT
 381             * :n_regs: depth of register file(s) - number of FP and INT regs
 382         """
 383         self.rwid = rwid
 384         self.n_regs = n_regs
 385
 386         # Register Files
 387         self.intregs = RegFileArray(rwid, n_regs)
 388         self.fpregs = RegFileArray(rwid, n_regs)
 389
 390         # Memory (test for now)
 391         self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long
 392
 393         # issue q needs to get at these
 394         self.aluissue = IssueUnitGroup(2)
 395         self.lsissue = IssueUnitGroup(2)
 396         self.brissue = IssueUnitGroup(1)
 397         # and these
 398         self.alu_oper_i = Signal(4, reset_less=True)
 399         self.alu_imm_i = Signal(rwid, reset_less=True)
 400         self.br_oper_i = Signal(4, reset_less=True)
 401         self.br_imm_i = Signal(rwid, reset_less=True)
 402         self.ls_oper_i = Signal(4, reset_less=True)
 403         self.ls_imm_i = Signal(rwid, reset_less=True)
 404
 405         # inputs
 406         self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
 407         self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
 408         self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
 409         self.reg_enable_i = Signal(reset_less=True)  # enable reg decode
 410
 411         # outputs
 412         self.issue_o = Signal(reset_less=True)  # instruction was accepted
 413         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 414
 415         # for branch speculation experiment.  branch_direction = 0 if
 416         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 417         # branch_succ and branch_fail are requests to have the current
 418         # instruction be dependent on the branch unit "shadow" capability.
 419         self.branch_succ_i = Signal(reset_less=True)
 420         self.branch_fail_i = Signal(reset_less=True)
 421         self.branch_direction_o = Signal(2, reset_less=True)
 422
 423     def elaborate(self, platform):
 424         m = Module()
 425         comb = m.d.comb
 426         sync = m.d.sync
 427
 428         m.submodules.intregs = self.intregs
 429         m.submodules.fpregs = self.fpregs
 430         m.submodules.mem = mem = self.mem
 431
 432         # register ports
 433         int_dest = self.intregs.write_port("dest")
 434         int_src1 = self.intregs.read_port("src1")
 435         int_src2 = self.intregs.read_port("src2")
 436
 437         fp_dest = self.fpregs.write_port("dest")
 438         fp_src1 = self.fpregs.read_port("src1")
 439         fp_src2 = self.fpregs.read_port("src2")
 440
 441         # Int ALUs and BR ALUs
 442         n_int_alus = 5
 443         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 444         cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs
 445
 446         # LDST Comp Units
 447         n_ldsts = 2
 448         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)
 449
 450         # Comp Units
 451         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 452         bgt = cub.bgt  # get at the branch computation unit
 453         br1 = cub.br1
 454
 455         # Int FUs
 456         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 457
 458         # Memory FUs
 459         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 460
 461         # Memory Priority Picker 1: one gateway per memory port
 462         # picks 1 reader and 1 writer to intreg
 463         mempick1 = GroupPicker(n_ldsts)
 464         m.submodules.mempick1 = mempick1
 465
 466         # Count of number of FUs
 467         n_intfus = n_int_alus
 468         n_fp_fus = 0  # for now
 469
 470         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 471         # picks 1 reader and 1 writer to intreg
 472         intpick1 = GroupPicker(n_intfus)
 473         m.submodules.intpick1 = intpick1
 474
 475         # INT/FP Issue Unit
 476         regdecode = RegDecode(self.n_regs)
 477         m.submodules.regdecode = regdecode
 478         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 479         m.submodules.issueunit = issueunit
 480
 481         # Shadow Matrix.  currently n_intfus shadows, to be used for
 482         # write-after-write hazards.  NOTE: there is one extra for branches,
 483         # so the shadow width is increased by 1
 484         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 485         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 486
 487         # record previous instruction to cast shadow on current instruction
 488         prev_shadow = Signal(n_intfus)
 489
 490         # Branch Speculation recorder.  tracks the success/fail state as
 491         # each instruction is issued, so that when the branch occurs the
 492         # allow/cancel can be issued as appropriate.
 493         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 494
 495         # ---------
 496         # ok start wiring things together...
 497         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 498         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 499         # ---------
 500
 501         # ---------
 502         # Issue Unit is where it starts.  set up some in/outs for this module
 503         # ---------
 504         comb += [regdecode.dest_i.eq(self.int_dest_i),
 505                  regdecode.src1_i.eq(self.int_src1_i),
 506                  regdecode.src2_i.eq(self.int_src2_i),
 507                  regdecode.enable_i.eq(self.reg_enable_i),
 508                  self.issue_o.eq(issueunit.issue_o)
 509                  ]
 510
 511         # take these to outside (issue needs them)
 512         comb += cua.oper_i.eq(self.alu_oper_i)
 513         comb += cua.imm_i.eq(self.alu_imm_i)
 514         comb += cub.oper_i.eq(self.br_oper_i)
 515         comb += cub.imm_i.eq(self.br_imm_i)
 516         comb += cul.oper_i.eq(self.ls_oper_i)
 517         comb += cul.imm_i.eq(self.ls_imm_i)
 518
 519         # TODO: issueunit.f (FP)
 520
 521         # and int function issue / busy arrays, and dest/src1/src2
 522         comb += intfus.dest_i.eq(regdecode.dest_o)
 523         comb += intfus.src1_i.eq(regdecode.src1_o)
 524         comb += intfus.src2_i.eq(regdecode.src2_o)
 525
 526         fn_issue_o = issueunit.fn_issue_o
 527
 528         comb += intfus.fn_issue_i.eq(fn_issue_o)
 529         comb += issueunit.busy_i.eq(cu.busy_o)
 530         comb += self.busy_o.eq(cu.busy_o.bool())
 531
 532         # ---------
 533         # Memory Function Unit
 534         # ---------
 535         reset_b = Signal(cul.n_units, reset_less=True)
 536         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 537
 538         comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
 539         comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
 540         comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit
 541
 542         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 543         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 544         # issue_i.  multi-issue gets a bit more complex but not a lot.
 545         prior_ldsts = Signal(cul.n_units, reset_less=True)
 546         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 547         with m.If(self.ls_oper_i[3]):  # LD bit of operand
 548             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 549         with m.If(self.ls_oper_i[2]):  # ST bit of operand
 550             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 551
 552         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 553         # just immediately activate go_adr
 554         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 555
 556         # connect up address data
 557         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 558         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 559
 560         # connect loadable / storable to go_ld/go_st.
 561         # XXX should only be done when the memory ld/st has actually happened!
 562         go_st_i = Signal(cul.n_units, reset_less=True)
 563         go_ld_i = Signal(cul.n_units, reset_less=True)
 564         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
 565                            cul.adr_rel_o & cul.ld_o)
 566         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
 567                            cul.sto_rel_o & cul.st_o)
 568         comb += memfus.go_ld_i.eq(go_ld_i)
 569         comb += memfus.go_st_i.eq(go_st_i)
 570         #comb += cul.go_wr_i.eq(go_ld_i)
 571         comb += cul.go_st_i.eq(go_st_i)
 572
 573         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 574         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 575         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 576
 577         # ---------
 578         # merge shadow matrices outputs
 579         # ---------
 580
 581         # these are explained in ShadowMatrix docstring, and are to be
 582         # connected to the FUReg and FUFU Matrices, to get them to reset
 583         anydie = Signal(n_intfus, reset_less=True)
 584         allshadown = Signal(n_intfus, reset_less=True)
 585         shreset = Signal(n_intfus, reset_less=True)
 586         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 587         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 588         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 589
 590         # ---------
 591         # connect fu-fu matrix
 592         # ---------
 593
 594         # Group Picker... done manually for now.
 595         go_rd_o = intpick1.go_rd_o
 596         go_wr_o = intpick1.go_wr_o
 597         go_rd_i = intfus.go_rd_i
 598         go_wr_i = intfus.go_wr_i
 599         go_die_i = intfus.go_die_i
 600         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 601         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])  # rd
 602         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])  # wr
 603         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die
 604
 605         # Connect Picker
 606         # ---------
 607         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 608         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.done_o[0:n_intfus])
 609         int_rd_o = intfus.readable_o
 610         int_wr_o = intfus.writable_o
 611         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 612         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 613
 614         # ---------
 615         # Shadow Matrix
 616         # ---------
 617
 618         comb += shadows.issue_i.eq(fn_issue_o)
 619         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 620         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 621         # ---------
 622         # NOTE; this setup is for the instruction order preservation...
 623
 624         # connect shadows / go_dies to Computation Units
 625         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 626         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 627
 628         # ok connect first n_int_fu shadows to busy lines, to create an
 629         # instruction-order linked-list-like arrangement, using a bit-matrix
 630         # (instead of e.g. a ring buffer).
 631
 632         # when written, the shadow can be cancelled (and was good)
 633         for i in range(n_intfus):
 634             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 635
 636         # *previous* instruction shadows *current* instruction, and, obviously,
 637         # if the previous is completed (!busy) don't cast the shadow!
 638         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 639         for i in range(n_intfus):
 640             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 641
 642         # ---------
 643         # ... and this is for branch speculation.  it uses the extra bit
 644         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 645         # only needs to set shadow_i, s_fail_i and s_good_i
 646
 647         # issue captures shadow_i (if enabled)
 648         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 649
 650         bactive = Signal(reset_less=True)
 651         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 652
 653         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 654         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 655             comb += bshadow.issue_i.eq(fn_issue_o)
 656             for i in range(n_intfus):
 657                 with m.If(fn_issue_o & (Const(1 << i))):
 658                     comb += bshadow.shadow_i[i][0].eq(1)
 659
 660         # finally, we need an indicator to the test infrastructure as to
 661         # whether the branch succeeded or failed, plus, link up to the
 662         # "recorder" of whether the instruction was under shadow or not
 663
 664         with m.If(br1.issue_i):
 665             sync += bspec.active_i.eq(1)
 666         with m.If(self.branch_succ_i):
 667             comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 668         with m.If(self.branch_fail_i):
 669             comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 670
 671         # branch is active (TODO: a better signal: this is over-using the
 672         # go_write signal - actually the branch should not be "writing")
 673         with m.If(br1.go_wr_i):
 674             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 675             sync += bspec.active_i.eq(0)
 676             comb += bspec.br_i.eq(1)
 677             # branch occurs if data == 1, failed if data == 0
 678             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 679             for i in range(n_intfus):
 680                 # *expected* direction of the branch matched against *actual*
 681                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 682                 # ... or it didn't
 683                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 684
 685         # ---------
 686         # Connect Register File(s)
 687         # ---------
 688         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 689         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 690         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 691
 692         # connect ALUs to regfule
 693         comb += int_dest.data_i.eq(cu.data_o)
 694         comb += cu.src1_i.eq(int_src1.data_o)
 695         comb += cu.src2_i.eq(int_src2.data_o)
 696
 697         # connect ALU Computation Units
 698         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 699         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 700         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 701
 702         return m
 703
 704     def __iter__(self):
 705         yield from self.intregs
 706         yield from self.fpregs
 707         yield self.int_dest_i
 708         yield self.int_src1_i
 709         yield self.int_src2_i
 710         yield self.issue_o
 711         yield self.branch_succ_i
 712         yield self.branch_fail_i
 713         yield self.branch_direction_o
 714
 715     def ports(self):
 716         return list(self)
 717
 718
 719 class IssueToScoreboard(Elaboratable):
 720
 721     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 722         self.qlen = qlen
 723         self.n_in = n_in
 724         self.n_out = n_out
 725         self.rwid = rwid
 726         self.opw = opwid
 727         self.n_regs = n_regs
 728
 729         mqbits = unsigned(int(log(qlen) / log(2))+2)
 730         self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
 731         self.p_ready_o = Signal()  # instructions were added
 732         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 733
 734         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 735         self.qlen_o = Signal(mqbits, reset_less=True)
 736
 737     def elaborate(self, platform):
 738         m = Module()
 739         comb = m.d.comb
 740         sync = m.d.sync
 741
 742         iq = InstructionQ(self.rwid, self.opw, self.qlen,
 743                           self.n_in, self.n_out)
 744         sc = Scoreboard(self.rwid, self.n_regs)
 745         m.submodules.iq = iq
 746         m.submodules.sc = sc
 747
 748         # get at the regfile for testing
 749         self.intregs = sc.intregs
 750
 751         # and the "busy" signal and instruction queue length
 752         comb += self.busy_o.eq(sc.busy_o)
 753         comb += self.qlen_o.eq(iq.qlen_o)
 754
 755         # link up instruction queue
 756         comb += iq.p_add_i.eq(self.p_add_i)
 757         comb += self.p_ready_o.eq(iq.p_ready_o)
 758         for i in range(self.n_in):
 759             comb += eq(iq.data_i[i], self.data_i[i])
 760
 761         # take instruction and process it.  note that it's possible to
 762         # "inspect" the queue contents *without* actually removing the
 763         # items.  items are only removed when the
 764
 765         # in "waiting" state
 766         wait_issue_br = Signal()
 767         wait_issue_alu = Signal()
 768         wait_issue_ls = Signal()
 769
 770         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 771             # set instruction pop length to 1 if the unit accepted
 772             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 773                 with m.If(iq.qlen_o != 0):
 774                     comb += iq.n_sub_i.eq(1)
 775             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 776                 with m.If(iq.qlen_o != 0):
 777                     comb += iq.n_sub_i.eq(1)
 778             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 779                 with m.If(iq.qlen_o != 0):
 780                     comb += iq.n_sub_i.eq(1)
 781
 782         # see if some instruction(s) are here.  note that this is
 783         # "inspecting" the in-place queue.  note also that on the
 784         # cycle following "waiting" for fn_issue_o to be set, the
 785         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 786         with m.If(iq.qlen_o != 0):
 787             # get the operands and operation
 788             imm = iq.data_o[0].imm_i
 789             dest = iq.data_o[0].dest_i
 790             src1 = iq.data_o[0].src1_i
 791             src2 = iq.data_o[0].src2_i
 792             op = iq.data_o[0].oper_i
 793             opi = iq.data_o[0].opim_i  # immediate set
 794
 795             # set the src/dest regs
 796             comb += sc.int_dest_i.eq(dest)
 797             comb += sc.int_src1_i.eq(src1)
 798             comb += sc.int_src2_i.eq(src2)
 799             comb += sc.reg_enable_i.eq(1)  # enable the regfile
 800
 801             # choose a Function-Unit-Group
 802             with m.If((op & (0x3 << 2)) != 0):  # branch
 803                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 804                 comb += sc.br_imm_i.eq(imm)
 805                 comb += sc.brissue.insn_i.eq(1)
 806                 comb += wait_issue_br.eq(1)
 807             with m.Elif((op & (0x3 << 4)) != 0):  # ld/st
 808                 # see compldst.py
 809                 # bit 0: ADD/SUB
 810                 # bit 1: immed
 811                 # bit 4: LD
 812                 # bit 5: ST
 813                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 814                 comb += sc.ls_imm_i.eq(imm)
 815                 comb += sc.lsissue.insn_i.eq(1)
 816                 comb += wait_issue_ls.eq(1)
 817             with m.Else():  # alu
 818                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 819                 comb += sc.alu_imm_i.eq(imm)
 820                 comb += sc.aluissue.insn_i.eq(1)
 821                 comb += wait_issue_alu.eq(1)
 822
 823             # XXX TODO
 824             # these indicate that the instruction is to be made
 825             # shadow-dependent on
 826             # (either) branch success or branch fail
 827             # yield sc.branch_fail_i.eq(branch_fail)
 828             # yield sc.branch_succ_i.eq(branch_success)
 829
 830         return m
 831
 832     def __iter__(self):
 833         yield self.p_ready_o
 834         for o in self.data_i:
 835             yield from list(o)
 836         yield self.p_add_i
 837
 838     def ports(self):
 839         return list(self)
 840
 841
 842 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 843             branch_success, branch_fail):
 844     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 845                'src1_i': src1, 'src2_i': src2}]
 846
 847     sendlen = 1
 848     for idx in range(sendlen):
 849         yield from eq(dut.data_i[idx], instrs[idx])
 850         di = yield dut.data_i[idx]
 851         print("senddata %d %x" % (idx, di))
 852     yield dut.p_add_i.eq(sendlen)
 853     yield
 854     o_p_ready = yield dut.p_ready_o
 855     while not o_p_ready:
 856         yield
 857         o_p_ready = yield dut.p_ready_o
 858
 859     yield dut.p_add_i.eq(0)
 860
 861
 862 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 863     yield from disable_issue(dut)
 864     yield dut.int_dest_i.eq(dest)
 865     yield dut.int_src1_i.eq(src1)
 866     yield dut.int_src2_i.eq(src2)
 867     if (op & (0x3 << 2)) != 0:  # branch
 868         yield dut.brissue.insn_i.eq(1)
 869         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 870         yield dut.br_imm_i.eq(imm)
 871         dut_issue = dut.brissue
 872     else:
 873         yield dut.aluissue.insn_i.eq(1)
 874         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 875         yield dut.alu_imm_i.eq(imm)
 876         dut_issue = dut.aluissue
 877     yield dut.reg_enable_i.eq(1)
 878
 879     # these indicate that the instruction is to be made shadow-dependent on
 880     # (either) branch success or branch fail
 881     yield dut.branch_fail_i.eq(branch_fail)
 882     yield dut.branch_succ_i.eq(branch_success)
 883
 884     yield
 885     yield from wait_for_issue(dut, dut_issue)
 886
 887
 888 def print_reg(dut, rnums):
 889     rs = []
 890     for rnum in rnums:
 891         reg = yield dut.intregs.regs[rnum].reg
 892         rs.append("%x" % reg)
 893     rnums = map(str, rnums)
 894     print("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 895
 896
 897 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 898     insts = []
 899     for i in range(n_ops):
 900         src1 = randint(1, dut.n_regs-1)
 901         src2 = randint(1, dut.n_regs-1)
 902         imm = randint(1, (1 << dut.rwid)-1)
 903         dest = randint(1, dut.n_regs-1)
 904         op = randint(0, max_opnums)
 905         opi = 0 if randint(0, 2) else 1  # set true if random is nonzero
 906
 907         if shadowing:
 908             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 909         else:
 910             insts.append((src1, src2, dest, op, opi, imm))
 911     return insts
 912
 913
 914 def wait_for_busy_clear(dut):
 915     while True:
 916         busy_o = yield dut.busy_o
 917         if not busy_o:
 918             break
 919         print("busy",)
 920         yield
 921
 922
 923 def disable_issue(dut):
 924     yield dut.aluissue.insn_i.eq(0)
 925     yield dut.brissue.insn_i.eq(0)
 926     yield dut.lsissue.insn_i.eq(0)
 927
 928
 929 def wait_for_issue(dut, dut_issue):
 930     while True:
 931         issue_o = yield dut_issue.fn_issue_o
 932         if issue_o:
 933             yield from disable_issue(dut)
 934             yield dut.reg_enable_i.eq(0)
 935             break
 936         print("busy",)
 937         # yield from print_reg(dut, [1,2,3])
 938         yield
 939     # yield from print_reg(dut, [1,2,3])
 940
 941
 942 def scoreboard_branch_sim(dut, alusim):
 943
 944     iseed = 3
 945
 946     for i in range(1):
 947
 948         print("rseed", iseed)
 949         seed(iseed)
 950         iseed += 1
 951
 952         yield dut.branch_direction_o.eq(0)
 953
 954         # set random values in the registers
 955         for i in range(1, dut.n_regs):
 956             val = 31+i*3
 957             val = randint(0, (1 << alusim.rwidth)-1)
 958             yield dut.intregs.regs[i].reg.eq(val)
 959             alusim.setval(i, val)
 960
 961         if False:
 962             # create some instructions: branches create a tree
 963             insts = create_random_ops(dut, 1, True, 1)
 964             #insts.append((6, 6, 1, 2, (0, 0)))
 965             #insts.append((4, 3, 3, 0, (0, 0)))
 966
 967             src1 = randint(1, dut.n_regs-1)
 968             src2 = randint(1, dut.n_regs-1)
 969             #op = randint(4, 7)
 970             op = 4  # only BGT at the moment
 971
 972             branch_ok = create_random_ops(dut, 1, True, 1)
 973             branch_fail = create_random_ops(dut, 1, True, 1)
 974
 975             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 976
 977         if True:
 978             insts = []
 979             insts.append((3, 5, 2, 0, (0, 0)))
 980             branch_ok = []
 981             branch_fail = []
 982             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
 983             branch_ok.append(None)
 984             branch_fail.append((1, 1, 2, 0, (0, 1)))
 985             #branch_fail.append( None )
 986             insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))
 987
 988         siminsts = deepcopy(insts)
 989
 990         # issue instruction(s)
 991         i = -1
 992         instrs = insts
 993         branch_direction = 0
 994         while instrs:
 995             yield
 996             yield
 997             i += 1
 998             branch_direction = yield dut.branch_direction_o  # way branch went
 999             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
1000             if branch_direction == 1 and shadow_on:
1001                 print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1002                 continue  # branch was "success" and this is a "failed"... skip
1003             if branch_direction == 2 and shadow_off:
1004                 print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1005                 continue  # branch was "fail" and this is a "success"... skip
1006             if branch_direction != 0:
1007                 shadow_on = 0
1008                 shadow_off = 0
1009             is_branch = op >= 4
1010             if is_branch:
1011                 branch_ok, branch_fail = dest
1012                 dest = src2
1013                 # ok zip up the branch success / fail instructions and
1014                 # drop them into the queue, one marked "to have branch success"
1015                 # the other to be marked shadow branch "fail".
1016                 # one out of each of these will be cancelled
1017                 for ok, fl in zip(branch_ok, branch_fail):
1018                     if ok:
1019                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1020                     if fl:
1021                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1022             print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
1023                   (i, src1, src2, dest, op, shadow_on, shadow_off))
1024             yield from int_instr(dut, op, src1, src2, dest,
1025                                  shadow_on, shadow_off)
1026
1027         # wait for all instructions to stop before checking
1028         yield
1029         yield from wait_for_busy_clear(dut)
1030
1031         i = -1
1032         while siminsts:
1033             instr = siminsts.pop(0)
1034             if instr is None:
1035                 continue
1036             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
1037             i += 1
1038             is_branch = op >= 4
1039             if is_branch:
1040                 branch_ok, branch_fail = dest
1041                 dest = src2
1042             print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
1043                   (i, src1, src2, dest, op, shadow_on, shadow_off))
1044             branch_res = alusim.op(op, src1, src2, dest)
1045             if is_branch:
1046                 if branch_res:
1047                     siminsts += branch_ok
1048                 else:
1049                     siminsts += branch_fail
1050
1051         # check status
1052         yield from alusim.check(dut)
1053         yield from alusim.dump(dut)
1054
1055
def scoreboard_sim(dut, alusim):
    """Simulation process: queue a test program on dut and check it.

    Registers 1..dut.n_regs-1 are preloaded (in both dut and alusim),
    every instruction of the enabled test cases is applied to alusim and
    pushed into dut's instruction queue, and once the queue has drained
    and dut is no longer busy the two are compared via alusim.check().

    Instructions are 7-field tuples:
    (src1, src2, dest, op, opi, imm, (br_ok, br_fail)).
    """

    seed(0)

    for _ in range(1):

        # preload the registers: register N starts out holding N
        # (alternatives tried: randint(0, (1<<alusim.rwidth)-1), 31+N*3)
        for regnum in range(1, dut.n_regs):
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions (some random, some regression tests).
        # NOTE: disabled cases with fewer than seven fields do not match
        # the tuple unpacked by the issue loop below.
        program = []
        if False:
            program = create_random_ops(dut, 15, True, 4)

        if True:  # LD/ST test (with immediate)
            program += [(1, 2, 0, 0x20, 1, 1, (0, 0))]  # LD
            #program += [(1, 2, 0, 0x10, 1, 1, (0, 0))]

        if True:
            program += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if True:
            program += [(7, 3, 2, 4, 0, 0, (0, 0)),
                        (7, 6, 6, 2, 0, 0, (0, 0)),
                        (1, 7, 2, 2, 0, 0, (0, 0))]

        if True:
            program += [(2, 3, 3, 0, 0, 0, (0, 0)),
                        (5, 3, 3, 1, 0, 0, (0, 0)),
                        (3, 5, 5, 2, 0, 0, (0, 0)),
                        (5, 3, 3, 3, 0, 0, (0, 0)),
                        (3, 5, 5, 0, 0, 0, (0, 0))]

        if False:
            program += [(3, 3, 4, 0, 0, 13979, (0, 0)),
                        (6, 4, 1, 2, 0, 40976, (0, 0)),
                        (1, 4, 7, 4, 1, 23652, (0, 0))]

        if False:
            program += [(5, 6, 2, 1),
                        (2, 2, 4, 0)]
            #program += [(2, 2, 3, 1)]

        if False:
            program += [(2, 1, 2, 3)]

        if False:
            program += [(2, 6, 2, 1),
                        (2, 1, 2, 0)]

        if False:
            program += [(1, 2, 7, 2),
                        (7, 1, 5, 0),
                        (4, 4, 1, 1)]

        if False:
            program += [(5, 6, 2, 2),
                        (1, 1, 4, 1),
                        (6, 5, 3, 0)]

        if False:
            # Write-after-Write Hazard
            program += [(3, 6, 7, 2),
                        (4, 4, 7, 1)]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            program += [(1, 1, 1, 1),
                        (1, 5, 3, 0)]

        if False:
            # Read-after-Write followed by self-read-after-write
            program += [(5, 6, 1, 2),
                        (1, 1, 1, 1)]

        if False:
            # self-read-write sandwich
            program += [(5, 6, 1, 2),
                        (1, 1, 1, 1),
                        (1, 5, 3, 0)]

        if False:
            # very weird failure
            program += [(5, 2, 5, 2),
                        (2, 6, 3, 0),
                        (4, 2, 2, 1)]

        if False:
            # override registers 5 and 3 before the test
            for regnum, regval in ((5, 4), (3, 5)):
                yield dut.intregs.regs[regnum].reg.eq(regval)
                alusim.setval(regnum, regval)
            program += [(5, 3, 3, 4, (0, 0)),
                        (4, 2, 1, 2, (0, 1))]

        if False:
            # override registers 5 and 3 before the test
            for regnum, regval in ((5, 6), (3, 5)):
                yield dut.intregs.regs[regnum].reg.eq(regval)
                alusim.setval(regnum, regval)
            program += [(5, 3, 3, 4, (0, 0)),
                        (4, 2, 1, 2, (1, 0))]

        if False:
            program += [(4, 3, 5, 1, 0, (0, 0)),
                        (5, 2, 3, 1, 0, (0, 0)),
                        (7, 1, 5, 2, 0, (0, 0)),
                        (5, 6, 6, 4, 0, (0, 0)),
                        (7, 5, 2, 2, 0, (1, 0)),
                        (1, 7, 5, 0, 0, (0, 1)),
                        (1, 6, 1, 2, 0, (1, 0)),
                        (1, 6, 7, 3, 0, (0, 0)),
                        (6, 7, 7, 0, 0, (0, 0))]

        # issue instruction(s): each one is applied to the software
        # model, then queued (instr_q waits for the queue to accept it)
        for num, entry in enumerate(program):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = entry

            print("instr %d: (%d, %d, %d, %d, %d, %d)" %
                  (num, src1, src2, dest, op, opi, imm))
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for the queue to drain, allow four more cycles, then
        # wait for all instructions to stop before checking
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1202
1203
def test_scoreboard():
    """Build an IssueToScoreboard, dump it as RTLIL and simulate it.

    Writes test_scoreboard6600.il (RTLIL) and test_scoreboard6600.vcd
    (simulation trace) into the current directory.
    """
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=16, opwid=8, n_regs=8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 8)  # created but not used by the simulation yet

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative: the direct-issue branch test
    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1217
1218
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()