src/soc/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5
   6 from soc.regfile.regfile import RegFileArray, treereduce
   7 from soc.scoreboard.fu_fu_matrix import FUFUDepMatrix
   8 from soc.scoreboard.fu_reg_matrix import FURegDepMatrix
   9 from soc.scoreboard.global_pending import GlobalPending
  10 from soc.scoreboard.group_picker import GroupPicker
  11 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  12 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  13 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  14 from soc.scoreboard.memfu import MemFunctionUnits
  15
  16 from soc.experiment.compalu import ComputationUnitNoDelay
  17 from soc.experiment.compldst import LDSTCompUnit
  18 from soc.experiment.testmem import TestMemory
  19
  20 from soc.experiment.alu_hier import ALU, BranchALU
  21
  22 from soc.decoder.power_enums import InternalOp, Function
  23
  24 from nmutil.latch import SRLatch
  25 from nmutil.nmoperator import eq
  26
  27 from random import randint, seed
  28 from copy import deepcopy
  29 from math import log
  30
  31 from soc.experiment.sim import RegSim, MemSim
  32 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
  33
  34
  35 class CompUnitsBase(Elaboratable):
  36     """ Computation Unit Base class.
  37
  38         Amazingly, this class works recursively.  It's supposed to just
  39         look after some ALUs (that can handle the same operations),
  40         grouping them together, however it turns out that the same code
  41         can also group *groups* of Computation Units together as well.
  42
  43         Basically it was intended just to concatenate the ALU's issue,
  44         go_rd etc. signals together, which start out as bits and become
  45         sequences.  Turns out that the same trick works just as well
  46         on Computation Units!
  47
  48         So this class may be used recursively to present a top-level
  49         sequential concatenation of all the signals in and out of
  50         ALUs, whilst at the same time making it convenient to group
  51         ALUs together.
  52
  53         At the lower level, the intent is that groups of (identical)
  54         ALUs may be passed the same operation.  Even beyond that,
  55         the intent is that that group of (identical) ALUs actually
  56         share the *same pipeline* and as such become a "Concurrent
  57         Computation Unit" as defined by Mitch Alsup (see section
  58         11.4.9.3)
  59     """
  60
  61     def __init__(self, rwid, units, ldstmode=False):
  62         """ Inputs:
  63
  64             * :rwid:   bit width of register file(s) - both FP and INT
  65             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  66         """
  67         self.units = units
  68         self.ldstmode = ldstmode
  69         self.rwid = rwid
  70         self.rwid = rwid
  71         if units and isinstance(units[0], CompUnitsBase):
  72             self.n_units = 0
  73             for u in self.units:
  74                 self.n_units += u.n_units
  75         else:
  76             self.n_units = len(units)
  77
  78         n_units = self.n_units
  79
  80         # inputs
  81         self.issue_i = Signal(n_units, reset_less=True)
  82         self.go_rd_i = Signal(n_units, reset_less=True)
  83         self.go_wr_i = Signal(n_units, reset_less=True)
  84         self.shadown_i = Signal(n_units, reset_less=True)
  85         self.go_die_i = Signal(n_units, reset_less=True)
  86         if ldstmode:
  87             self.go_ad_i = Signal(n_units, reset_less=True)
  88             self.go_st_i = Signal(n_units, reset_less=True)
  89
  90         # outputs
  91         self.busy_o = Signal(n_units, reset_less=True)
  92         self.rd_rel_o = Signal(n_units, reset_less=True)
  93         self.req_rel_o = Signal(n_units, reset_less=True)
  94         self.done_o = Signal(n_units, reset_less=True)
  95         if ldstmode:
  96             self.ld_o = Signal(n_units, reset_less=True)  # op is LD
  97             self.st_o = Signal(n_units, reset_less=True)  # op is ST
  98             self.adr_rel_o = Signal(n_units, reset_less=True)
  99             self.sto_rel_o = Signal(n_units, reset_less=True)
 100             self.load_mem_o = Signal(n_units, reset_less=True)
 101             self.stwd_mem_o = Signal(n_units, reset_less=True)
 102             self.addr_o = Signal(rwid, reset_less=True)
 103
 104         # in/out register data (note: not register#, actual data)
 105         self.data_o = Signal(rwid, reset_less=True)
 106         self.src1_i = Signal(rwid, reset_less=True)
 107         self.src2_i = Signal(rwid, reset_less=True)
 108         # input operand
 109
 110     def elaborate(self, platform):
 111         m = Module()
 112         comb = m.d.comb
 113
 114         for i, alu in enumerate(self.units):
 115             setattr(m.submodules, "comp%d" % i, alu)
 116
 117         go_rd_l = []
 118         go_wr_l = []
 119         issue_l = []
 120         busy_l = []
 121         req_rel_l = []
 122         done_l = []
 123         rd_rel_l = []
 124         shadow_l = []
 125         godie_l = []
 126         for alu in self.units:
 127             req_rel_l.append(alu.req_rel_o)
 128             done_l.append(alu.done_o)
 129             rd_rel_l.append(alu.rd_rel_o)
 130             shadow_l.append(alu.shadown_i)
 131             godie_l.append(alu.go_die_i)
 132             go_wr_l.append(alu.go_wr_i)
 133             go_rd_l.append(alu.go_rd_i)
 134             issue_l.append(alu.issue_i)
 135             busy_l.append(alu.busy_o)
 136         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 137         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 138         comb += self.done_o.eq(Cat(*done_l))
 139         comb += self.busy_o.eq(Cat(*busy_l))
 140         comb += Cat(*godie_l).eq(self.go_die_i)
 141         comb += Cat(*shadow_l).eq(self.shadown_i)
 142         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 143         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 144         comb += Cat(*issue_l).eq(self.issue_i)
 145
 146         # connect data register input/output
 147
 148         # merge (OR) all integer FU / ALU outputs to a single value
 149         if self.units:
 150             data_o = treereduce(self.units, "data_o")
 151             comb += self.data_o.eq(data_o)
 152             if self.ldstmode:
 153                 addr_o = treereduce(self.units, "addr_o")
 154                 comb += self.addr_o.eq(addr_o)
 155
 156         for i, alu in enumerate(self.units):
 157             comb += alu.src1_i.eq(self.src1_i)
 158             comb += alu.src2_i.eq(self.src2_i)
 159
 160         if not self.ldstmode:
 161             return m
 162
 163         ldmem_l = []
 164         stmem_l = []
 165         go_ad_l = []
 166         go_st_l = []
 167         ld_l = []
 168         st_l = []
 169         adr_rel_l = []
 170         sto_rel_l = []
 171         for alu in self.units:
 172             ld_l.append(alu.ld_o)
 173             st_l.append(alu.st_o)
 174             adr_rel_l.append(alu.adr_rel_o)
 175             sto_rel_l.append(alu.sto_rel_o)
 176             ldmem_l.append(alu.load_mem_o)
 177             stmem_l.append(alu.stwd_mem_o)
 178             go_ad_l.append(alu.go_ad_i)
 179             go_st_l.append(alu.go_st_i)
 180         comb += self.ld_o.eq(Cat(*ld_l))
 181         comb += self.st_o.eq(Cat(*st_l))
 182         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 183         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 184         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 185         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 186         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 187         comb += Cat(*go_st_l).eq(self.go_st_i)
 188
 189         return m
 190
 191
 192 class CompUnitLDSTs(CompUnitsBase):
 193
 194     def __init__(self, rwid, opwid, n_ldsts, mem):
 195         """ Inputs:
 196
 197             * :rwid:   bit width of register file(s) - both FP and INT
 198             * :opwid:  operand bit width
 199         """
 200         self.opwid = opwid
 201
 202         # inputs
 203         self.oper_i = Signal(opwid, reset_less=True)
 204         self.imm_i = Signal(rwid, reset_less=True)
 205
 206         # Int ALUs
 207         self.alus = []
 208         for i in range(n_ldsts):
 209             self.alus.append(ALU(rwid))
 210
 211         units = []
 212         for alu in self.alus:
 213             aluopwid = 4  # see compldst.py for "internal" opcode
 214             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 215
 216         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 217
 218     def elaborate(self, platform):
 219         m = CompUnitsBase.elaborate(self, platform)
 220         comb = m.d.comb
 221
 222         # hand the same operation to all units, 4 lower bits though
 223         for alu in self.units:
 224             comb += alu.oper_i[0:4].eq(self.oper_i)
 225             comb += alu.imm_i.eq(self.imm_i)
 226             comb += alu.isalu_i.eq(0)
 227
 228         return m
 229
 230
 231 class CompUnitALUs(CompUnitsBase):
 232
 233     def __init__(self, rwid, opwid, n_alus):
 234         """ Inputs:
 235
 236             * :rwid:   bit width of register file(s) - both FP and INT
 237             * :opwid:  operand bit width
 238         """
 239         self.opwid = opwid
 240
 241         # inputs
 242         self.oper_i = Signal(opwid, reset_less=True)
 243         self.imm_i = Signal(rwid, reset_less=True)
 244
 245         # Int ALUs
 246         alus = []
 247         for i in range(n_alus):
 248             alus.append(ALU(rwid))
 249
 250         units = []
 251         for alu in alus:
 252             aluopwid = 3  # extra bit for immediate mode
 253             units.append(ComputationUnitNoDelay(rwid, alu))
 254
 255         CompUnitsBase.__init__(self, rwid, units)
 256
 257     def elaborate(self, platform):
 258         m = CompUnitsBase.elaborate(self, platform)
 259         comb = m.d.comb
 260
 261         # hand the same operation to all units, only lower 3 bits though
 262         for alu in self.units:
 263             comb += alu.oper_i[0:3].eq(self.oper_i)
 264             comb += alu.imm_i.eq(self.imm_i)
 265
 266         return m
 267
 268
 269 class CompUnitBR(CompUnitsBase):
 270
 271     def __init__(self, rwid, opwid):
 272         """ Inputs:
 273
 274             * :rwid:   bit width of register file(s) - both FP and INT
 275             * :opwid:  operand bit width
 276
 277             Note: bgt unit is returned so that a shadow unit can be created
 278             for it
 279         """
 280         self.opwid = opwid
 281
 282         # inputs
 283         self.oper_i = Signal(opwid, reset_less=True)
 284         self.imm_i = Signal(rwid, reset_less=True)
 285
 286         # Branch ALU and CU
 287         self.bgt = BranchALU(rwid)
 288         aluopwid = 3  # extra bit for immediate mode
 289         self.br1 = ComputationUnitNoDelay(rwid, self.bgt)
 290         CompUnitsBase.__init__(self, rwid, [self.br1])
 291
 292     def elaborate(self, platform):
 293         m = CompUnitsBase.elaborate(self, platform)
 294         comb = m.d.comb
 295
 296         # hand the same operation to all units
 297         for alu in self.units:
 298             comb += alu.oper_i.eq(self.oper_i)
 299             comb += alu.imm_i.eq(self.imm_i)
 300
 301         return m
 302
 303
 304 class FunctionUnits(Elaboratable):
 305
 306     def __init__(self, n_regs, n_int_alus):
 307         self.n_regs = n_regs
 308         self.n_int_alus = n_int_alus
 309
 310         self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
 311         self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
 312         self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in
 313
 314         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 315         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 316
 317         self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
 318         self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
 319         self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)
 320
 321         self.readable_o = Signal(n_int_alus, reset_less=True)
 322         self.writable_o = Signal(n_int_alus, reset_less=True)
 323
 324         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 325         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 326         self.go_die_i = Signal(n_int_alus, reset_less=True)
 327         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 328
 329         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 330
 331     def elaborate(self, platform):
 332         m = Module()
 333         comb = m.d.comb
 334         sync = m.d.sync
 335
 336         n_intfus = self.n_int_alus
 337
 338         # Integer FU-FU Dep Matrix
 339         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 340         m.submodules.intfudeps = intfudeps
 341         # Integer FU-Reg Dep Matrix
 342         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 343         m.submodules.intregdeps = intregdeps
 344
 345         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 346         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 347
 348         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 349         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 350
 351         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 352         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 353         self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid
 354
 355         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 356         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 357         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 358         comb += intfudeps.go_die_i.eq(self.go_die_i)
 359         comb += self.readable_o.eq(intfudeps.readable_o)
 360         comb += self.writable_o.eq(intfudeps.writable_o)
 361
 362         # Connect function issue / arrays, and dest/src1/src2
 363         comb += intregdeps.dest_i.eq(self.dest_i)
 364         comb += intregdeps.src_i[0].eq(self.src1_i)
 365         comb += intregdeps.src_i[1].eq(self.src2_i)
 366
 367         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 368         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 369         comb += intregdeps.go_die_i.eq(self.go_die_i)
 370         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 371
 372         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 373         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 374         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 375
 376         return m
 377
 378
 379 class Scoreboard(Elaboratable):
 380     def __init__(self, rwid, n_regs):
 381         """ Inputs:
 382
 383             * :rwid:   bit width of register file(s) - both FP and INT
 384             * :n_regs: depth of register file(s) - number of FP and INT regs
 385         """
 386         self.rwid = rwid
 387         self.n_regs = n_regs
 388
 389         # Register Files
 390         self.intregs = RegFileArray(rwid, n_regs)
 391         self.fpregs = RegFileArray(rwid, n_regs)
 392
 393         # Memory (test for now)
 394         self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long
 395
 396         # issue q needs to get at these
 397         self.aluissue = IssueUnitGroup(2)
 398         self.lsissue = IssueUnitGroup(2)
 399         self.brissue = IssueUnitGroup(1)
 400         # and these
 401         self.alu_oper_i = Signal(4, reset_less=True)
 402         self.alu_imm_i = Signal(rwid, reset_less=True)
 403         self.br_oper_i = Signal(4, reset_less=True)
 404         self.br_imm_i = Signal(rwid, reset_less=True)
 405         self.ls_oper_i = Signal(4, reset_less=True)
 406         self.ls_imm_i = Signal(rwid, reset_less=True)
 407
 408         # inputs
 409         self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
 410         self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
 411         self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
 412         self.reg_enable_i = Signal(reset_less=True)  # enable reg decode
 413
 414         # outputs
 415         self.issue_o = Signal(reset_less=True)  # instruction was accepted
 416         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 417
 418         # for branch speculation experiment.  branch_direction = 0 if
 419         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 420         # branch_succ and branch_fail are requests to have the current
 421         # instruction be dependent on the branch unit "shadow" capability.
 422         self.branch_succ_i = Signal(reset_less=True)
 423         self.branch_fail_i = Signal(reset_less=True)
 424         self.branch_direction_o = Signal(2, reset_less=True)
 425
 426     def elaborate(self, platform):
 427         m = Module()
 428         comb = m.d.comb
 429         sync = m.d.sync
 430
 431         m.submodules.intregs = self.intregs
 432         m.submodules.fpregs = self.fpregs
 433         m.submodules.mem = mem = self.mem
 434
 435         # register ports
 436         int_dest = self.intregs.write_port("dest")
 437         int_src1 = self.intregs.read_port("src1")
 438         int_src2 = self.intregs.read_port("src2")
 439
 440         fp_dest = self.fpregs.write_port("dest")
 441         fp_src1 = self.fpregs.read_port("src1")
 442         fp_src2 = self.fpregs.read_port("src2")
 443
 444         # Int ALUs and BR ALUs
 445         n_int_alus = 5
 446         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 447         cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs
 448
 449         # LDST Comp Units
 450         n_ldsts = 2
 451         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)
 452
 453         # Comp Units
 454         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 455         bgt = cub.bgt  # get at the branch computation unit
 456         br1 = cub.br1
 457
 458         # Int FUs
 459         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 460
 461         # Memory FUs
 462         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 463
 464         # Memory Priority Picker 1: one gateway per memory port
 465         # picks 1 reader and 1 writer to intreg
 466         mempick1 = GroupPicker(n_ldsts)
 467         m.submodules.mempick1 = mempick1
 468
 469         # Count of number of FUs
 470         n_intfus = n_int_alus
 471         n_fp_fus = 0  # for now
 472
 473         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 474         # picks 1 reader and 1 writer to intreg
 475         intpick1 = GroupPicker(n_intfus)
 476         m.submodules.intpick1 = intpick1
 477
 478         # INT/FP Issue Unit
 479         regdecode = RegDecode(self.n_regs)
 480         m.submodules.regdecode = regdecode
 481         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 482         m.submodules.issueunit = issueunit
 483
 484         # Shadow Matrix.  currently n_intfus shadows, to be used for
 485         # write-after-write hazards.  NOTE: there is one extra for branches,
 486         # so the shadow width is increased by 1
 487         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 488         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 489
 490         # record previous instruction to cast shadow on current instruction
 491         prev_shadow = Signal(n_intfus)
 492
 493         # Branch Speculation recorder.  tracks the success/fail state as
 494         # each instruction is issued, so that when the branch occurs the
 495         # allow/cancel can be issued as appropriate.
 496         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 497
 498         # ---------
 499         # ok start wiring things together...
 500         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 501         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 502         # ---------
 503
 504         # ---------
 505         # Issue Unit is where it starts.  set up some in/outs for this module
 506         # ---------
 507         comb += [regdecode.dest_i.eq(self.int_dest_i),
 508                  regdecode.src1_i.eq(self.int_src1_i),
 509                  regdecode.src2_i.eq(self.int_src2_i),
 510                  regdecode.enable_i.eq(self.reg_enable_i),
 511                  self.issue_o.eq(issueunit.issue_o)
 512                  ]
 513
 514         # take these to outside (issue needs them)
 515         comb += cua.oper_i.eq(self.alu_oper_i)
 516         comb += cua.imm_i.eq(self.alu_imm_i)
 517         comb += cub.oper_i.eq(self.br_oper_i)
 518         comb += cub.imm_i.eq(self.br_imm_i)
 519         comb += cul.oper_i.eq(self.ls_oper_i)
 520         comb += cul.imm_i.eq(self.ls_imm_i)
 521
 522         # TODO: issueunit.f (FP)
 523
 524         # and int function issue / busy arrays, and dest/src1/src2
 525         comb += intfus.dest_i.eq(regdecode.dest_o)
 526         comb += intfus.src1_i.eq(regdecode.src1_o)
 527         comb += intfus.src2_i.eq(regdecode.src2_o)
 528
 529         fn_issue_o = issueunit.fn_issue_o
 530
 531         comb += intfus.fn_issue_i.eq(fn_issue_o)
 532         comb += issueunit.busy_i.eq(cu.busy_o)
 533         comb += self.busy_o.eq(cu.busy_o.bool())
 534
 535         # ---------
 536         # Memory Function Unit
 537         # ---------
 538         reset_b = Signal(cul.n_units, reset_less=True)
 539         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 540
 541         comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
 542         comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
 543         comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit
 544
 545         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 546         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 547         # issue_i.  multi-issue gets a bit more complex but not a lot.
 548         prior_ldsts = Signal(cul.n_units, reset_less=True)
 549         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 550         with m.If(self.ls_oper_i[3]):  # LD bit of operand
 551             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 552         with m.If(self.ls_oper_i[2]):  # ST bit of operand
 553             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 554
 555         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 556         # just immediately activate go_adr
 557         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 558
 559         # connect up address data
 560         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 561         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 562
 563         # connect loadable / storable to go_ld/go_st.
 564         # XXX should only be done when the memory ld/st has actually happened!
 565         go_st_i = Signal(cul.n_units, reset_less=True)
 566         go_ld_i = Signal(cul.n_units, reset_less=True)
 567         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
 568                            cul.adr_rel_o & cul.ld_o)
 569         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
 570                            cul.sto_rel_o & cul.st_o)
 571         comb += memfus.go_ld_i.eq(go_ld_i)
 572         comb += memfus.go_st_i.eq(go_st_i)
 573         #comb += cul.go_wr_i.eq(go_ld_i)
 574         comb += cul.go_st_i.eq(go_st_i)
 575
 576         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 577         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 578         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 579
 580         # ---------
 581         # merge shadow matrices outputs
 582         # ---------
 583
 584         # these are explained in ShadowMatrix docstring, and are to be
 585         # connected to the FUReg and FUFU Matrices, to get them to reset
 586         anydie = Signal(n_intfus, reset_less=True)
 587         allshadown = Signal(n_intfus, reset_less=True)
 588         shreset = Signal(n_intfus, reset_less=True)
 589         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 590         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 591         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 592
 593         # ---------
 594         # connect fu-fu matrix
 595         # ---------
 596
 597         # Group Picker... done manually for now.
 598         go_rd_o = intpick1.go_rd_o
 599         go_wr_o = intpick1.go_wr_o
 600         go_rd_i = intfus.go_rd_i
 601         go_wr_i = intfus.go_wr_i
 602         go_die_i = intfus.go_die_i
 603         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 604         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])  # rd
 605         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])  # wr
 606         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die
 607
 608         # Connect Picker
 609         # ---------
 610         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 611         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.done_o[0:n_intfus])
 612         int_rd_o = intfus.readable_o
 613         int_wr_o = intfus.writable_o
 614         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 615         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 616
 617         # ---------
 618         # Shadow Matrix
 619         # ---------
 620
 621         comb += shadows.issue_i.eq(fn_issue_o)
 622         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 623         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 624         # ---------
 625         # NOTE; this setup is for the instruction order preservation...
 626
 627         # connect shadows / go_dies to Computation Units
 628         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 629         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 630
 631         # ok connect first n_int_fu shadows to busy lines, to create an
 632         # instruction-order linked-list-like arrangement, using a bit-matrix
 633         # (instead of e.g. a ring buffer).
 634
 635         # when written, the shadow can be cancelled (and was good)
 636         for i in range(n_intfus):
 637             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 638
 639         # *previous* instruction shadows *current* instruction, and, obviously,
 640         # if the previous is completed (!busy) don't cast the shadow!
 641         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 642         for i in range(n_intfus):
 643             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 644
 645         # ---------
 646         # ... and this is for branch speculation.  it uses the extra bit
 647         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 648         # only needs to set shadow_i, s_fail_i and s_good_i
 649
 650         # issue captures shadow_i (if enabled)
 651         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 652
 653         bactive = Signal(reset_less=True)
 654         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 655
 656         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 657         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 658             comb += bshadow.issue_i.eq(fn_issue_o)
 659             for i in range(n_intfus):
 660                 with m.If(fn_issue_o & (Const(1 << i))):
 661                     comb += bshadow.shadow_i[i][0].eq(1)
 662
 663         # finally, we need an indicator to the test infrastructure as to
 664         # whether the branch succeeded or failed, plus, link up to the
 665         # "recorder" of whether the instruction was under shadow or not
 666
 667         with m.If(br1.issue_i):
 668             sync += bspec.active_i.eq(1)
 669         with m.If(self.branch_succ_i):
 670             comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 671         with m.If(self.branch_fail_i):
 672             comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 673
 674         # branch is active (TODO: a better signal: this is over-using the
 675         # go_write signal - actually the branch should not be "writing")
 676         with m.If(br1.go_wr_i):
 677             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 678             sync += bspec.active_i.eq(0)
 679             comb += bspec.br_i.eq(1)
 680             # branch occurs if data == 1, failed if data == 0
 681             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 682             for i in range(n_intfus):
 683                 # *expected* direction of the branch matched against *actual*
 684                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 685                 # ... or it didn't
 686                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 687
 688         # ---------
 689         # Connect Register File(s)
 690         # ---------
 691         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 692         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 693         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 694
 695         # connect ALUs to regfule
 696         comb += int_dest.data_i.eq(cu.data_o)
 697         comb += cu.src1_i.eq(int_src1.data_o)
 698         comb += cu.src2_i.eq(int_src2.data_o)
 699
 700         # connect ALU Computation Units
 701         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 702         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 703         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 704
 705         return m
 706
 707     def __iter__(self):
 708         yield from self.intregs
 709         yield from self.fpregs
 710         yield self.int_dest_i
 711         yield self.int_src1_i
 712         yield self.int_src2_i
 713         yield self.issue_o
 714         yield self.branch_succ_i
 715         yield self.branch_fail_i
 716         yield self.branch_direction_o
 717
 718     def ports(self):
 719         return list(self)
 720
 721
 722 class IssueToScoreboard(Elaboratable):
 723
 724     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 725         self.qlen = qlen
 726         self.n_in = n_in
 727         self.n_out = n_out
 728         self.rwid = rwid
 729         self.opw = opwid
 730         self.n_regs = n_regs
 731
 732         mqbits = unsigned(int(log(qlen) / log(2))+2)
 733         self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
 734         self.p_ready_o = Signal()  # instructions were added
 735         self.data_i = Instruction._nq(n_in, "data_i")
 736
 737         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 738         self.qlen_o = Signal(mqbits, reset_less=True)
 739
 740     def elaborate(self, platform):
 741         m = Module()
 742         comb = m.d.comb
 743         sync = m.d.sync
 744
 745         iq = InstructionQ(self.rwid, self.opw, self.qlen,
 746                           self.n_in, self.n_out)
 747         sc = Scoreboard(self.rwid, self.n_regs)
 748         m.submodules.iq = iq
 749         m.submodules.sc = sc
 750
 751         # get at the regfile for testing
 752         self.intregs = sc.intregs
 753
 754         # and the "busy" signal and instruction queue length
 755         comb += self.busy_o.eq(sc.busy_o)
 756         comb += self.qlen_o.eq(iq.qlen_o)
 757
 758         # link up instruction queue
 759         comb += iq.p_add_i.eq(self.p_add_i)
 760         comb += self.p_ready_o.eq(iq.p_ready_o)
 761         for i in range(self.n_in):
 762             comb += eq(iq.data_i[i], self.data_i[i])
 763
 764         # take instruction and process it.  note that it's possible to
 765         # "inspect" the queue contents *without* actually removing the
 766         # items.  items are only removed when the
 767
 768         # in "waiting" state
 769         wait_issue_br = Signal()
 770         wait_issue_alu = Signal()
 771         wait_issue_ls = Signal()
 772
 773         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 774             # set instruction pop length to 1 if the unit accepted
 775             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 776                 with m.If(iq.qlen_o != 0):
 777                     comb += iq.n_sub_i.eq(1)
 778             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 779                 with m.If(iq.qlen_o != 0):
 780                     comb += iq.n_sub_i.eq(1)
 781             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 782                 with m.If(iq.qlen_o != 0):
 783                     comb += iq.n_sub_i.eq(1)
 784
 785         # see if some instruction(s) are here.  note that this is
 786         # "inspecting" the in-place queue.  note also that on the
 787         # cycle following "waiting" for fn_issue_o to be set, the
 788         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 789         with m.If(iq.qlen_o != 0):
 790             # get the operands and operation
 791             imm = iq.data_o[0].imm_data.data
 792             dest = iq.data_o[0].write_reg.data
 793             src1 = iq.data_o[0].read_reg1.data
 794             src2 = iq.data_o[0].read_reg2.data
 795             op = iq.data_o[0].insn_type
 796             fu = iq.data_o[0].fn_unit
 797             opi = iq.data_o[0].imm_data.ok  # immediate set
 798
 799             # set the src/dest regs
 800             comb += sc.int_dest_i.eq(dest)
 801             comb += sc.int_src1_i.eq(src1)
 802             comb += sc.int_src2_i.eq(src2)
 803             comb += sc.reg_enable_i.eq(1)  # enable the regfile
 804
 805             # choose a Function-Unit-Group
 806             with m.If((op & (0x3 << 2)) != 0):  # branch
 807                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 808                 comb += sc.br_imm_i.eq(imm)
 809                 comb += sc.brissue.insn_i.eq(1)
 810                 comb += wait_issue_br.eq(1)
 811             with m.Elif((op & (0x3 << 4)) != 0):  # ld/st
 812                 # see compldst.py
 813                 # bit 0: ADD/SUB
 814                 # bit 1: immed
 815                 # bit 4: LD
 816                 # bit 5: ST
 817                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 818                 comb += sc.ls_imm_i.eq(imm)
 819                 comb += sc.lsissue.insn_i.eq(1)
 820                 comb += wait_issue_ls.eq(1)
 821             with m.Else():  # alu
 822                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 823                 comb += sc.alu_imm_i.eq(imm)
 824                 comb += sc.aluissue.insn_i.eq(1)
 825                 comb += wait_issue_alu.eq(1)
 826
 827             # XXX TODO
 828             # these indicate that the instruction is to be made
 829             # shadow-dependent on
 830             # (either) branch success or branch fail
 831             # yield sc.branch_fail_i.eq(branch_fail)
 832             # yield sc.branch_succ_i.eq(branch_success)
 833
 834         return m
 835
 836     def __iter__(self):
 837         yield self.p_ready_o
 838         for o in self.data_i:
 839             yield from list(o)
 840         yield self.p_add_i
 841
 842     def ports(self):
 843         return list(self)
 844
 845
 846 def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
 847             branch_success, branch_fail):
 848     instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
 849                 'imm_data': (imm, op_imm),
 850                'read_reg1': src1, 'read_reg2': src2}]
 851
 852     sendlen = 1
 853     for idx, instr in enumerate(instrs):
 854         imm, op_imm = instr['imm_data']
 855         reg1 = instr['read_reg1']
 856         reg2 = instr['read_reg2']
 857         dest = instr['write_reg']
 858         insn_type = instr['insn_type']
 859         fn_unit = instr['fn_unit']
 860         yield dut.data_i[idx].insn_type.eq(insn_type)
 861         yield dut.data_i[idx].fn_unit.eq(fn_unit)
 862         yield dut.data_i[idx].read_reg1.data.eq(reg1)
 863         yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO
 864         yield dut.data_i[idx].read_reg2.data.eq(reg2)
 865         yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO
 866         yield dut.data_i[idx].write_reg.data.eq(dest)
 867         yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO
 868         yield dut.data_i[idx].imm_data.data.eq(imm)
 869         yield dut.data_i[idx].imm_data.ok.eq(op_imm)
 870         di = yield dut.data_i[idx]
 871         print("senddata %d %x" % (idx, di))
 872     yield dut.p_add_i.eq(sendlen)
 873     yield
 874     o_p_ready = yield dut.p_ready_o
 875     while not o_p_ready:
 876         yield
 877         o_p_ready = yield dut.p_ready_o
 878
 879     yield dut.p_add_i.eq(0)
 880
 881
 882 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 883     yield from disable_issue(dut)
 884     yield dut.int_dest_i.eq(dest)
 885     yield dut.int_src1_i.eq(src1)
 886     yield dut.int_src2_i.eq(src2)
 887     if (op & (0x3 << 2)) != 0:  # branch
 888         yield dut.brissue.insn_i.eq(1)
 889         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 890         yield dut.br_imm_i.eq(imm)
 891         dut_issue = dut.brissue
 892     else:
 893         yield dut.aluissue.insn_i.eq(1)
 894         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 895         yield dut.alu_imm_i.eq(imm)
 896         dut_issue = dut.aluissue
 897     yield dut.reg_enable_i.eq(1)
 898
 899     # these indicate that the instruction is to be made shadow-dependent on
 900     # (either) branch success or branch fail
 901     yield dut.branch_fail_i.eq(branch_fail)
 902     yield dut.branch_succ_i.eq(branch_success)
 903
 904     yield
 905     yield from wait_for_issue(dut, dut_issue)
 906
 907
 908 def print_reg(dut, rnums):
 909     rs = []
 910     for rnum in rnums:
 911         reg = yield dut.intregs.regs[rnum].reg
 912         rs.append("%x" % reg)
 913     rnums = map(str, rnums)
 914     print("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 915
 916
 917 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 918     insts = []
 919     for i in range(n_ops):
 920         src1 = randint(1, dut.n_regs-1)
 921         src2 = randint(1, dut.n_regs-1)
 922         imm = randint(1, (1 << dut.rwid)-1)
 923         dest = randint(1, dut.n_regs-1)
 924         op = randint(0, max_opnums)
 925         opi = 0 if randint(0, 2) else 1  # set true if random is nonzero
 926
 927         if shadowing:
 928             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 929         else:
 930             insts.append((src1, src2, dest, op, opi, imm))
 931     return insts
 932
 933
 934 def wait_for_busy_clear(dut):
 935     while True:
 936         busy_o = yield dut.busy_o
 937         if not busy_o:
 938             break
 939         print("busy",)
 940         yield
 941
 942
 943 def disable_issue(dut):
 944     yield dut.aluissue.insn_i.eq(0)
 945     yield dut.brissue.insn_i.eq(0)
 946     yield dut.lsissue.insn_i.eq(0)
 947
 948
 949 def wait_for_issue(dut, dut_issue):
 950     while True:
 951         issue_o = yield dut_issue.fn_issue_o
 952         if issue_o:
 953             yield from disable_issue(dut)
 954             yield dut.reg_enable_i.eq(0)
 955             break
 956         print("busy",)
 957         # yield from print_reg(dut, [1,2,3])
 958         yield
 959     # yield from print_reg(dut, [1,2,3])
 960
 961
 962 def scoreboard_branch_sim(dut, alusim):
 963
 964     iseed = 3
 965
 966     for i in range(1):
 967
 968         print("rseed", iseed)
 969         seed(iseed)
 970         iseed += 1
 971
 972         yield dut.branch_direction_o.eq(0)
 973
 974         # set random values in the registers
 975         for i in range(1, dut.n_regs):
 976             val = 31+i*3
 977             val = randint(0, (1 << alusim.rwidth)-1)
 978             yield dut.intregs.regs[i].reg.eq(val)
 979             alusim.setval(i, val)
 980
 981         if False:
 982             # create some instructions: branches create a tree
 983             insts = create_random_ops(dut, 1, True, 1)
 984             #insts.append((6, 6, 1, 2, (0, 0)))
 985             #insts.append((4, 3, 3, 0, (0, 0)))
 986
 987             src1 = randint(1, dut.n_regs-1)
 988             src2 = randint(1, dut.n_regs-1)
 989             #op = randint(4, 7)
 990             op = 4  # only BGT at the moment
 991
 992             branch_ok = create_random_ops(dut, 1, True, 1)
 993             branch_fail = create_random_ops(dut, 1, True, 1)
 994
 995             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 996
 997         if True:
 998             insts = []
 999             insts.append((3, 5, 2, 0, (0, 0)))
1000             branch_ok = []
1001             branch_fail = []
1002             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
1003             branch_ok.append(None)
1004             branch_fail.append((1, 1, 2, 0, (0, 1)))
1005             #branch_fail.append( None )
1006             insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))
1007
1008         siminsts = deepcopy(insts)
1009
1010         # issue instruction(s)
1011         i = -1
1012         instrs = insts
1013         branch_direction = 0
1014         while instrs:
1015             yield
1016             yield
1017             i += 1
1018             branch_direction = yield dut.branch_direction_o  # way branch went
1019             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
1020             if branch_direction == 1 and shadow_on:
1021                 print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1022                 continue  # branch was "success" and this is a "failed"... skip
1023             if branch_direction == 2 and shadow_off:
1024                 print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1025                 continue  # branch was "fail" and this is a "success"... skip
1026             if branch_direction != 0:
1027                 shadow_on = 0
1028                 shadow_off = 0
1029             is_branch = op >= 4
1030             if is_branch:
1031                 branch_ok, branch_fail = dest
1032                 dest = src2
1033                 # ok zip up the branch success / fail instructions and
1034                 # drop them into the queue, one marked "to have branch success"
1035                 # the other to be marked shadow branch "fail".
1036                 # one out of each of these will be cancelled
1037                 for ok, fl in zip(branch_ok, branch_fail):
1038                     if ok:
1039                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1040                     if fl:
1041                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1042             print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
1043                   (i, src1, src2, dest, op, shadow_on, shadow_off))
1044             yield from int_instr(dut, op, src1, src2, dest,
1045                                  shadow_on, shadow_off)
1046
1047         # wait for all instructions to stop before checking
1048         yield
1049         yield from wait_for_busy_clear(dut)
1050
1051         i = -1
1052         while siminsts:
1053             instr = siminsts.pop(0)
1054             if instr is None:
1055                 continue
1056             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
1057             i += 1
1058             is_branch = op >= 4
1059             if is_branch:
1060                 branch_ok, branch_fail = dest
1061                 dest = src2
1062             print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
1063                   (i, src1, src2, dest, op, shadow_on, shadow_off))
1064             branch_res = alusim.op(op, src1, src2, dest)
1065             if is_branch:
1066                 if branch_res:
1067                     siminsts += branch_ok
1068                 else:
1069                     siminsts += branch_fail
1070
1071         # check status
1072         yield from alusim.check(dut)
1073         yield from alusim.dump(dut)
1074
1075
def scoreboard_sim(dut, alusim):
    """ Simulation process: preload the registers, push a list of
        instructions through the dut's instruction queue (mirroring each
        one into alusim), wait for the dut to drain, then have alusim
        check and dump the results.

        The instruction list is assembled from hand-toggled "if" blocks.
        The issue loop unpacks eight fields per entry:
        (src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail)).
        Several of the disabled regression vectors are shorter than that
        and would not unpack if simply switched on.
    """
    seed(0)

    for _ in range(1):

        # preload each register (other than 0) with its own number
        for regnum in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = regnum
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x20, 1, 1, (0, 0)))  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, 0, 0, (0, 0)))
            instrs.append((7, 6, 6, 2, 0, 0, (0, 0)))
            instrs.append((1, 7, 2, 2, 0, 0, (0, 0)))

        if True:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s): each one is applied to the software model
        # first and then queued into the dut (instr_q waits for the queue
        # to acknowledge before returning)
        for idx, instr in enumerate(instrs):
            print(idx, instr)
            (src1, src2, dest, op, fn_unit,
             opi, imm, (br_ok, br_fail)) = instr

            summary = (idx, src1, src2, dest, op, fn_unit, opi, imm)
            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" % summary)
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking: first for the
        # queue to empty, then four further clocks, then for busy to clear
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1227
1228
def test_scoreboard():
    """ Build an IssueToScoreboard, write its RTLIL to
        test_scoreboard6600.il, then run scoreboard_sim against it with a
        VCD trace written to test_scoreboard6600.vcd.
    """
    reg_width, n_regs = 16, 8
    dut = IssueToScoreboard(2, 1, 1, reg_width, 8, n_regs)
    alusim = RegSim(reg_width, n_regs)
    memsim = MemSim(reg_width, n_regs)  # created but not used below

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative: the branch-shadowing simulation
    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1242
1243
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()