src/soc/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5
   6 from soc.regfile.regfile import RegFileArray, treereduce
   7 from soc.scoreboard.fu_fu_matrix import FUFUDepMatrix
   8 from soc.scoreboard.fu_reg_matrix import FURegDepMatrix
   9 from soc.scoreboard.global_pending import GlobalPending
  10 from soc.scoreboard.group_picker import GroupPicker
  11 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  12 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  13 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  14 from soc.scoreboard.memfu import MemFunctionUnits
  15
  16 from compalu import ComputationUnitNoDelay
  17 from compldst import LDSTCompUnit
  18 from testmem import TestMemory
  19
  20 from alu_hier import ALU, BranchALU
  21 from nmutil.latch import SRLatch
  22 from nmutil.nmoperator import eq
  23
  24 from random import randint, seed
  25 from copy import deepcopy
  26 from math import log
  27
  28 from soc.experiment.sim import RegSim, MemSim
  29 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
  30
  31
  32 class CompUnitsBase(Elaboratable):
  33     """ Computation Unit Base class.
  34
  35         Amazingly, this class works recursively.  It's supposed to just
  36         look after some ALUs (that can handle the same operations),
  37         grouping them together, however it turns out that the same code
  38         can also group *groups* of Computation Units together as well.
  39
  40         Basically it was intended just to concatenate the ALU's issue,
  41         go_rd etc. signals together, which start out as bits and become
  42         sequences.  Turns out that the same trick works just as well
  43         on Computation Units!
  44
  45         So this class may be used recursively to present a top-level
  46         sequential concatenation of all the signals in and out of
  47         ALUs, whilst at the same time making it convenient to group
  48         ALUs together.
  49
  50         At the lower level, the intent is that groups of (identical)
  51         ALUs may be passed the same operation.  Even beyond that,
  52         the intent is that that group of (identical) ALUs actually
  53         share the *same pipeline* and as such become a "Concurrent
  54         Computation Unit" as defined by Mitch Alsup (see section
  55         11.4.9.3)
  56     """
  57     def __init__(self, rwid, units, ldstmode=False):
  58         """ Inputs:
  59
  60             * :rwid:   bit width of register file(s) - both FP and INT
  61             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  62         """
  63         self.units = units
  64         self.ldstmode = ldstmode
  65         self.rwid = rwid
  66         self.rwid = rwid
  67         if units and isinstance(units[0], CompUnitsBase):
  68             self.n_units = 0
  69             for u in self.units:
  70                 self.n_units += u.n_units
  71         else:
  72             self.n_units = len(units)
  73
  74         n_units = self.n_units
  75
  76         # inputs
  77         self.issue_i = Signal(n_units, reset_less=True)
  78         self.go_rd_i = Signal(n_units, reset_less=True)
  79         self.go_wr_i = Signal(n_units, reset_less=True)
  80         self.shadown_i = Signal(n_units, reset_less=True)
  81         self.go_die_i = Signal(n_units, reset_less=True)
  82         if ldstmode:
  83             self.go_ad_i = Signal(n_units, reset_less=True)
  84             self.go_st_i = Signal(n_units, reset_less=True)
  85
  86         # outputs
  87         self.busy_o = Signal(n_units, reset_less=True)
  88         self.rd_rel_o = Signal(n_units, reset_less=True)
  89         self.req_rel_o = Signal(n_units, reset_less=True)
  90         self.done_o = Signal(n_units, reset_less=True)
  91         if ldstmode:
  92             self.ld_o = Signal(n_units, reset_less=True) # op is LD
  93             self.st_o = Signal(n_units, reset_less=True) # op is ST
  94             self.adr_rel_o = Signal(n_units, reset_less=True)
  95             self.sto_rel_o = Signal(n_units, reset_less=True)
  96             self.load_mem_o = Signal(n_units, reset_less=True)
  97             self.stwd_mem_o = Signal(n_units, reset_less=True)
  98             self.addr_o = Signal(rwid, reset_less=True)
  99
 100         # in/out register data (note: not register#, actual data)
 101         self.data_o = Signal(rwid, reset_less=True)
 102         self.src1_i = Signal(rwid, reset_less=True)
 103         self.src2_i = Signal(rwid, reset_less=True)
 104         # input operand
 105
 106     def elaborate(self, platform):
 107         m = Module()
 108         comb = m.d.comb
 109
 110         for i, alu in enumerate(self.units):
 111             setattr(m.submodules, "comp%d" % i, alu)
 112
 113         go_rd_l = []
 114         go_wr_l = []
 115         issue_l = []
 116         busy_l = []
 117         req_rel_l = []
 118         done_l = []
 119         rd_rel_l = []
 120         shadow_l = []
 121         godie_l = []
 122         for alu in self.units:
 123             req_rel_l.append(alu.req_rel_o)
 124             done_l.append(alu.done_o)
 125             rd_rel_l.append(alu.rd_rel_o)
 126             shadow_l.append(alu.shadown_i)
 127             godie_l.append(alu.go_die_i)
 128             go_wr_l.append(alu.go_wr_i)
 129             go_rd_l.append(alu.go_rd_i)
 130             issue_l.append(alu.issue_i)
 131             busy_l.append(alu.busy_o)
 132         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 133         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 134         comb += self.done_o.eq(Cat(*done_l))
 135         comb += self.busy_o.eq(Cat(*busy_l))
 136         comb += Cat(*godie_l).eq(self.go_die_i)
 137         comb += Cat(*shadow_l).eq(self.shadown_i)
 138         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 139         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 140         comb += Cat(*issue_l).eq(self.issue_i)
 141
 142         # connect data register input/output
 143
 144         # merge (OR) all integer FU / ALU outputs to a single value
 145         if self.units:
 146             data_o = treereduce(self.units, "data_o")
 147             comb += self.data_o.eq(data_o)
 148             if self.ldstmode:
 149                 addr_o = treereduce(self.units, "addr_o")
 150                 comb += self.addr_o.eq(addr_o)
 151
 152         for i, alu in enumerate(self.units):
 153             comb += alu.src1_i.eq(self.src1_i)
 154             comb += alu.src2_i.eq(self.src2_i)
 155
 156         if not self.ldstmode:
 157             return m
 158
 159         ldmem_l = []
 160         stmem_l = []
 161         go_ad_l = []
 162         go_st_l = []
 163         ld_l = []
 164         st_l = []
 165         adr_rel_l = []
 166         sto_rel_l = []
 167         for alu in self.units:
 168             ld_l.append(alu.ld_o)
 169             st_l.append(alu.st_o)
 170             adr_rel_l.append(alu.adr_rel_o)
 171             sto_rel_l.append(alu.sto_rel_o)
 172             ldmem_l.append(alu.load_mem_o)
 173             stmem_l.append(alu.stwd_mem_o)
 174             go_ad_l.append(alu.go_ad_i)
 175             go_st_l.append(alu.go_st_i)
 176         comb += self.ld_o.eq(Cat(*ld_l))
 177         comb += self.st_o.eq(Cat(*st_l))
 178         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 179         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 180         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 181         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 182         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 183         comb += Cat(*go_st_l).eq(self.go_st_i)
 184
 185         return m
 186
 187
 188 class CompUnitLDSTs(CompUnitsBase):
 189
 190     def __init__(self, rwid, opwid, n_ldsts, mem):
 191         """ Inputs:
 192
 193             * :rwid:   bit width of register file(s) - both FP and INT
 194             * :opwid:  operand bit width
 195         """
 196         self.opwid = opwid
 197
 198         # inputs
 199         self.oper_i = Signal(opwid, reset_less=True)
 200         self.imm_i = Signal(rwid, reset_less=True)
 201
 202         # Int ALUs
 203         self.alus = []
 204         for i in range(n_ldsts):
 205             self.alus.append(ALU(rwid))
 206
 207         units = []
 208         for alu in self.alus:
 209             aluopwid = 4 # see compldst.py for "internal" opcode
 210             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 211
 212         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 213
 214     def elaborate(self, platform):
 215         m = CompUnitsBase.elaborate(self, platform)
 216         comb = m.d.comb
 217
 218         # hand the same operation to all units, 4 lower bits though
 219         for alu in self.units:
 220             comb += alu.oper_i[0:4].eq(self.oper_i)
 221             comb += alu.imm_i.eq(self.imm_i)
 222             comb += alu.isalu_i.eq(0)
 223
 224         return m
 225
 226
 227 class CompUnitALUs(CompUnitsBase):
 228
 229     def __init__(self, rwid, opwid, n_alus):
 230         """ Inputs:
 231
 232             * :rwid:   bit width of register file(s) - both FP and INT
 233             * :opwid:  operand bit width
 234         """
 235         self.opwid = opwid
 236
 237         # inputs
 238         self.oper_i = Signal(opwid, reset_less=True)
 239         self.imm_i = Signal(rwid, reset_less=True)
 240
 241         # Int ALUs
 242         alus = []
 243         for i in range(n_alus):
 244             alus.append(ALU(rwid))
 245
 246         units = []
 247         for alu in alus:
 248             aluopwid = 3 # extra bit for immediate mode
 249             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 250
 251         CompUnitsBase.__init__(self, rwid, units)
 252
 253     def elaborate(self, platform):
 254         m = CompUnitsBase.elaborate(self, platform)
 255         comb = m.d.comb
 256
 257         # hand the same operation to all units, only lower 3 bits though
 258         for alu in self.units:
 259             comb += alu.oper_i[0:3].eq(self.oper_i)
 260             comb += alu.imm_i.eq(self.imm_i)
 261
 262         return m
 263
 264
 265 class CompUnitBR(CompUnitsBase):
 266
 267     def __init__(self, rwid, opwid):
 268         """ Inputs:
 269
 270             * :rwid:   bit width of register file(s) - both FP and INT
 271             * :opwid:  operand bit width
 272
 273             Note: bgt unit is returned so that a shadow unit can be created
 274             for it
 275         """
 276         self.opwid = opwid
 277
 278         # inputs
 279         self.oper_i = Signal(opwid, reset_less=True)
 280         self.imm_i = Signal(rwid, reset_less=True)
 281
 282         # Branch ALU and CU
 283         self.bgt = BranchALU(rwid)
 284         aluopwid = 3 # extra bit for immediate mode
 285         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 286         CompUnitsBase.__init__(self, rwid, [self.br1])
 287
 288     def elaborate(self, platform):
 289         m = CompUnitsBase.elaborate(self, platform)
 290         comb = m.d.comb
 291
 292         # hand the same operation to all units
 293         for alu in self.units:
 294             comb += alu.oper_i.eq(self.oper_i)
 295             comb += alu.imm_i.eq(self.imm_i)
 296
 297         return m
 298
 299
 300 class FunctionUnits(Elaboratable):
 301
 302     def __init__(self, n_regs, n_int_alus):
 303         self.n_regs = n_regs
 304         self.n_int_alus = n_int_alus
 305
 306         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 307         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 308         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 309
 310         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 311         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 312
 313         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 314         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 315         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 316
 317         self.readable_o = Signal(n_int_alus, reset_less=True)
 318         self.writable_o = Signal(n_int_alus, reset_less=True)
 319
 320         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 321         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 322         self.go_die_i = Signal(n_int_alus, reset_less=True)
 323         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 324
 325         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 326
 327     def elaborate(self, platform):
 328         m = Module()
 329         comb = m.d.comb
 330         sync = m.d.sync
 331
 332         n_intfus = self.n_int_alus
 333
 334         # Integer FU-FU Dep Matrix
 335         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 336         m.submodules.intfudeps = intfudeps
 337         # Integer FU-Reg Dep Matrix
 338         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 339         m.submodules.intregdeps = intregdeps
 340
 341         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 342         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 343
 344         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 345         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 346
 347         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 348         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 349         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 350
 351         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 352         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 353         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 354         comb += intfudeps.go_die_i.eq(self.go_die_i)
 355         comb += self.readable_o.eq(intfudeps.readable_o)
 356         comb += self.writable_o.eq(intfudeps.writable_o)
 357
 358         # Connect function issue / arrays, and dest/src1/src2
 359         comb += intregdeps.dest_i.eq(self.dest_i)
 360         comb += intregdeps.src_i[0].eq(self.src1_i)
 361         comb += intregdeps.src_i[1].eq(self.src2_i)
 362
 363         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 364         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 365         comb += intregdeps.go_die_i.eq(self.go_die_i)
 366         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 367
 368         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 369         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 370         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 371
 372         return m
 373
 374
 375 class Scoreboard(Elaboratable):
 376     def __init__(self, rwid, n_regs):
 377         """ Inputs:
 378
 379             * :rwid:   bit width of register file(s) - both FP and INT
 380             * :n_regs: depth of register file(s) - number of FP and INT regs
 381         """
 382         self.rwid = rwid
 383         self.n_regs = n_regs
 384
 385         # Register Files
 386         self.intregs = RegFileArray(rwid, n_regs)
 387         self.fpregs = RegFileArray(rwid, n_regs)
 388
 389         # Memory (test for now)
 390         self.mem = TestMemory(self.rwid, 8) # not too big, takes too long
 391
 392         # issue q needs to get at these
 393         self.aluissue = IssueUnitGroup(2)
 394         self.lsissue = IssueUnitGroup(2)
 395         self.brissue = IssueUnitGroup(1)
 396         # and these
 397         self.alu_oper_i = Signal(4, reset_less=True)
 398         self.alu_imm_i = Signal(rwid, reset_less=True)
 399         self.br_oper_i = Signal(4, reset_less=True)
 400         self.br_imm_i = Signal(rwid, reset_less=True)
 401         self.ls_oper_i = Signal(4, reset_less=True)
 402         self.ls_imm_i = Signal(rwid, reset_less=True)
 403
 404         # inputs
 405         self.int_dest_i = Signal(range(n_regs), reset_less=True) # Dest R# in
 406         self.int_src1_i = Signal(range(n_regs), reset_less=True) # oper1 R# in
 407         self.int_src2_i = Signal(range(n_regs), reset_less=True) # oper2 R# in
 408         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 409
 410         # outputs
 411         self.issue_o = Signal(reset_less=True) # instruction was accepted
 412         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 413
 414         # for branch speculation experiment.  branch_direction = 0 if
 415         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 416         # branch_succ and branch_fail are requests to have the current
 417         # instruction be dependent on the branch unit "shadow" capability.
 418         self.branch_succ_i = Signal(reset_less=True)
 419         self.branch_fail_i = Signal(reset_less=True)
 420         self.branch_direction_o = Signal(2, reset_less=True)
 421
 422     def elaborate(self, platform):
 423         m = Module()
 424         comb = m.d.comb
 425         sync = m.d.sync
 426
 427         m.submodules.intregs = self.intregs
 428         m.submodules.fpregs = self.fpregs
 429         m.submodules.mem = mem = self.mem
 430
 431         # register ports
 432         int_dest = self.intregs.write_port("dest")
 433         int_src1 = self.intregs.read_port("src1")
 434         int_src2 = self.intregs.read_port("src2")
 435
 436         fp_dest = self.fpregs.write_port("dest")
 437         fp_src1 = self.fpregs.read_port("src1")
 438         fp_src2 = self.fpregs.read_port("src2")
 439
 440         # Int ALUs and BR ALUs
 441         n_int_alus = 5
 442         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 443         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 444
 445         # LDST Comp Units
 446         n_ldsts = 2
 447         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)
 448
 449         # Comp Units
 450         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 451         bgt = cub.bgt # get at the branch computation unit
 452         br1 = cub.br1
 453
 454         # Int FUs
 455         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 456
 457         # Memory FUs
 458         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 459
 460         # Memory Priority Picker 1: one gateway per memory port
 461         mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
 462         m.submodules.mempick1 = mempick1
 463
 464         # Count of number of FUs
 465         n_intfus = n_int_alus
 466         n_fp_fus = 0 # for now
 467
 468         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 469         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 470         m.submodules.intpick1 = intpick1
 471
 472         # INT/FP Issue Unit
 473         regdecode = RegDecode(self.n_regs)
 474         m.submodules.regdecode = regdecode
 475         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 476         m.submodules.issueunit = issueunit
 477
 478         # Shadow Matrix.  currently n_intfus shadows, to be used for
 479         # write-after-write hazards.  NOTE: there is one extra for branches,
 480         # so the shadow width is increased by 1
 481         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 482         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 483
 484         # record previous instruction to cast shadow on current instruction
 485         prev_shadow = Signal(n_intfus)
 486
 487         # Branch Speculation recorder.  tracks the success/fail state as
 488         # each instruction is issued, so that when the branch occurs the
 489         # allow/cancel can be issued as appropriate.
 490         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 491
 492         #---------
 493         # ok start wiring things together...
 494         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 495         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 496         #---------
 497
 498         #---------
 499         # Issue Unit is where it starts.  set up some in/outs for this module
 500         #---------
 501         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 502                      regdecode.src1_i.eq(self.int_src1_i),
 503                      regdecode.src2_i.eq(self.int_src2_i),
 504                      regdecode.enable_i.eq(self.reg_enable_i),
 505                      self.issue_o.eq(issueunit.issue_o)
 506                     ]
 507
 508         # take these to outside (issue needs them)
 509         comb += cua.oper_i.eq(self.alu_oper_i)
 510         comb += cua.imm_i.eq(self.alu_imm_i)
 511         comb += cub.oper_i.eq(self.br_oper_i)
 512         comb += cub.imm_i.eq(self.br_imm_i)
 513         comb += cul.oper_i.eq(self.ls_oper_i)
 514         comb += cul.imm_i.eq(self.ls_imm_i)
 515
 516         # TODO: issueunit.f (FP)
 517
 518         # and int function issue / busy arrays, and dest/src1/src2
 519         comb += intfus.dest_i.eq(regdecode.dest_o)
 520         comb += intfus.src1_i.eq(regdecode.src1_o)
 521         comb += intfus.src2_i.eq(regdecode.src2_o)
 522
 523         fn_issue_o = issueunit.fn_issue_o
 524
 525         comb += intfus.fn_issue_i.eq(fn_issue_o)
 526         comb += issueunit.busy_i.eq(cu.busy_o)
 527         comb += self.busy_o.eq(cu.busy_o.bool())
 528
 529         #---------
 530         # Memory Function Unit
 531         #---------
 532         reset_b = Signal(cul.n_units, reset_less=True)
 533         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 534
 535         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 536         comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
 537         comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
 538
 539         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 540         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 541         # issue_i.  multi-issue gets a bit more complex but not a lot.
 542         prior_ldsts = Signal(cul.n_units, reset_less=True)
 543         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 544         with m.If(self.ls_oper_i[3]): # LD bit of operand
 545             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 546         with m.If(self.ls_oper_i[2]): # ST bit of operand
 547             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 548
 549         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 550         # just immediately activate go_adr
 551         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 552
 553         # connect up address data
 554         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 555         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 556
 557         # connect loadable / storable to go_ld/go_st.
 558         # XXX should only be done when the memory ld/st has actually happened!
 559         go_st_i = Signal(cul.n_units, reset_less=True)
 560         go_ld_i = Signal(cul.n_units, reset_less=True)
 561         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &\
 562                                   cul.adr_rel_o & cul.ld_o)
 563         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
 564                                   cul.sto_rel_o & cul.st_o)
 565         comb += memfus.go_ld_i.eq(go_ld_i)
 566         comb += memfus.go_st_i.eq(go_st_i)
 567         #comb += cul.go_wr_i.eq(go_ld_i)
 568         comb += cul.go_st_i.eq(go_st_i)
 569
 570         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 571         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 572         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 573
 574         #---------
 575         # merge shadow matrices outputs
 576         #---------
 577
 578         # these are explained in ShadowMatrix docstring, and are to be
 579         # connected to the FUReg and FUFU Matrices, to get them to reset
 580         anydie = Signal(n_intfus, reset_less=True)
 581         allshadown = Signal(n_intfus, reset_less=True)
 582         shreset = Signal(n_intfus, reset_less=True)
 583         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 584         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 585         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 586
 587         #---------
 588         # connect fu-fu matrix
 589         #---------
 590
 591         # Group Picker... done manually for now.
 592         go_rd_o = intpick1.go_rd_o
 593         go_wr_o = intpick1.go_wr_o
 594         go_rd_i = intfus.go_rd_i
 595         go_wr_i = intfus.go_wr_i
 596         go_die_i = intfus.go_die_i
 597         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 598         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 599         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 600         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 601
 602         # Connect Picker
 603         #---------
 604         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 605         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.done_o[0:n_intfus])
 606         int_rd_o = intfus.readable_o
 607         int_wr_o = intfus.writable_o
 608         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 609         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 610
 611         #---------
 612         # Shadow Matrix
 613         #---------
 614
 615         comb += shadows.issue_i.eq(fn_issue_o)
 616         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 617         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 618         #---------
 619         # NOTE; this setup is for the instruction order preservation...
 620
 621         # connect shadows / go_dies to Computation Units
 622         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 623         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 624
 625         # ok connect first n_int_fu shadows to busy lines, to create an
 626         # instruction-order linked-list-like arrangement, using a bit-matrix
 627         # (instead of e.g. a ring buffer).
 628
 629         # when written, the shadow can be cancelled (and was good)
 630         for i in range(n_intfus):
 631             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 632
 633         # *previous* instruction shadows *current* instruction, and, obviously,
 634         # if the previous is completed (!busy) don't cast the shadow!
 635         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 636         for i in range(n_intfus):
 637             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 638
 639         #---------
 640         # ... and this is for branch speculation.  it uses the extra bit
 641         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 642         # only needs to set shadow_i, s_fail_i and s_good_i
 643
 644         # issue captures shadow_i (if enabled)
 645         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 646
 647         bactive = Signal(reset_less=True)
 648         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 649
 650         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 651         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 652             comb += bshadow.issue_i.eq(fn_issue_o)
 653             for i in range(n_intfus):
 654                 with m.If(fn_issue_o & (Const(1<<i))):
 655                     comb += bshadow.shadow_i[i][0].eq(1)
 656
 657         # finally, we need an indicator to the test infrastructure as to
 658         # whether the branch succeeded or failed, plus, link up to the
 659         # "recorder" of whether the instruction was under shadow or not
 660
 661         with m.If(br1.issue_i):
 662             sync += bspec.active_i.eq(1)
 663         with m.If(self.branch_succ_i):
 664             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 665         with m.If(self.branch_fail_i):
 666             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 667
 668         # branch is active (TODO: a better signal: this is over-using the
 669         # go_write signal - actually the branch should not be "writing")
 670         with m.If(br1.go_wr_i):
 671             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 672             sync += bspec.active_i.eq(0)
 673             comb += bspec.br_i.eq(1)
 674             # branch occurs if data == 1, failed if data == 0
 675             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 676             for i in range(n_intfus):
 677                 # *expected* direction of the branch matched against *actual*
 678                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 679                 # ... or it didn't
 680                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 681
 682         #---------
 683         # Connect Register File(s)
 684         #---------
 685         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 686         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 687         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 688
 689         # connect ALUs to regfule
 690         comb += int_dest.data_i.eq(cu.data_o)
 691         comb += cu.src1_i.eq(int_src1.data_o)
 692         comb += cu.src2_i.eq(int_src2.data_o)
 693
 694         # connect ALU Computation Units
 695         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 696         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 697         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 698
 699         return m
 700
 701     def __iter__(self):
 702         yield from self.intregs
 703         yield from self.fpregs
 704         yield self.int_dest_i
 705         yield self.int_src1_i
 706         yield self.int_src2_i
 707         yield self.issue_o
 708         yield self.branch_succ_i
 709         yield self.branch_fail_i
 710         yield self.branch_direction_o
 711
 712     def ports(self):
 713         return list(self)
 714
 715
 716 class IssueToScoreboard(Elaboratable):
 717
 718     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 719         self.qlen = qlen
 720         self.n_in = n_in
 721         self.n_out = n_out
 722         self.rwid = rwid
 723         self.opw = opwid
 724         self.n_regs = n_regs
 725
 726         mqbits = unsigned(int(log(qlen) / log(2))+2)
 727         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 728         self.p_ready_o = Signal() # instructions were added
 729         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 730
 731         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 732         self.qlen_o = Signal(mqbits, reset_less=True)
 733
 734     def elaborate(self, platform):
 735         m = Module()
 736         comb = m.d.comb
 737         sync = m.d.sync
 738
 739         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 740         sc = Scoreboard(self.rwid, self.n_regs)
 741         m.submodules.iq = iq
 742         m.submodules.sc = sc
 743
 744         # get at the regfile for testing
 745         self.intregs = sc.intregs
 746
 747         # and the "busy" signal and instruction queue length
 748         comb += self.busy_o.eq(sc.busy_o)
 749         comb += self.qlen_o.eq(iq.qlen_o)
 750
 751         # link up instruction queue
 752         comb += iq.p_add_i.eq(self.p_add_i)
 753         comb += self.p_ready_o.eq(iq.p_ready_o)
 754         for i in range(self.n_in):
 755             comb += eq(iq.data_i[i], self.data_i[i])
 756
 757         # take instruction and process it.  note that it's possible to
 758         # "inspect" the queue contents *without* actually removing the
 759         # items.  items are only removed when the
 760
 761         # in "waiting" state
 762         wait_issue_br = Signal()
 763         wait_issue_alu = Signal()
 764         wait_issue_ls = Signal()
 765
 766         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 767             # set instruction pop length to 1 if the unit accepted
 768             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 769                 with m.If(iq.qlen_o != 0):
 770                     comb += iq.n_sub_i.eq(1)
 771             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 772                 with m.If(iq.qlen_o != 0):
 773                     comb += iq.n_sub_i.eq(1)
 774             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 775                 with m.If(iq.qlen_o != 0):
 776                     comb += iq.n_sub_i.eq(1)
 777
 778         # see if some instruction(s) are here.  note that this is
 779         # "inspecting" the in-place queue.  note also that on the
 780         # cycle following "waiting" for fn_issue_o to be set, the
 781         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 782         with m.If(iq.qlen_o != 0):
 783             # get the operands and operation
 784             imm = iq.data_o[0].imm_i
 785             dest = iq.data_o[0].dest_i
 786             src1 = iq.data_o[0].src1_i
 787             src2 = iq.data_o[0].src2_i
 788             op = iq.data_o[0].oper_i
 789             opi = iq.data_o[0].opim_i # immediate set
 790
 791             # set the src/dest regs
 792             comb += sc.int_dest_i.eq(dest)
 793             comb += sc.int_src1_i.eq(src1)
 794             comb += sc.int_src2_i.eq(src2)
 795             comb += sc.reg_enable_i.eq(1) # enable the regfile
 796
 797             # choose a Function-Unit-Group
 798             with m.If((op & (0x3<<2)) != 0): # branch
 799                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 800                 comb += sc.br_imm_i.eq(imm)
 801                 comb += sc.brissue.insn_i.eq(1)
 802                 comb += wait_issue_br.eq(1)
 803             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 804                 # see compldst.py
 805                 # bit 0: ADD/SUB
 806                 # bit 1: immed
 807                 # bit 4: LD
 808                 # bit 5: ST
 809                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 810                 comb += sc.ls_imm_i.eq(imm)
 811                 comb += sc.lsissue.insn_i.eq(1)
 812                 comb += wait_issue_ls.eq(1)
 813             with m.Else(): # alu
 814                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 815                 comb += sc.alu_imm_i.eq(imm)
 816                 comb += sc.aluissue.insn_i.eq(1)
 817                 comb += wait_issue_alu.eq(1)
 818
 819             # XXX TODO
 820             # these indicate that the instruction is to be made
 821             # shadow-dependent on
 822             # (either) branch success or branch fail
 823             #yield sc.branch_fail_i.eq(branch_fail)
 824             #yield sc.branch_succ_i.eq(branch_success)
 825
 826         return m
 827
 828     def __iter__(self):
 829         yield self.p_ready_o
 830         for o in self.data_i:
 831             yield from list(o)
 832         yield self.p_add_i
 833
 834     def ports(self):
 835         return list(self)
 836
 837
 838 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 839             branch_success, branch_fail):
 840     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 841                'src1_i': src1, 'src2_i': src2}]
 842
 843     sendlen = 1
 844     for idx in range(sendlen):
 845         yield from eq(dut.data_i[idx], instrs[idx])
 846         di = yield dut.data_i[idx]
 847         print ("senddata %d %x" % (idx, di))
 848     yield dut.p_add_i.eq(sendlen)
 849     yield
 850     o_p_ready = yield dut.p_ready_o
 851     while not o_p_ready:
 852         yield
 853         o_p_ready = yield dut.p_ready_o
 854
 855     yield dut.p_add_i.eq(0)
 856
 857
 858 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 859     yield from disable_issue(dut)
 860     yield dut.int_dest_i.eq(dest)
 861     yield dut.int_src1_i.eq(src1)
 862     yield dut.int_src2_i.eq(src2)
 863     if (op & (0x3<<2)) != 0: # branch
 864         yield dut.brissue.insn_i.eq(1)
 865         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 866         yield dut.br_imm_i.eq(imm)
 867         dut_issue = dut.brissue
 868     else:
 869         yield dut.aluissue.insn_i.eq(1)
 870         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 871         yield dut.alu_imm_i.eq(imm)
 872         dut_issue = dut.aluissue
 873     yield dut.reg_enable_i.eq(1)
 874
 875     # these indicate that the instruction is to be made shadow-dependent on
 876     # (either) branch success or branch fail
 877     yield dut.branch_fail_i.eq(branch_fail)
 878     yield dut.branch_succ_i.eq(branch_success)
 879
 880     yield
 881     yield from wait_for_issue(dut, dut_issue)
 882
 883
 884 def print_reg(dut, rnums):
 885     rs = []
 886     for rnum in rnums:
 887         reg = yield dut.intregs.regs[rnum].reg
 888         rs.append("%x" % reg)
 889     rnums = map(str, rnums)
 890     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 891
 892
 893 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 894     insts = []
 895     for i in range(n_ops):
 896         src1 = randint(1, dut.n_regs-1)
 897         src2 = randint(1, dut.n_regs-1)
 898         imm = randint(1, (1<<dut.rwid)-1)
 899         dest = randint(1, dut.n_regs-1)
 900         op = randint(0, max_opnums)
 901         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 902
 903         if shadowing:
 904             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 905         else:
 906             insts.append((src1, src2, dest, op, opi, imm))
 907     return insts
 908
 909
 910 def wait_for_busy_clear(dut):
 911     while True:
 912         busy_o = yield dut.busy_o
 913         if not busy_o:
 914             break
 915         print ("busy",)
 916         yield
 917
 918 def disable_issue(dut):
 919     yield dut.aluissue.insn_i.eq(0)
 920     yield dut.brissue.insn_i.eq(0)
 921     yield dut.lsissue.insn_i.eq(0)
 922
 923
 924 def wait_for_issue(dut, dut_issue):
 925     while True:
 926         issue_o = yield dut_issue.fn_issue_o
 927         if issue_o:
 928             yield from disable_issue(dut)
 929             yield dut.reg_enable_i.eq(0)
 930             break
 931         print ("busy",)
 932         #yield from print_reg(dut, [1,2,3])
 933         yield
 934     #yield from print_reg(dut, [1,2,3])
 935
 936 def scoreboard_branch_sim(dut, alusim):
 937
 938     iseed = 3
 939
 940     for i in range(1):
 941
 942         print ("rseed", iseed)
 943         seed(iseed)
 944         iseed += 1
 945
 946         yield dut.branch_direction_o.eq(0)
 947
 948         # set random values in the registers
 949         for i in range(1, dut.n_regs):
 950             val = 31+i*3
 951             val = randint(0, (1<<alusim.rwidth)-1)
 952             yield dut.intregs.regs[i].reg.eq(val)
 953             alusim.setval(i, val)
 954
 955         if False:
 956             # create some instructions: branches create a tree
 957             insts = create_random_ops(dut, 1, True, 1)
 958             #insts.append((6, 6, 1, 2, (0, 0)))
 959             #insts.append((4, 3, 3, 0, (0, 0)))
 960
 961             src1 = randint(1, dut.n_regs-1)
 962             src2 = randint(1, dut.n_regs-1)
 963             #op = randint(4, 7)
 964             op = 4 # only BGT at the moment
 965
 966             branch_ok = create_random_ops(dut, 1, True, 1)
 967             branch_fail = create_random_ops(dut, 1, True, 1)
 968
 969             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 970
 971         if True:
 972             insts = []
 973             insts.append( (3, 5, 2, 0, (0, 0)) )
 974             branch_ok = []
 975             branch_fail = []
 976             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
 977             branch_ok.append( None )
 978             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
 979             #branch_fail.append( None )
 980             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
 981
 982         siminsts = deepcopy(insts)
 983
 984         # issue instruction(s)
 985         i = -1
 986         instrs = insts
 987         branch_direction = 0
 988         while instrs:
 989             yield
 990             yield
 991             i += 1
 992             branch_direction = yield dut.branch_direction_o # way branch went
 993             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
 994             if branch_direction == 1 and shadow_on:
 995                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 996                 continue # branch was "success" and this is a "failed"... skip
 997             if branch_direction == 2 and shadow_off:
 998                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 999                 continue # branch was "fail" and this is a "success"... skip
1000             if branch_direction != 0:
1001                 shadow_on = 0
1002                 shadow_off = 0
1003             is_branch = op >= 4
1004             if is_branch:
1005                 branch_ok, branch_fail = dest
1006                 dest = src2
1007                 # ok zip up the branch success / fail instructions and
1008                 # drop them into the queue, one marked "to have branch success"
1009                 # the other to be marked shadow branch "fail".
1010                 # one out of each of these will be cancelled
1011                 for ok, fl in zip(branch_ok, branch_fail):
1012                     if ok:
1013                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1014                     if fl:
1015                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1016             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1017                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1018             yield from int_instr(dut, op, src1, src2, dest,
1019                                  shadow_on, shadow_off)
1020
1021         # wait for all instructions to stop before checking
1022         yield
1023         yield from wait_for_busy_clear(dut)
1024
1025         i = -1
1026         while siminsts:
1027             instr = siminsts.pop(0)
1028             if instr is None:
1029                 continue
1030             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
1031             i += 1
1032             is_branch = op >= 4
1033             if is_branch:
1034                 branch_ok, branch_fail = dest
1035                 dest = src2
1036             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
1037                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1038             branch_res = alusim.op(op, src1, src2, dest)
1039             if is_branch:
1040                 if branch_res:
1041                     siminsts += branch_ok
1042                 else:
1043                     siminsts += branch_fail
1044
1045         # check status
1046         yield from alusim.check(dut)
1047         yield from alusim.dump(dut)
1048
1049
def scoreboard_sim(dut, alusim):
    """Simulation process: queue a list of instructions and check the result.

    Register n is initialised to the value n (mirrored into alusim).
    Every instruction is applied to alusim and then pushed into dut with
    instr_q.  Once the queue has drained and dut is no longer busy, the
    register state is compared with alusim.check().

    Active instructions are 7-tuples
    (src1, src2, dest, op, opi, imm, (br_ok, br_fail)).  The blocks
    guarded by "if False" are older regression cases kept for reference;
    several of them use shorter tuples than the issue loop unpacks.
    """

    seed(0)

    for _run in range(1):

        # set known values in the registers: register n holds n
        # (alternatives tried before: random values, or 31+n*3)
        for regnum in range(1, dut.n_regs):
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            instrs += [
                (1, 2, 0, 0x20, 1, 1, (0, 0)),  # LD
                #(1, 2, 0, 0x10, 1, 1, (0, 0)),
            ]

        if True:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if True:
            instrs += [
                (7, 3, 2, 4, 0, 0, (0, 0)),
                (7, 6, 6, 2, 0, 0, (0, 0)),
                (1, 7, 2, 2, 0, 0, (0, 0)),
            ]

        if True:
            instrs += [
                (2, 3, 3, 0, 0, 0, (0, 0)),
                (5, 3, 3, 1, 0, 0, (0, 0)),
                (3, 5, 5, 2, 0, 0, (0, 0)),
                (5, 3, 3, 3, 0, 0, (0, 0)),
                (3, 5, 5, 0, 0, 0, (0, 0)),
            ]

        if False:
            instrs += [
                (3, 3, 4, 0, 0, 13979, (0, 0)),
                (6, 4, 1, 2, 0, 40976, (0, 0)),
                (1, 4, 7, 4, 1, 23652, (0, 0)),
            ]

        if False:
            instrs += [
                (5, 6, 2, 1),
                (2, 2, 4, 0),
                #(2, 2, 3, 1),
            ]

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs += [
                (2, 6, 2, 1),
                (2, 1, 2, 0),
            ]

        if False:
            instrs += [
                (1, 2, 7, 2),
                (7, 1, 5, 0),
                (4, 4, 1, 1),
            ]

        if False:
            instrs += [
                (5, 6, 2, 2),
                (1, 1, 4, 1),
                (6, 5, 3, 0),
            ]

        if False:
            # Write-after-Write Hazard
            instrs += [
                (3, 6, 7, 2),
                (4, 4, 7, 1),
            ]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [
                (5, 6, 1, 2),
                (1, 1, 1, 1),
            ]

        if False:
            # self-read-write sandwich
            instrs += [
                (5, 6, 1, 2),
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ]

        if False:
            # very weird failure
            instrs += [
                (5, 2, 5, 2),
                (2, 6, 3, 0),
                (4, 2, 2, 1),
            ]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (0, 1)),
            ]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (1, 0)),
            ]

        if False:
            instrs += [
                (4, 3, 5, 1, 0, (0, 0)),
                (5, 2, 3, 1, 0, (0, 0)),
                (7, 1, 5, 2, 0, (0, 0)),
                (5, 6, 6, 4, 0, (0, 0)),
                (7, 5, 2, 2, 0, (1, 0)),
                (1, 7, 5, 0, 0, (0, 1)),
                (1, 6, 1, 2, 0, (1, 0)),
                (1, 6, 7, 3, 0, (0, 0)),
                (6, 7, 7, 0, 0, (0, 0)),
            ]

        # issue instruction(s), wait for issue to be free before proceeding
        for seq, entry in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = entry

            print("instr %d: (%d, %d, %d, %d, %d, %d)" %
                  (seq, src1, src2, dest, op, opi, imm))
            # update the software model first, then hand over to the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first let the queue drain...
        while (yield dut.qlen_o) != 0:
            yield
        # ...then allow a few extra cycles before polling busy_o
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1196
1197
def test_scoreboard():
    """Build an IssueToScoreboard, dump it as RTLIL and simulate it.

    Writes test_scoreboard6600.il, then runs scoreboard_sim against the
    design with a VCD trace in test_scoreboard6600.vcd.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 8)  # constructed, but not used by this test

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # the branch test-bench (scoreboard_branch_sim) is not run here
1211
1212
# run the scoreboard test-bench when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()