src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = 1 # regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117
 118         # outputs
 119         self.busy_o = Signal(n_units, reset_less=True)
 120         self.rd_rel_o = Signal(n_units, reset_less=True)
 121         self.req_rel_o = Signal(n_units, reset_less=True)
 122         if ldstmode:
 123             self.adr_rel_o = Signal(n_units, reset_less=True)
 124             self.sto_rel_o = Signal(n_units, reset_less=True)
 125             self.req_rel_o = Signal(n_units, reset_less=True)
 126             self.load_mem_o = Signal(n_units, reset_less=True)
 127             self.stwd_mem_o = Signal(n_units, reset_less=True)
 128
 129         # in/out register data (note: not register#, actual data)
 130         self.data_o = Signal(rwid, reset_less=True)
 131         self.src1_i = Signal(rwid, reset_less=True)
 132         self.src2_i = Signal(rwid, reset_less=True)
 133         # input operand
 134
 135     def elaborate(self, platform):
 136         m = Module()
 137         comb = m.d.comb
 138
 139         for i, alu in enumerate(self.units):
 140             setattr(m.submodules, "comp%d" % i, alu)
 141
 142         go_rd_l = []
 143         go_wr_l = []
 144         issue_l = []
 145         busy_l = []
 146         req_rel_l = []
 147         rd_rel_l = []
 148         shadow_l = []
 149         godie_l = []
 150         for alu in self.units:
 151             req_rel_l.append(alu.req_rel_o)
 152             rd_rel_l.append(alu.rd_rel_o)
 153             shadow_l.append(alu.shadown_i)
 154             godie_l.append(alu.go_die_i)
 155             go_wr_l.append(alu.go_wr_i)
 156             go_rd_l.append(alu.go_rd_i)
 157             issue_l.append(alu.issue_i)
 158             busy_l.append(alu.busy_o)
 159         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 160         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 161         comb += self.busy_o.eq(Cat(*busy_l))
 162         comb += Cat(*godie_l).eq(self.go_die_i)
 163         comb += Cat(*shadow_l).eq(self.shadown_i)
 164         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 165         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 166         comb += Cat(*issue_l).eq(self.issue_i)
 167
 168         # connect data register input/output
 169
 170         # merge (OR) all integer FU / ALU outputs to a single value
 171         # bit of a hack: treereduce needs a list with an item named "data_o"
 172         if self.units:
 173             data_o = treereduce(self.units)
 174             comb += self.data_o.eq(data_o)
 175
 176         for i, alu in enumerate(self.units):
 177             comb += alu.src1_i.eq(self.src1_i)
 178             comb += alu.src2_i.eq(self.src2_i)
 179
 180         if not self.ldstmode:
 181             return m
 182
 183         ldmem_l = []
 184         stmem_l = []
 185         go_ad_l = []
 186         adr_rel_l = []
 187         sto_rel_l = []
 188         for alu in self.units:
 189             adr_rel_l.append(alu.adr_rel_o)
 190             sto_rel_l.append(alu.sto_rel_o)
 191             ldmem_l.append(alu.load_mem_o)
 192             stmem_l.append(alu.stwd_mem_o)
 193             go_ad_l.append(alu.go_ad_i)
 194         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 195         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 196         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 197         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 198         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 199
 200         return m
 201
 202
 203 class CompUnitLDSTs(CompUnitsBase):
 204
 205     def __init__(self, rwid, opwid, n_ldsts, mem):
 206         """ Inputs:
 207
 208             * :rwid:   bit width of register file(s) - both FP and INT
 209             * :opwid:  operand bit width
 210         """
 211         self.opwid = opwid
 212
 213         # inputs
 214         self.oper_i = Signal(opwid, reset_less=True)
 215         self.imm_i = Signal(rwid, reset_less=True)
 216
 217         # Int ALUs
 218         self.alus = []
 219         for i in range(n_ldsts):
 220             self.alus.append(ALU(rwid))
 221
 222         units = []
 223         for alu in self.alus:
 224             aluopwid = 4 # see compldst.py for "internal" opcode
 225             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 226
 227         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 228
 229     def elaborate(self, platform):
 230         m = CompUnitsBase.elaborate(self, platform)
 231         comb = m.d.comb
 232
 233         # hand the same operation to all units, 4 lower bits though
 234         for alu in self.units:
 235             comb += alu.oper_i[0:4].eq(self.oper_i)
 236             comb += alu.imm_i.eq(self.imm_i)
 237             comb += alu.isalu_i.eq(0)
 238
 239         return m
 240
 241
 242 class CompUnitALUs(CompUnitsBase):
 243
 244     def __init__(self, rwid, opwid, n_alus):
 245         """ Inputs:
 246
 247             * :rwid:   bit width of register file(s) - both FP and INT
 248             * :opwid:  operand bit width
 249         """
 250         self.opwid = opwid
 251
 252         # inputs
 253         self.oper_i = Signal(opwid, reset_less=True)
 254         self.imm_i = Signal(rwid, reset_less=True)
 255
 256         # Int ALUs
 257         alus = []
 258         for i in range(n_alus):
 259             alus.append(ALU(rwid))
 260
 261         units = []
 262         for alu in alus:
 263             aluopwid = 3 # extra bit for immediate mode
 264             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 265
 266         CompUnitsBase.__init__(self, rwid, units)
 267
 268     def elaborate(self, platform):
 269         m = CompUnitsBase.elaborate(self, platform)
 270         comb = m.d.comb
 271
 272         # hand the same operation to all units, only lower 3 bits though
 273         for alu in self.units:
 274             comb += alu.oper_i[0:3].eq(self.oper_i)
 275             comb += alu.imm_i.eq(self.imm_i)
 276
 277         return m
 278
 279
 280 class CompUnitBR(CompUnitsBase):
 281
 282     def __init__(self, rwid, opwid):
 283         """ Inputs:
 284
 285             * :rwid:   bit width of register file(s) - both FP and INT
 286             * :opwid:  operand bit width
 287
 288             Note: bgt unit is returned so that a shadow unit can be created
 289             for it
 290         """
 291         self.opwid = opwid
 292
 293         # inputs
 294         self.oper_i = Signal(opwid, reset_less=True)
 295         self.imm_i = Signal(rwid, reset_less=True)
 296
 297         # Branch ALU and CU
 298         self.bgt = BranchALU(rwid)
 299         aluopwid = 3 # extra bit for immediate mode
 300         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 301         CompUnitsBase.__init__(self, rwid, [self.br1])
 302
 303     def elaborate(self, platform):
 304         m = CompUnitsBase.elaborate(self, platform)
 305         comb = m.d.comb
 306
 307         # hand the same operation to all units
 308         for alu in self.units:
 309             comb += alu.oper_i.eq(self.oper_i)
 310             comb += alu.imm_i.eq(self.imm_i)
 311
 312         return m
 313
 314
 315 class FunctionUnits(Elaboratable):
 316
 317     def __init__(self, n_regs, n_int_alus):
 318         self.n_regs = n_regs
 319         self.n_int_alus = n_int_alus
 320
 321         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 322         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 323         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 324
 325         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 326         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 327
 328         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 329         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 330         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 331
 332         self.readable_o = Signal(n_int_alus, reset_less=True)
 333         self.writable_o = Signal(n_int_alus, reset_less=True)
 334
 335         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 336         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 337         self.go_die_i = Signal(n_int_alus, reset_less=True)
 338         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 339
 340         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 341
 342     def elaborate(self, platform):
 343         m = Module()
 344         comb = m.d.comb
 345         sync = m.d.sync
 346
 347         n_intfus = self.n_int_alus
 348
 349         # Integer FU-FU Dep Matrix
 350         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 351         m.submodules.intfudeps = intfudeps
 352         # Integer FU-Reg Dep Matrix
 353         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 354         m.submodules.intregdeps = intregdeps
 355
 356         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 357         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 358
 359         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 360         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 361
 362         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 363         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 364         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 365
 366         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 367         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 368         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 369         comb += intfudeps.go_die_i.eq(self.go_die_i)
 370         comb += self.readable_o.eq(intfudeps.readable_o)
 371         comb += self.writable_o.eq(intfudeps.writable_o)
 372
 373         # Connect function issue / arrays, and dest/src1/src2
 374         comb += intregdeps.dest_i.eq(self.dest_i)
 375         comb += intregdeps.src_i[0].eq(self.src1_i)
 376         comb += intregdeps.src_i[1].eq(self.src2_i)
 377
 378         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 379         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 380         comb += intregdeps.go_die_i.eq(self.go_die_i)
 381         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 382
 383         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 384         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 385         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 386
 387         return m
 388
 389
 390 class Scoreboard(Elaboratable):
 391     def __init__(self, rwid, n_regs):
 392         """ Inputs:
 393
 394             * :rwid:   bit width of register file(s) - both FP and INT
 395             * :n_regs: depth of register file(s) - number of FP and INT regs
 396         """
 397         self.rwid = rwid
 398         self.n_regs = n_regs
 399
 400         # Register Files
 401         self.intregs = RegFileArray(rwid, n_regs)
 402         self.fpregs = RegFileArray(rwid, n_regs)
 403
 404         # issue q needs to get at these
 405         self.aluissue = IssueUnitGroup(2)
 406         self.lsissue = IssueUnitGroup(2)
 407         self.brissue = IssueUnitGroup(1)
 408         # and these
 409         self.alu_oper_i = Signal(4, reset_less=True)
 410         self.alu_imm_i = Signal(rwid, reset_less=True)
 411         self.br_oper_i = Signal(4, reset_less=True)
 412         self.br_imm_i = Signal(rwid, reset_less=True)
 413         self.ls_oper_i = Signal(4, reset_less=True)
 414         self.ls_imm_i = Signal(rwid, reset_less=True)
 415
 416         # inputs
 417         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 418         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 419         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 420         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 421
 422         # outputs
 423         self.issue_o = Signal(reset_less=True) # instruction was accepted
 424         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 425
 426         # for branch speculation experiment.  branch_direction = 0 if
 427         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 428         # branch_succ and branch_fail are requests to have the current
 429         # instruction be dependent on the branch unit "shadow" capability.
 430         self.branch_succ_i = Signal(reset_less=True)
 431         self.branch_fail_i = Signal(reset_less=True)
 432         self.branch_direction_o = Signal(2, reset_less=True)
 433
 434     def elaborate(self, platform):
 435         m = Module()
 436         comb = m.d.comb
 437         sync = m.d.sync
 438
 439         m.submodules.intregs = self.intregs
 440         m.submodules.fpregs = self.fpregs
 441
 442         # register ports
 443         int_dest = self.intregs.write_port("dest")
 444         int_src1 = self.intregs.read_port("src1")
 445         int_src2 = self.intregs.read_port("src2")
 446
 447         fp_dest = self.fpregs.write_port("dest")
 448         fp_src1 = self.fpregs.read_port("src1")
 449         fp_src2 = self.fpregs.read_port("src2")
 450
 451         # Int ALUs and BR ALUs
 452         n_int_alus = 5
 453         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 454         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 455
 456         # LDST Comp Units
 457         n_ldsts = 2
 458         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
 459
 460         # Comp Units
 461         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 462         bgt = cub.bgt # get at the branch computation unit
 463         br1 = cub.br1
 464
 465         # Int FUs
 466         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 467
 468         # Memory FUs
 469         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 470
 471         # Count of number of FUs
 472         n_intfus = n_int_alus
 473         n_fp_fus = 0 # for now
 474
 475         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 476         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 477         m.submodules.intpick1 = intpick1
 478
 479         # INT/FP Issue Unit
 480         regdecode = RegDecode(self.n_regs)
 481         m.submodules.regdecode = regdecode
 482         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 483         m.submodules.issueunit = issueunit
 484
 485         # Shadow Matrix.  currently n_intfus shadows, to be used for
 486         # write-after-write hazards.  NOTE: there is one extra for branches,
 487         # so the shadow width is increased by 1
 488         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 489         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 490
 491         # record previous instruction to cast shadow on current instruction
 492         prev_shadow = Signal(n_intfus)
 493
 494         # Branch Speculation recorder.  tracks the success/fail state as
 495         # each instruction is issued, so that when the branch occurs the
 496         # allow/cancel can be issued as appropriate.
 497         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 498
 499         #---------
 500         # ok start wiring things together...
 501         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 502         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 503         #---------
 504
 505         #---------
 506         # Issue Unit is where it starts.  set up some in/outs for this module
 507         #---------
 508         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 509                      regdecode.src1_i.eq(self.int_src1_i),
 510                      regdecode.src2_i.eq(self.int_src2_i),
 511                      regdecode.enable_i.eq(self.reg_enable_i),
 512                      self.issue_o.eq(issueunit.issue_o)
 513                     ]
 514
 515         # take these to outside (issue needs them)
 516         comb += cua.oper_i.eq(self.alu_oper_i)
 517         comb += cua.imm_i.eq(self.alu_imm_i)
 518         comb += cub.oper_i.eq(self.br_oper_i)
 519         comb += cub.imm_i.eq(self.br_imm_i)
 520         comb += cul.oper_i.eq(self.ls_oper_i)
 521         comb += cul.imm_i.eq(self.ls_imm_i)
 522
 523         # TODO: issueunit.f (FP)
 524
 525         # and int function issue / busy arrays, and dest/src1/src2
 526         comb += intfus.dest_i.eq(regdecode.dest_o)
 527         comb += intfus.src1_i.eq(regdecode.src1_o)
 528         comb += intfus.src2_i.eq(regdecode.src2_o)
 529
 530         fn_issue_o = issueunit.fn_issue_o
 531
 532         comb += intfus.fn_issue_i.eq(fn_issue_o)
 533         comb += issueunit.busy_i.eq(cu.busy_o)
 534         comb += self.busy_o.eq(cu.busy_o.bool())
 535
 536         #---------
 537         # Memory Function Unit
 538         #---------
 539         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 540         comb += memfus.addr_we_i.eq(cul.adr_rel_o) # Match enable on adr rel
 541
 542         comb += memfus.addrs_i[0].eq(cul.units[0].data_o)
 543         comb += memfus.addrs_i[1].eq(cul.units[1].data_o)
 544
 545         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 546         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 547         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 548
 549         #---------
 550         # merge shadow matrices outputs
 551         #---------
 552
 553         # these are explained in ShadowMatrix docstring, and are to be
 554         # connected to the FUReg and FUFU Matrices, to get them to reset
 555         anydie = Signal(n_intfus, reset_less=True)
 556         allshadown = Signal(n_intfus, reset_less=True)
 557         shreset = Signal(n_intfus, reset_less=True)
 558         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 559         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 560         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 561
 562         #---------
 563         # connect fu-fu matrix
 564         #---------
 565
 566         # Group Picker... done manually for now.
 567         go_rd_o = intpick1.go_rd_o
 568         go_wr_o = intpick1.go_wr_o
 569         go_rd_i = intfus.go_rd_i
 570         go_wr_i = intfus.go_wr_i
 571         go_die_i = intfus.go_die_i
 572         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 573         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 574         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 575         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 576
 577         # Connect Picker
 578         #---------
 579         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 580         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 581         int_rd_o = intfus.readable_o
 582         int_wr_o = intfus.writable_o
 583         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 584         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 585
 586         #---------
 587         # Shadow Matrix
 588         #---------
 589
 590         comb += shadows.issue_i.eq(fn_issue_o)
 591         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 592         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 593         #---------
 594         # NOTE; this setup is for the instruction order preservation...
 595
 596         # connect shadows / go_dies to Computation Units
 597         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 598         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 599
 600         # ok connect first n_int_fu shadows to busy lines, to create an
 601         # instruction-order linked-list-like arrangement, using a bit-matrix
 602         # (instead of e.g. a ring buffer).
 603         # XXX TODO
 604
 605         # when written, the shadow can be cancelled (and was good)
 606         for i in range(n_intfus):
 607             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 608
 609         # *previous* instruction shadows *current* instruction, and, obviously,
 610         # if the previous is completed (!busy) don't cast the shadow!
 611         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 612         for i in range(n_intfus):
 613             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 614
 615         #---------
 616         # ... and this is for branch speculation.  it uses the extra bit
 617         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 618         # only needs to set shadow_i, s_fail_i and s_good_i
 619
 620         # issue captures shadow_i (if enabled)
 621         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 622
 623         bactive = Signal(reset_less=True)
 624         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 625
 626         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 627         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 628             comb += bshadow.issue_i.eq(fn_issue_o)
 629             for i in range(n_intfus):
 630                 with m.If(fn_issue_o & (Const(1<<i))):
 631                     comb += bshadow.shadow_i[i][0].eq(1)
 632
 633         # finally, we need an indicator to the test infrastructure as to
 634         # whether the branch succeeded or failed, plus, link up to the
 635         # "recorder" of whether the instruction was under shadow or not
 636
 637         with m.If(br1.issue_i):
 638             sync += bspec.active_i.eq(1)
 639         with m.If(self.branch_succ_i):
 640             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 641         with m.If(self.branch_fail_i):
 642             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 643
 644         # branch is active (TODO: a better signal: this is over-using the
 645         # go_write signal - actually the branch should not be "writing")
 646         with m.If(br1.go_wr_i):
 647             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 648             sync += bspec.active_i.eq(0)
 649             comb += bspec.br_i.eq(1)
 650             # branch occurs if data == 1, failed if data == 0
 651             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 652             for i in range(n_intfus):
 653                 # *expected* direction of the branch matched against *actual*
 654                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 655                 # ... or it didn't
 656                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 657
 658         #---------
 659         # Connect Register File(s)
 660         #---------
 661         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 662         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 663         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 664
 665         # connect ALUs to regfule
 666         comb += int_dest.data_i.eq(cu.data_o)
 667         comb += cu.src1_i.eq(int_src1.data_o)
 668         comb += cu.src2_i.eq(int_src2.data_o)
 669
 670         # connect ALU Computation Units
 671         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 672         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 673         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 674
 675         return m
 676
 677     def __iter__(self):
 678         yield from self.intregs
 679         yield from self.fpregs
 680         yield self.int_dest_i
 681         yield self.int_src1_i
 682         yield self.int_src2_i
 683         yield self.issue_o
 684         yield self.branch_succ_i
 685         yield self.branch_fail_i
 686         yield self.branch_direction_o
 687
 688     def ports(self):
 689         return list(self)
 690
 691
 692 class IssueToScoreboard(Elaboratable):
 693
 694     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 695         self.qlen = qlen
 696         self.n_in = n_in
 697         self.n_out = n_out
 698         self.rwid = rwid
 699         self.opw = opwid
 700         self.n_regs = n_regs
 701
 702         mqbits = (int(log(qlen) / log(2))+2, False)
 703         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 704         self.p_ready_o = Signal() # instructions were added
 705         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 706
 707         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 708         self.qlen_o = Signal(mqbits, reset_less=True)
 709
 710     def elaborate(self, platform):
 711         m = Module()
 712         comb = m.d.comb
 713         sync = m.d.sync
 714
 715         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 716         sc = Scoreboard(self.rwid, self.n_regs)
 717         mem = TestMemory(self.rwid, 8) # not too big, takes too long
 718         m.submodules.iq = iq
 719         m.submodules.sc = sc
 720         m.submodules.mem = mem
 721
 722         # get at the regfile for testing
 723         self.intregs = sc.intregs
 724
 725         # and the "busy" signal and instruction queue length
 726         comb += self.busy_o.eq(sc.busy_o)
 727         comb += self.qlen_o.eq(iq.qlen_o)
 728
 729         # link up instruction queue
 730         comb += iq.p_add_i.eq(self.p_add_i)
 731         comb += self.p_ready_o.eq(iq.p_ready_o)
 732         for i in range(self.n_in):
 733             comb += eq(iq.data_i[i], self.data_i[i])
 734
 735         # take instruction and process it.  note that it's possible to
 736         # "inspect" the queue contents *without* actually removing the
 737         # items.  items are only removed when the
 738
 739         # in "waiting" state
 740         wait_issue_br = Signal()
 741         wait_issue_alu = Signal()
 742         wait_issue_ls = Signal()
 743
 744         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 745             # set instruction pop length to 1 if the unit accepted
 746             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 747                 with m.If(iq.qlen_o != 0):
 748                     comb += iq.n_sub_i.eq(1)
 749             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 750                 with m.If(iq.qlen_o != 0):
 751                     comb += iq.n_sub_i.eq(1)
 752             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 753                 with m.If(iq.qlen_o != 0):
 754                     comb += iq.n_sub_i.eq(1)
 755
 756         # see if some instruction(s) are here.  note that this is
 757         # "inspecting" the in-place queue.  note also that on the
 758         # cycle following "waiting" for fn_issue_o to be set, the
 759         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 760         with m.If(iq.qlen_o != 0):
 761             # get the operands and operation
 762             imm = iq.data_o[0].imm_i
 763             dest = iq.data_o[0].dest_i
 764             src1 = iq.data_o[0].src1_i
 765             src2 = iq.data_o[0].src2_i
 766             op = iq.data_o[0].oper_i
 767             opi = iq.data_o[0].opim_i # immediate set
 768
 769             # set the src/dest regs
 770             comb += sc.int_dest_i.eq(dest)
 771             comb += sc.int_src1_i.eq(src1)
 772             comb += sc.int_src2_i.eq(src2)
 773             comb += sc.reg_enable_i.eq(1) # enable the regfile
 774
 775             # choose a Function-Unit-Group
 776             with m.If((op & (0x3<<2)) != 0): # branch
 777                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 778                 comb += sc.br_imm_i.eq(imm)
 779                 comb += sc.brissue.insn_i.eq(1)
 780                 comb += wait_issue_br.eq(1)
 781             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 782                 # see compldst.py
 783                 # bit 0: ADD/SUB
 784                 # bit 1: immed
 785                 # bit 4: LD
 786                 # bit 5: ST
 787                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 788                 comb += sc.ls_imm_i.eq(imm)
 789                 comb += sc.lsissue.insn_i.eq(1)
 790                 comb += wait_issue_ls.eq(1)
 791             with m.Else(): # alu
 792                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 793                 comb += sc.alu_imm_i.eq(imm)
 794                 comb += sc.aluissue.insn_i.eq(1)
 795                 comb += wait_issue_alu.eq(1)
 796
 797             # XXX TODO
 798             # these indicate that the instruction is to be made
 799             # shadow-dependent on
 800             # (either) branch success or branch fail
 801             #yield sc.branch_fail_i.eq(branch_fail)
 802             #yield sc.branch_succ_i.eq(branch_success)
 803
 804         return m
 805
 806     def __iter__(self):
 807         yield self.p_ready_o
 808         for o in self.data_i:
 809             yield from list(o)
 810         yield self.p_add_i
 811
 812     def ports(self):
 813         return list(self)
 814
 815
 816 IADD = 0
 817 ISUB = 1
 818 IMUL = 2
 819 ISHF = 3
 820 IBGT = 4
 821 IBLT = 5
 822 IBEQ = 6
 823 IBNE = 7
 824
 825
 826 class RegSim:
 827     def __init__(self, rwidth, nregs):
 828         self.rwidth = rwidth
 829         self.regs = [0] * nregs
 830
 831     def op(self, op, op_imm, imm, src1, src2, dest):
 832         maxbits = (1 << self.rwidth) - 1
 833         src1 = self.regs[src1] & maxbits
 834         if op_imm:
 835             src2 = imm
 836         else:
 837             src2 = self.regs[src2] & maxbits
 838         if op == IADD:
 839             val = src1 + src2
 840         elif op == ISUB:
 841             val = src1 - src2
 842         elif op == IMUL:
 843             val = src1 * src2
 844         elif op == ISHF:
 845             val = src1 >> (src2 & maxbits)
 846         elif op == IBGT:
 847             val = int(src1 > src2)
 848         elif op == IBLT:
 849             val = int(src1 < src2)
 850         elif op == IBEQ:
 851             val = int(src1 == src2)
 852         elif op == IBNE:
 853             val = int(src1 != src2)
 854         else:
 855             return 0 # LD/ST TODO
 856         val &= maxbits
 857         self.setval(dest, val)
 858         return val
 859
 860     def setval(self, dest, val):
 861         print ("sim setval", dest, hex(val))
 862         self.regs[dest] = val
 863
 864     def dump(self, dut):
 865         for i, val in enumerate(self.regs):
 866             reg = yield dut.intregs.regs[i].reg
 867             okstr = "OK" if reg == val else "!ok"
 868             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 869
 870     def check(self, dut):
 871         for i, val in enumerate(self.regs):
 872             reg = yield dut.intregs.regs[i].reg
 873             if reg != val:
 874                 print("reg %d expected %x received %x\n" % (i, val, reg))
 875                 yield from self.dump(dut)
 876                 assert False
 877
 878 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 879             branch_success, branch_fail):
 880     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 881                'src1_i': src1, 'src2_i': src2}]
 882
 883     sendlen = 1
 884     for idx in range(sendlen):
 885         yield from eq(dut.data_i[idx], instrs[idx])
 886         di = yield dut.data_i[idx]
 887         print ("senddata %d %x" % (idx, di))
 888     yield dut.p_add_i.eq(sendlen)
 889     yield
 890     o_p_ready = yield dut.p_ready_o
 891     while not o_p_ready:
 892         yield
 893         o_p_ready = yield dut.p_ready_o
 894
 895     yield dut.p_add_i.eq(0)
 896
 897
 898 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 899     yield from disable_issue(dut)
 900     yield dut.int_dest_i.eq(dest)
 901     yield dut.int_src1_i.eq(src1)
 902     yield dut.int_src2_i.eq(src2)
 903     if (op & (0x3<<2)) != 0: # branch
 904         yield dut.brissue.insn_i.eq(1)
 905         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 906         yield dut.br_imm_i.eq(imm)
 907         dut_issue = dut.brissue
 908     else:
 909         yield dut.aluissue.insn_i.eq(1)
 910         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 911         yield dut.alu_imm_i.eq(imm)
 912         dut_issue = dut.aluissue
 913     yield dut.reg_enable_i.eq(1)
 914
 915     # these indicate that the instruction is to be made shadow-dependent on
 916     # (either) branch success or branch fail
 917     yield dut.branch_fail_i.eq(branch_fail)
 918     yield dut.branch_succ_i.eq(branch_success)
 919
 920     yield
 921     yield from wait_for_issue(dut, dut_issue)
 922
 923
 924 def print_reg(dut, rnums):
 925     rs = []
 926     for rnum in rnums:
 927         reg = yield dut.intregs.regs[rnum].reg
 928         rs.append("%x" % reg)
 929     rnums = map(str, rnums)
 930     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 931
 932
 933 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 934     insts = []
 935     for i in range(n_ops):
 936         src1 = randint(1, dut.n_regs-1)
 937         src2 = randint(1, dut.n_regs-1)
 938         imm = randint(1, (1<<dut.rwid)-1)
 939         dest = randint(1, dut.n_regs-1)
 940         op = randint(0, max_opnums)
 941         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 942
 943         if shadowing:
 944             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 945         else:
 946             insts.append((src1, src2, dest, op, opi, imm))
 947     return insts
 948
 949
 950 def wait_for_busy_clear(dut):
 951     while True:
 952         busy_o = yield dut.busy_o
 953         if not busy_o:
 954             break
 955         print ("busy",)
 956         yield
 957
 958 def disable_issue(dut):
 959     yield dut.aluissue.insn_i.eq(0)
 960     yield dut.brissue.insn_i.eq(0)
 961     yield dut.lsissue.insn_i.eq(0)
 962
 963
 964 def wait_for_issue(dut, dut_issue):
 965     while True:
 966         issue_o = yield dut_issue.fn_issue_o
 967         if issue_o:
 968             yield from disable_issue(dut)
 969             yield dut.reg_enable_i.eq(0)
 970             break
 971         print ("busy",)
 972         #yield from print_reg(dut, [1,2,3])
 973         yield
 974     #yield from print_reg(dut, [1,2,3])
 975
 976 def scoreboard_branch_sim(dut, alusim):
 977
 978     iseed = 3
 979
 980     for i in range(1):
 981
 982         print ("rseed", iseed)
 983         seed(iseed)
 984         iseed += 1
 985
 986         yield dut.branch_direction_o.eq(0)
 987
 988         # set random values in the registers
 989         for i in range(1, dut.n_regs):
 990             val = 31+i*3
 991             val = randint(0, (1<<alusim.rwidth)-1)
 992             yield dut.intregs.regs[i].reg.eq(val)
 993             alusim.setval(i, val)
 994
 995         if False:
 996             # create some instructions: branches create a tree
 997             insts = create_random_ops(dut, 1, True, 1)
 998             #insts.append((6, 6, 1, 2, (0, 0)))
 999             #insts.append((4, 3, 3, 0, (0, 0)))
1000
1001             src1 = randint(1, dut.n_regs-1)
1002             src2 = randint(1, dut.n_regs-1)
1003             #op = randint(4, 7)
1004             op = 4 # only BGT at the moment
1005
1006             branch_ok = create_random_ops(dut, 1, True, 1)
1007             branch_fail = create_random_ops(dut, 1, True, 1)
1008
1009             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
1010
1011         if True:
1012             insts = []
1013             insts.append( (3, 5, 2, 0, (0, 0)) )
1014             branch_ok = []
1015             branch_fail = []
1016             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
1017             branch_ok.append( None )
1018             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
1019             #branch_fail.append( None )
1020             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
1021
1022         siminsts = deepcopy(insts)
1023
1024         # issue instruction(s)
1025         i = -1
1026         instrs = insts
1027         branch_direction = 0
1028         while instrs:
1029             yield
1030             yield
1031             i += 1
1032             branch_direction = yield dut.branch_direction_o # way branch went
1033             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
1034             if branch_direction == 1 and shadow_on:
1035                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1036                 continue # branch was "success" and this is a "failed"... skip
1037             if branch_direction == 2 and shadow_off:
1038                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1039                 continue # branch was "fail" and this is a "success"... skip
1040             if branch_direction != 0:
1041                 shadow_on = 0
1042                 shadow_off = 0
1043             is_branch = op >= 4
1044             if is_branch:
1045                 branch_ok, branch_fail = dest
1046                 dest = src2
1047                 # ok zip up the branch success / fail instructions and
1048                 # drop them into the queue, one marked "to have branch success"
1049                 # the other to be marked shadow branch "fail".
1050                 # one out of each of these will be cancelled
1051                 for ok, fl in zip(branch_ok, branch_fail):
1052                     if ok:
1053                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1054                     if fl:
1055                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1056             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1057                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1058             yield from int_instr(dut, op, src1, src2, dest,
1059                                  shadow_on, shadow_off)
1060
1061         # wait for all instructions to stop before checking
1062         yield
1063         yield from wait_for_busy_clear(dut)
1064
1065         i = -1
1066         while siminsts:
1067             instr = siminsts.pop(0)
1068             if instr is None:
1069                 continue
1070             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
1071             i += 1
1072             is_branch = op >= 4
1073             if is_branch:
1074                 branch_ok, branch_fail = dest
1075                 dest = src2
1076             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
1077                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1078             branch_res = alusim.op(op, src1, src2, dest)
1079             if is_branch:
1080                 if branch_res:
1081                     siminsts += branch_ok
1082                 else:
1083                     siminsts += branch_fail
1084
1085         # check status
1086         yield from alusim.check(dut)
1087         yield from alusim.dump(dut)
1088
1089
def scoreboard_sim(dut, alusim):
    """Simulation generator: queue instructions into ``dut`` and check results.

    * dut: device under test; this function uses its ``n_regs``,
      ``intregs`` and ``qlen_o`` attributes and passes it on to
      ``instr_q`` and ``wait_for_busy_clear``.
    * alusim: RegSim reference model.  Every instruction is applied to it
      before being queued, and at the end its registers are compared
      against those of the dut.

    Instructions are 7-element tuples
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``.

    Only the blocks guarded by ``if True:`` contribute instructions; the
    ``if False:`` blocks are regression vectors that are currently
    switched off.  NOTE(review): several of the disabled blocks use
    shorter (4-, 5- or 6-element) tuples which would not unpack in the
    issue loop below; they would need converting before being re-enabled.
    """

    # fixed seed, so that the random register contents are repeatable
    seed(0)

    for i in range(1):

        # set random values in the registers
        # (register 0 is skipped; the model is given the same values)
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        # NOTE(review): op 0x10 is not one of the operations modelled by
        # RegSim.op (which returns 0 for unknown operations without
        # writing a register) - confirm how the final check is expected
        # to behave for this instruction.
        if True: # LD test (with immediate)
            instrs.append( (1, 2, 2, 0x10, 1, 20, (0, 0)) )

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # force specific values into registers 5 and 3 (dut and
            # model alike) before a branch followed by a shadowed op
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # same as the previous block, with a different value in
            # register 5 and the opposite shadow flag
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # update the reference model first, then queue the
            # instruction into the dut
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first until the instruction queue reports empty...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # (...then four more clock cycles, then until busy_o is clear)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1235
1236
def test_scoreboard():
    """Build the dut, write out its RTLIL, then run ``scoreboard_sim``.

    The RTLIL goes to ``test_scoreboard6600.il`` and the simulation
    waveform to ``test_scoreboard6600.vcd``.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    testbench = scoreboard_sim(dut, alusim)
    run_simulation(dut, testbench, vcd_name='test_scoreboard6600.vcd')

    # the branch test (scoreboard_branch_sim) is currently not run:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1250
1251
# run the scoreboard test when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()