src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = 1 # regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117
 118         # outputs
 119         self.busy_o = Signal(n_units, reset_less=True)
 120         self.rd_rel_o = Signal(n_units, reset_less=True)
 121         self.req_rel_o = Signal(n_units, reset_less=True)
 122         if ldstmode:
 123             self.adr_rel_o = Signal(n_units, reset_less=True)
 124             self.sto_rel_o = Signal(n_units, reset_less=True)
 125             self.req_rel_o = Signal(n_units, reset_less=True)
 126             self.load_mem_o = Signal(n_units, reset_less=True)
 127             self.stwd_mem_o = Signal(n_units, reset_less=True)
 128
 129         # in/out register data (note: not register#, actual data)
 130         self.data_o = Signal(rwid, reset_less=True)
 131         self.src1_i = Signal(rwid, reset_less=True)
 132         self.src2_i = Signal(rwid, reset_less=True)
 133         # input operand
 134
 135     def elaborate(self, platform):
 136         m = Module()
 137         comb = m.d.comb
 138
 139         for i, alu in enumerate(self.units):
 140             setattr(m.submodules, "comp%d" % i, alu)
 141
 142         go_rd_l = []
 143         go_wr_l = []
 144         issue_l = []
 145         busy_l = []
 146         req_rel_l = []
 147         rd_rel_l = []
 148         shadow_l = []
 149         godie_l = []
 150         for alu in self.units:
 151             req_rel_l.append(alu.req_rel_o)
 152             rd_rel_l.append(alu.rd_rel_o)
 153             shadow_l.append(alu.shadown_i)
 154             godie_l.append(alu.go_die_i)
 155             go_wr_l.append(alu.go_wr_i)
 156             go_rd_l.append(alu.go_rd_i)
 157             issue_l.append(alu.issue_i)
 158             busy_l.append(alu.busy_o)
 159         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 160         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 161         comb += self.busy_o.eq(Cat(*busy_l))
 162         comb += Cat(*godie_l).eq(self.go_die_i)
 163         comb += Cat(*shadow_l).eq(self.shadown_i)
 164         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 165         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 166         comb += Cat(*issue_l).eq(self.issue_i)
 167
 168         # connect data register input/output
 169
 170         # merge (OR) all integer FU / ALU outputs to a single value
 171         # bit of a hack: treereduce needs a list with an item named "data_o"
 172         if self.units:
 173             data_o = treereduce(self.units)
 174             comb += self.data_o.eq(data_o)
 175
 176         for i, alu in enumerate(self.units):
 177             comb += alu.src1_i.eq(self.src1_i)
 178             comb += alu.src2_i.eq(self.src2_i)
 179
 180         if not self.ldstmode:
 181             return m
 182
 183         ldmem_l = []
 184         stmem_l = []
 185         go_ad_l = []
 186         adr_rel_l = []
 187         sto_rel_l = []
 188         for alu in self.units:
 189             adr_rel_l.append(alu.adr_rel_o)
 190             sto_rel_l.append(alu.sto_rel_o)
 191             ldmem_l.append(alu.load_mem_o)
 192             stmem_l.append(alu.stwd_mem_o)
 193             go_ad_l.append(alu.go_ad_i)
 194         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 195         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 196         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 197         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 198         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 199
 200         return m
 201
 202
 203 class CompUnitLDSTs(CompUnitsBase):
 204
 205     def __init__(self, rwid, opwid, n_ldsts, mem):
 206         """ Inputs:
 207
 208             * :rwid:   bit width of register file(s) - both FP and INT
 209             * :opwid:  operand bit width
 210         """
 211         self.opwid = opwid
 212
 213         # inputs
 214         self.oper_i = Signal(opwid, reset_less=True)
 215         self.imm_i = Signal(rwid, reset_less=True)
 216
 217         # Int ALUs
 218         self.alus = []
 219         for i in range(n_ldsts):
 220             self.alus.append(ALU(rwid))
 221
 222         units = []
 223         for alu in self.alus:
 224             aluopwid = 4 # see compldst.py for "internal" opcode
 225             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 226
 227         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 228
 229     def elaborate(self, platform):
 230         m = CompUnitsBase.elaborate(self, platform)
 231         comb = m.d.comb
 232
 233         # hand the same operation to all units, 4 lower bits though
 234         for alu in self.units:
 235             comb += alu.oper_i[0:4].eq(self.oper_i)
 236             comb += alu.imm_i.eq(self.imm_i)
 237             comb += alu.isalu_i.eq(0)
 238
 239         return m
 240
 241
 242 class CompUnitALUs(CompUnitsBase):
 243
 244     def __init__(self, rwid, opwid, n_alus):
 245         """ Inputs:
 246
 247             * :rwid:   bit width of register file(s) - both FP and INT
 248             * :opwid:  operand bit width
 249         """
 250         self.opwid = opwid
 251
 252         # inputs
 253         self.oper_i = Signal(opwid, reset_less=True)
 254         self.imm_i = Signal(rwid, reset_less=True)
 255
 256         # Int ALUs
 257         alus = []
 258         for i in range(n_alus):
 259             alus.append(ALU(rwid))
 260
 261         units = []
 262         for alu in alus:
 263             aluopwid = 3 # extra bit for immediate mode
 264             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 265
 266         CompUnitsBase.__init__(self, rwid, units)
 267
 268     def elaborate(self, platform):
 269         m = CompUnitsBase.elaborate(self, platform)
 270         comb = m.d.comb
 271
 272         # hand the same operation to all units, only lower 3 bits though
 273         for alu in self.units:
 274             comb += alu.oper_i[0:3].eq(self.oper_i)
 275             comb += alu.imm_i.eq(self.imm_i)
 276
 277         return m
 278
 279
 280 class CompUnitBR(CompUnitsBase):
 281
 282     def __init__(self, rwid, opwid):
 283         """ Inputs:
 284
 285             * :rwid:   bit width of register file(s) - both FP and INT
 286             * :opwid:  operand bit width
 287
 288             Note: bgt unit is returned so that a shadow unit can be created
 289             for it
 290         """
 291         self.opwid = opwid
 292
 293         # inputs
 294         self.oper_i = Signal(opwid, reset_less=True)
 295         self.imm_i = Signal(rwid, reset_less=True)
 296
 297         # Branch ALU and CU
 298         self.bgt = BranchALU(rwid)
 299         aluopwid = 3 # extra bit for immediate mode
 300         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 301         CompUnitsBase.__init__(self, rwid, [self.br1])
 302
 303     def elaborate(self, platform):
 304         m = CompUnitsBase.elaborate(self, platform)
 305         comb = m.d.comb
 306
 307         # hand the same operation to all units
 308         for alu in self.units:
 309             comb += alu.oper_i.eq(self.oper_i)
 310             comb += alu.imm_i.eq(self.imm_i)
 311
 312         return m
 313
 314
 315 class FunctionUnits(Elaboratable):
 316
 317     def __init__(self, n_regs, n_int_alus):
 318         self.n_regs = n_regs
 319         self.n_int_alus = n_int_alus
 320
 321         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 322         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 323         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 324
 325         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 326         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 327
 328         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 329         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 330         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 331
 332         self.readable_o = Signal(n_int_alus, reset_less=True)
 333         self.writable_o = Signal(n_int_alus, reset_less=True)
 334
 335         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 336         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 337         self.go_die_i = Signal(n_int_alus, reset_less=True)
 338         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 339
 340         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 341
 342     def elaborate(self, platform):
 343         m = Module()
 344         comb = m.d.comb
 345         sync = m.d.sync
 346
 347         n_intfus = self.n_int_alus
 348
 349         # Integer FU-FU Dep Matrix
 350         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 351         m.submodules.intfudeps = intfudeps
 352         # Integer FU-Reg Dep Matrix
 353         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 354         m.submodules.intregdeps = intregdeps
 355
 356         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 357         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 358
 359         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 360         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 361
 362         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 363         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 364         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 365
 366         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 367         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 368         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 369         comb += intfudeps.go_die_i.eq(self.go_die_i)
 370         comb += self.readable_o.eq(intfudeps.readable_o)
 371         comb += self.writable_o.eq(intfudeps.writable_o)
 372
 373         # Connect function issue / arrays, and dest/src1/src2
 374         comb += intregdeps.dest_i.eq(self.dest_i)
 375         comb += intregdeps.src_i[0].eq(self.src1_i)
 376         comb += intregdeps.src_i[1].eq(self.src2_i)
 377
 378         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 379         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 380         comb += intregdeps.go_die_i.eq(self.go_die_i)
 381         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 382
 383         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 384         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 385         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 386
 387         return m
 388
 389
 390 class Scoreboard(Elaboratable):
 391     def __init__(self, rwid, n_regs):
 392         """ Inputs:
 393
 394             * :rwid:   bit width of register file(s) - both FP and INT
 395             * :n_regs: depth of register file(s) - number of FP and INT regs
 396         """
 397         self.rwid = rwid
 398         self.n_regs = n_regs
 399
 400         # Register Files
 401         self.intregs = RegFileArray(rwid, n_regs)
 402         self.fpregs = RegFileArray(rwid, n_regs)
 403
 404         # issue q needs to get at these
 405         self.aluissue = IssueUnitGroup(2)
 406         self.lsissue = IssueUnitGroup(2)
 407         self.brissue = IssueUnitGroup(1)
 408         # and these
 409         self.alu_oper_i = Signal(4, reset_less=True)
 410         self.alu_imm_i = Signal(rwid, reset_less=True)
 411         self.br_oper_i = Signal(4, reset_less=True)
 412         self.br_imm_i = Signal(rwid, reset_less=True)
 413         self.ls_oper_i = Signal(4, reset_less=True)
 414         self.ls_imm_i = Signal(rwid, reset_less=True)
 415
 416         # inputs
 417         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 418         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 419         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 420         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 421
 422         # outputs
 423         self.issue_o = Signal(reset_less=True) # instruction was accepted
 424         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 425
 426         # for branch speculation experiment.  branch_direction = 0 if
 427         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 428         # branch_succ and branch_fail are requests to have the current
 429         # instruction be dependent on the branch unit "shadow" capability.
 430         self.branch_succ_i = Signal(reset_less=True)
 431         self.branch_fail_i = Signal(reset_less=True)
 432         self.branch_direction_o = Signal(2, reset_less=True)
 433
 434     def elaborate(self, platform):
 435         m = Module()
 436         comb = m.d.comb
 437         sync = m.d.sync
 438
 439         m.submodules.intregs = self.intregs
 440         m.submodules.fpregs = self.fpregs
 441
 442         # register ports
 443         int_dest = self.intregs.write_port("dest")
 444         int_src1 = self.intregs.read_port("src1")
 445         int_src2 = self.intregs.read_port("src2")
 446
 447         fp_dest = self.fpregs.write_port("dest")
 448         fp_src1 = self.fpregs.read_port("src1")
 449         fp_src2 = self.fpregs.read_port("src2")
 450
 451         # Int ALUs and BR ALUs
 452         n_int_alus = 5
 453         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 454         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 455
 456         # LDST Comp Units
 457         n_ldsts = 2
 458         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
 459
 460         # Comp Units
 461         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 462         bgt = cub.bgt # get at the branch computation unit
 463         br1 = cub.br1
 464
 465         # Int FUs
 466         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 467
 468         # Memory FUs
 469         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 470
 471         # Count of number of FUs
 472         n_intfus = n_int_alus
 473         n_fp_fus = 0 # for now
 474
 475         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 476         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 477         m.submodules.intpick1 = intpick1
 478
 479         # INT/FP Issue Unit
 480         regdecode = RegDecode(self.n_regs)
 481         m.submodules.regdecode = regdecode
 482         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 483         m.submodules.issueunit = issueunit
 484
 485         # Shadow Matrix.  currently n_intfus shadows, to be used for
 486         # write-after-write hazards.  NOTE: there is one extra for branches,
 487         # so the shadow width is increased by 1
 488         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 489         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 490
 491         # record previous instruction to cast shadow on current instruction
 492         prev_shadow = Signal(n_intfus)
 493
 494         # Branch Speculation recorder.  tracks the success/fail state as
 495         # each instruction is issued, so that when the branch occurs the
 496         # allow/cancel can be issued as appropriate.
 497         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 498
 499         #---------
 500         # ok start wiring things together...
 501         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 502         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 503         #---------
 504
 505         #---------
 506         # Issue Unit is where it starts.  set up some in/outs for this module
 507         #---------
 508         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 509                      regdecode.src1_i.eq(self.int_src1_i),
 510                      regdecode.src2_i.eq(self.int_src2_i),
 511                      regdecode.enable_i.eq(self.reg_enable_i),
 512                      self.issue_o.eq(issueunit.issue_o)
 513                     ]
 514
 515         # take these to outside (issue needs them)
 516         comb += cua.oper_i.eq(self.alu_oper_i)
 517         comb += cua.imm_i.eq(self.alu_imm_i)
 518         comb += cub.oper_i.eq(self.br_oper_i)
 519         comb += cub.imm_i.eq(self.br_imm_i)
 520         comb += cul.oper_i.eq(self.ls_oper_i)
 521         comb += cul.imm_i.eq(self.ls_imm_i)
 522
 523         # TODO: issueunit.f (FP)
 524
 525         # and int function issue / busy arrays, and dest/src1/src2
 526         comb += intfus.dest_i.eq(regdecode.dest_o)
 527         comb += intfus.src1_i.eq(regdecode.src1_o)
 528         comb += intfus.src2_i.eq(regdecode.src2_o)
 529
 530         fn_issue_o = issueunit.fn_issue_o
 531
 532         comb += intfus.fn_issue_i.eq(fn_issue_o)
 533         comb += issueunit.busy_i.eq(cu.busy_o)
 534         comb += self.busy_o.eq(cu.busy_o.bool())
 535
 536         #---------
 537         # Memory Function Unit
 538         #---------
 539         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 540         comb += memfus.addr_we_i.eq(cul.adr_rel_o) # Match enable on adr rel
 541
 542         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 543         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 544         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 545
 546         #---------
 547         # merge shadow matrices outputs
 548         #---------
 549
 550         # these are explained in ShadowMatrix docstring, and are to be
 551         # connected to the FUReg and FUFU Matrices, to get them to reset
 552         anydie = Signal(n_intfus, reset_less=True)
 553         allshadown = Signal(n_intfus, reset_less=True)
 554         shreset = Signal(n_intfus, reset_less=True)
 555         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 556         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 557         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 558
 559         #---------
 560         # connect fu-fu matrix
 561         #---------
 562
 563         # Group Picker... done manually for now.
 564         go_rd_o = intpick1.go_rd_o
 565         go_wr_o = intpick1.go_wr_o
 566         go_rd_i = intfus.go_rd_i
 567         go_wr_i = intfus.go_wr_i
 568         go_die_i = intfus.go_die_i
 569         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 570         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 571         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 572         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 573
 574         # Connect Picker
 575         #---------
 576         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 577         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 578         int_rd_o = intfus.readable_o
 579         int_wr_o = intfus.writable_o
 580         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 581         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 582
 583         #---------
 584         # Shadow Matrix
 585         #---------
 586
 587         comb += shadows.issue_i.eq(fn_issue_o)
 588         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 589         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 590         #---------
 591         # NOTE; this setup is for the instruction order preservation...
 592
 593         # connect shadows / go_dies to Computation Units
 594         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 595         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 596
 597         # ok connect first n_int_fu shadows to busy lines, to create an
 598         # instruction-order linked-list-like arrangement, using a bit-matrix
 599         # (instead of e.g. a ring buffer).
 600         # XXX TODO
 601
 602         # when written, the shadow can be cancelled (and was good)
 603         for i in range(n_intfus):
 604             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 605
 606         # *previous* instruction shadows *current* instruction, and, obviously,
 607         # if the previous is completed (!busy) don't cast the shadow!
 608         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 609         for i in range(n_intfus):
 610             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 611
 612         #---------
 613         # ... and this is for branch speculation.  it uses the extra bit
 614         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 615         # only needs to set shadow_i, s_fail_i and s_good_i
 616
 617         # issue captures shadow_i (if enabled)
 618         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 619
 620         bactive = Signal(reset_less=True)
 621         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 622
 623         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 624         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 625             comb += bshadow.issue_i.eq(fn_issue_o)
 626             for i in range(n_intfus):
 627                 with m.If(fn_issue_o & (Const(1<<i))):
 628                     comb += bshadow.shadow_i[i][0].eq(1)
 629
 630         # finally, we need an indicator to the test infrastructure as to
 631         # whether the branch succeeded or failed, plus, link up to the
 632         # "recorder" of whether the instruction was under shadow or not
 633
 634         with m.If(br1.issue_i):
 635             sync += bspec.active_i.eq(1)
 636         with m.If(self.branch_succ_i):
 637             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 638         with m.If(self.branch_fail_i):
 639             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 640
 641         # branch is active (TODO: a better signal: this is over-using the
 642         # go_write signal - actually the branch should not be "writing")
 643         with m.If(br1.go_wr_i):
 644             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 645             sync += bspec.active_i.eq(0)
 646             comb += bspec.br_i.eq(1)
 647             # branch occurs if data == 1, failed if data == 0
 648             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 649             for i in range(n_intfus):
 650                 # *expected* direction of the branch matched against *actual*
 651                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 652                 # ... or it didn't
 653                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 654
 655         #---------
 656         # Connect Register File(s)
 657         #---------
 658         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 659         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 660         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 661
 662         # connect ALUs to regfule
 663         comb += int_dest.data_i.eq(cu.data_o)
 664         comb += cu.src1_i.eq(int_src1.data_o)
 665         comb += cu.src2_i.eq(int_src2.data_o)
 666
 667         # connect ALU Computation Units
 668         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 669         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 670         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 671
 672         return m
 673
 674     def __iter__(self):
 675         yield from self.intregs
 676         yield from self.fpregs
 677         yield self.int_dest_i
 678         yield self.int_src1_i
 679         yield self.int_src2_i
 680         yield self.issue_o
 681         yield self.branch_succ_i
 682         yield self.branch_fail_i
 683         yield self.branch_direction_o
 684
 685     def ports(self):
 686         return list(self)
 687
 688
 689 class IssueToScoreboard(Elaboratable):
 690
 691     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 692         self.qlen = qlen
 693         self.n_in = n_in
 694         self.n_out = n_out
 695         self.rwid = rwid
 696         self.opw = opwid
 697         self.n_regs = n_regs
 698
 699         mqbits = (int(log(qlen) / log(2))+2, False)
 700         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 701         self.p_ready_o = Signal() # instructions were added
 702         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 703
 704         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 705         self.qlen_o = Signal(mqbits, reset_less=True)
 706
 707     def elaborate(self, platform):
 708         m = Module()
 709         comb = m.d.comb
 710         sync = m.d.sync
 711
 712         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 713         sc = Scoreboard(self.rwid, self.n_regs)
 714         mem = TestMemory(self.rwid, 8) # not too big, takes too long
 715         m.submodules.iq = iq
 716         m.submodules.sc = sc
 717         m.submodules.mem = mem
 718
 719         # get at the regfile for testing
 720         self.intregs = sc.intregs
 721
 722         # and the "busy" signal and instruction queue length
 723         comb += self.busy_o.eq(sc.busy_o)
 724         comb += self.qlen_o.eq(iq.qlen_o)
 725
 726         # link up instruction queue
 727         comb += iq.p_add_i.eq(self.p_add_i)
 728         comb += self.p_ready_o.eq(iq.p_ready_o)
 729         for i in range(self.n_in):
 730             comb += eq(iq.data_i[i], self.data_i[i])
 731
 732         # take instruction and process it.  note that it's possible to
 733         # "inspect" the queue contents *without* actually removing the
 734         # items.  items are only removed when the
 735
 736         # in "waiting" state
 737         wait_issue_br = Signal()
 738         wait_issue_alu = Signal()
 739         wait_issue_ls = Signal()
 740
 741         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 742             # set instruction pop length to 1 if the unit accepted
 743             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 744                 with m.If(iq.qlen_o != 0):
 745                     comb += iq.n_sub_i.eq(1)
 746             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 747                 with m.If(iq.qlen_o != 0):
 748                     comb += iq.n_sub_i.eq(1)
 749             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 750                 with m.If(iq.qlen_o != 0):
 751                     comb += iq.n_sub_i.eq(1)
 752
 753         # see if some instruction(s) are here.  note that this is
 754         # "inspecting" the in-place queue.  note also that on the
 755         # cycle following "waiting" for fn_issue_o to be set, the
 756         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 757         with m.If(iq.qlen_o != 0):
 758             # get the operands and operation
 759             imm = iq.data_o[0].imm_i
 760             dest = iq.data_o[0].dest_i
 761             src1 = iq.data_o[0].src1_i
 762             src2 = iq.data_o[0].src2_i
 763             op = iq.data_o[0].oper_i
 764             opi = iq.data_o[0].opim_i # immediate set
 765
 766             # set the src/dest regs
 767             comb += sc.int_dest_i.eq(dest)
 768             comb += sc.int_src1_i.eq(src1)
 769             comb += sc.int_src2_i.eq(src2)
 770             comb += sc.reg_enable_i.eq(1) # enable the regfile
 771
 772             # choose a Function-Unit-Group
 773             with m.If((op & (0x3<<2)) != 0): # branch
 774                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 775                 comb += sc.br_imm_i.eq(imm)
 776                 comb += sc.brissue.insn_i.eq(1)
 777                 comb += wait_issue_br.eq(1)
 778             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 779                 # see compldst.py
 780                 # bit 0: ADD/SUB
 781                 # bit 1: immed
 782                 # bit 4: LD
 783                 # bit 5: ST
 784                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 785                 comb += sc.ls_imm_i.eq(imm)
 786                 comb += sc.lsissue.insn_i.eq(1)
 787                 comb += wait_issue_ls.eq(1)
 788             with m.Else(): # alu
 789                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 790                 comb += sc.alu_imm_i.eq(imm)
 791                 comb += sc.aluissue.insn_i.eq(1)
 792                 comb += wait_issue_alu.eq(1)
 793
 794             # XXX TODO
 795             # these indicate that the instruction is to be made
 796             # shadow-dependent on
 797             # (either) branch success or branch fail
 798             #yield sc.branch_fail_i.eq(branch_fail)
 799             #yield sc.branch_succ_i.eq(branch_success)
 800
 801         return m
 802
 803     def __iter__(self):
 804         yield self.p_ready_o
 805         for o in self.data_i:
 806             yield from list(o)
 807         yield self.p_add_i
 808
 809     def ports(self):
 810         return list(self)
 811
 812
 813 IADD = 0
 814 ISUB = 1
 815 IMUL = 2
 816 ISHF = 3
 817 IBGT = 4
 818 IBLT = 5
 819 IBEQ = 6
 820 IBNE = 7
 821
 822
 823 class RegSim:
 824     def __init__(self, rwidth, nregs):
 825         self.rwidth = rwidth
 826         self.regs = [0] * nregs
 827
 828     def op(self, op, op_imm, imm, src1, src2, dest):
 829         maxbits = (1 << self.rwidth) - 1
 830         src1 = self.regs[src1] & maxbits
 831         if op_imm:
 832             src2 = imm
 833         else:
 834             src2 = self.regs[src2] & maxbits
 835         if op == IADD:
 836             val = src1 + src2
 837         elif op == ISUB:
 838             val = src1 - src2
 839         elif op == IMUL:
 840             val = src1 * src2
 841         elif op == ISHF:
 842             val = src1 >> (src2 & maxbits)
 843         elif op == IBGT:
 844             val = int(src1 > src2)
 845         elif op == IBLT:
 846             val = int(src1 < src2)
 847         elif op == IBEQ:
 848             val = int(src1 == src2)
 849         elif op == IBNE:
 850             val = int(src1 != src2)
 851         else:
 852             return 0 # LD/ST TODO
 853         val &= maxbits
 854         self.setval(dest, val)
 855         return val
 856
 857     def setval(self, dest, val):
 858         print ("sim setval", dest, hex(val))
 859         self.regs[dest] = val
 860
 861     def dump(self, dut):
 862         for i, val in enumerate(self.regs):
 863             reg = yield dut.intregs.regs[i].reg
 864             okstr = "OK" if reg == val else "!ok"
 865             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 866
 867     def check(self, dut):
 868         for i, val in enumerate(self.regs):
 869             reg = yield dut.intregs.regs[i].reg
 870             if reg != val:
 871                 print("reg %d expected %x received %x\n" % (i, val, reg))
 872                 yield from self.dump(dut)
 873                 assert False
 874
 875 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 876             branch_success, branch_fail):
 877     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 878                'src1_i': src1, 'src2_i': src2}]
 879
 880     sendlen = 1
 881     for idx in range(sendlen):
 882         yield from eq(dut.data_i[idx], instrs[idx])
 883         di = yield dut.data_i[idx]
 884         print ("senddata %d %x" % (idx, di))
 885     yield dut.p_add_i.eq(sendlen)
 886     yield
 887     o_p_ready = yield dut.p_ready_o
 888     while not o_p_ready:
 889         yield
 890         o_p_ready = yield dut.p_ready_o
 891
 892     yield dut.p_add_i.eq(0)
 893
 894
 895 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 896     yield from disable_issue(dut)
 897     yield dut.int_dest_i.eq(dest)
 898     yield dut.int_src1_i.eq(src1)
 899     yield dut.int_src2_i.eq(src2)
 900     if (op & (0x3<<2)) != 0: # branch
 901         yield dut.brissue.insn_i.eq(1)
 902         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 903         yield dut.br_imm_i.eq(imm)
 904         dut_issue = dut.brissue
 905     else:
 906         yield dut.aluissue.insn_i.eq(1)
 907         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 908         yield dut.alu_imm_i.eq(imm)
 909         dut_issue = dut.aluissue
 910     yield dut.reg_enable_i.eq(1)
 911
 912     # these indicate that the instruction is to be made shadow-dependent on
 913     # (either) branch success or branch fail
 914     yield dut.branch_fail_i.eq(branch_fail)
 915     yield dut.branch_succ_i.eq(branch_success)
 916
 917     yield
 918     yield from wait_for_issue(dut, dut_issue)
 919
 920
 921 def print_reg(dut, rnums):
 922     rs = []
 923     for rnum in rnums:
 924         reg = yield dut.intregs.regs[rnum].reg
 925         rs.append("%x" % reg)
 926     rnums = map(str, rnums)
 927     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 928
 929
 930 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 931     insts = []
 932     for i in range(n_ops):
 933         src1 = randint(1, dut.n_regs-1)
 934         src2 = randint(1, dut.n_regs-1)
 935         imm = randint(1, (1<<dut.rwid)-1)
 936         dest = randint(1, dut.n_regs-1)
 937         op = randint(0, max_opnums)
 938         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 939
 940         if shadowing:
 941             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 942         else:
 943             insts.append((src1, src2, dest, op, opi, imm))
 944     return insts
 945
 946
 947 def wait_for_busy_clear(dut):
 948     while True:
 949         busy_o = yield dut.busy_o
 950         if not busy_o:
 951             break
 952         print ("busy",)
 953         yield
 954
 955 def disable_issue(dut):
 956     yield dut.aluissue.insn_i.eq(0)
 957     yield dut.brissue.insn_i.eq(0)
 958     yield dut.lsissue.insn_i.eq(0)
 959
 960
 961 def wait_for_issue(dut, dut_issue):
 962     while True:
 963         issue_o = yield dut_issue.fn_issue_o
 964         if issue_o:
 965             yield from disable_issue(dut)
 966             yield dut.reg_enable_i.eq(0)
 967             break
 968         print ("busy",)
 969         #yield from print_reg(dut, [1,2,3])
 970         yield
 971     #yield from print_reg(dut, [1,2,3])
 972
 973 def scoreboard_branch_sim(dut, alusim):
 974
 975     iseed = 3
 976
 977     for i in range(1):
 978
 979         print ("rseed", iseed)
 980         seed(iseed)
 981         iseed += 1
 982
 983         yield dut.branch_direction_o.eq(0)
 984
 985         # set random values in the registers
 986         for i in range(1, dut.n_regs):
 987             val = 31+i*3
 988             val = randint(0, (1<<alusim.rwidth)-1)
 989             yield dut.intregs.regs[i].reg.eq(val)
 990             alusim.setval(i, val)
 991
 992         if False:
 993             # create some instructions: branches create a tree
 994             insts = create_random_ops(dut, 1, True, 1)
 995             #insts.append((6, 6, 1, 2, (0, 0)))
 996             #insts.append((4, 3, 3, 0, (0, 0)))
 997
 998             src1 = randint(1, dut.n_regs-1)
 999             src2 = randint(1, dut.n_regs-1)
1000             #op = randint(4, 7)
1001             op = 4 # only BGT at the moment
1002
1003             branch_ok = create_random_ops(dut, 1, True, 1)
1004             branch_fail = create_random_ops(dut, 1, True, 1)
1005
1006             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
1007
1008         if True:
1009             insts = []
1010             insts.append( (3, 5, 2, 0, (0, 0)) )
1011             branch_ok = []
1012             branch_fail = []
1013             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
1014             branch_ok.append( None )
1015             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
1016             #branch_fail.append( None )
1017             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
1018
1019         siminsts = deepcopy(insts)
1020
1021         # issue instruction(s)
1022         i = -1
1023         instrs = insts
1024         branch_direction = 0
1025         while instrs:
1026             yield
1027             yield
1028             i += 1
1029             branch_direction = yield dut.branch_direction_o # way branch went
1030             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
1031             if branch_direction == 1 and shadow_on:
1032                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1033                 continue # branch was "success" and this is a "failed"... skip
1034             if branch_direction == 2 and shadow_off:
1035                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1036                 continue # branch was "fail" and this is a "success"... skip
1037             if branch_direction != 0:
1038                 shadow_on = 0
1039                 shadow_off = 0
1040             is_branch = op >= 4
1041             if is_branch:
1042                 branch_ok, branch_fail = dest
1043                 dest = src2
1044                 # ok zip up the branch success / fail instructions and
1045                 # drop them into the queue, one marked "to have branch success"
1046                 # the other to be marked shadow branch "fail".
1047                 # one out of each of these will be cancelled
1048                 for ok, fl in zip(branch_ok, branch_fail):
1049                     if ok:
1050                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1051                     if fl:
1052                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1053             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1054                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1055             yield from int_instr(dut, op, src1, src2, dest,
1056                                  shadow_on, shadow_off)
1057
1058         # wait for all instructions to stop before checking
1059         yield
1060         yield from wait_for_busy_clear(dut)
1061
1062         i = -1
1063         while siminsts:
1064             instr = siminsts.pop(0)
1065             if instr is None:
1066                 continue
1067             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
1068             i += 1
1069             is_branch = op >= 4
1070             if is_branch:
1071                 branch_ok, branch_fail = dest
1072                 dest = src2
1073             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
1074                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1075             branch_res = alusim.op(op, src1, src2, dest)
1076             if is_branch:
1077                 if branch_res:
1078                     siminsts += branch_ok
1079                 else:
1080                     siminsts += branch_fail
1081
1082         # check status
1083         yield from alusim.check(dut)
1084         yield from alusim.dump(dut)
1085
1086
def scoreboard_sim(dut, alusim):
    """Simulation process: queue instructions on the dut and check results.

    The hardware registers and the software model ``alusim`` are loaded
    with the same random values, a list of instructions is built (one
    ``if True`` group selects which), each instruction is applied to the
    model and pushed into the dut through ``instr_q``, and once the dut has
    drained, the hardware registers are compared against the model.

    The issue loop unpacks every instruction as the 7-field tuple
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        # (note: this inner loop re-uses the name "i" of the outer loop)
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        # NOTE(review): several of the disabled groups below hold 4-, 5- or
        # 6-field tuples; they do not match the 7-field unpacking in the
        # issue loop and would need extending before being re-enabled.
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        # NOTE(review): RegSim.op returns 0 without writing a register for
        # op 0x10, so the model is left unchanged by this instruction.
        if True: # LD test (with immediate)
            instrs.append( (1, 2, 2, 0x10, 1, 20, (0, 0)) )

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # preset registers 5 and 3 (hardware and model), then a
            # branch-op (4) followed by an instruction flagged (0, 1)
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # as above, with a different preset value and the following
            # instruction flagged (1, 0)
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # the model is updated immediately, in program order, before
            # the instruction is handed to the hardware queue
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first until the instruction queue reports empty ...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # (... then four more bare yields before polling busy_o;
        #  presumably to let the last instruction reach the scoreboard --
        #  TODO confirm)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1232
1233
def test_scoreboard():
    """Build the dut, write its RTLIL to disk and run the simulation.

    The RTLIL goes to "test_scoreboard6600.il"; the simulation runs
    ``scoreboard_sim`` and records a VCD trace in "test_scoreboard6600.vcd".
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative process (currently disabled):
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1247
1248
# run the scoreboard test bench when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()