src/soc/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5
   6 from soc.regfile.regfile import RegFileArray, treereduce
   7 from soc.scoreboard.fu_fu_matrix import FUFUDepMatrix
   8 from soc.scoreboard.fu_reg_matrix import FURegDepMatrix
   9 from soc.scoreboard.global_pending import GlobalPending
  10 from soc.scoreboard.group_picker import GroupPicker
  11 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  12 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  13 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  14 from soc.scoreboard.memfu import MemFunctionUnits
  15
  16 from compalu import ComputationUnitNoDelay
  17 from compldst import LDSTCompUnit
  18 from testmem import TestMemory
  19
  20 from alu_hier import ALU, BranchALU
  21 from nmutil.latch import SRLatch
  22 from nmutil.nmoperator import eq
  23
  24 from random import randint, seed
  25 from copy import deepcopy
  26 from math import log
  27
  28
  29 class MemSim:
  30     def __init__(self, regwid, addrw):
  31         self.regwid = regwid
  32         self.ddepth = 1 # regwid//8
  33         depth = (1<<addrw) // self.ddepth
  34         self.mem = list(range(0, depth))
  35
  36     def ld(self, addr):
  37         return self.mem[addr>>self.ddepth]
  38
  39     def st(self, addr, data):
  40         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  41
  42
  43 class CompUnitsBase(Elaboratable):
  44     """ Computation Unit Base class.
  45
  46         Amazingly, this class works recursively.  It's supposed to just
  47         look after some ALUs (that can handle the same operations),
  48         grouping them together, however it turns out that the same code
  49         can also group *groups* of Computation Units together as well.
  50
  51         Basically it was intended just to concatenate the ALU's issue,
  52         go_rd etc. signals together, which start out as bits and become
  53         sequences.  Turns out that the same trick works just as well
  54         on Computation Units!
  55
  56         So this class may be used recursively to present a top-level
  57         sequential concatenation of all the signals in and out of
  58         ALUs, whilst at the same time making it convenient to group
  59         ALUs together.
  60
  61         At the lower level, the intent is that groups of (identical)
  62         ALUs may be passed the same operation.  Even beyond that,
  63         the intent is that that group of (identical) ALUs actually
  64         share the *same pipeline* and as such become a "Concurrent
  65         Computation Unit" as defined by Mitch Alsup (see section
  66         11.4.9.3)
  67     """
  68     def __init__(self, rwid, units, ldstmode=False):
  69         """ Inputs:
  70
  71             * :rwid:   bit width of register file(s) - both FP and INT
  72             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  73         """
  74         self.units = units
  75         self.ldstmode = ldstmode
  76         self.rwid = rwid
  77         self.rwid = rwid
  78         if units and isinstance(units[0], CompUnitsBase):
  79             self.n_units = 0
  80             for u in self.units:
  81                 self.n_units += u.n_units
  82         else:
  83             self.n_units = len(units)
  84
  85         n_units = self.n_units
  86
  87         # inputs
  88         self.issue_i = Signal(n_units, reset_less=True)
  89         self.go_rd_i = Signal(n_units, reset_less=True)
  90         self.go_wr_i = Signal(n_units, reset_less=True)
  91         self.shadown_i = Signal(n_units, reset_less=True)
  92         self.go_die_i = Signal(n_units, reset_less=True)
  93         if ldstmode:
  94             self.go_ad_i = Signal(n_units, reset_less=True)
  95             self.go_st_i = Signal(n_units, reset_less=True)
  96
  97         # outputs
  98         self.busy_o = Signal(n_units, reset_less=True)
  99         self.rd_rel_o = Signal(n_units, reset_less=True)
 100         self.req_rel_o = Signal(n_units, reset_less=True)
 101         self.done_o = Signal(n_units, reset_less=True)
 102         if ldstmode:
 103             self.ld_o = Signal(n_units, reset_less=True) # op is LD
 104             self.st_o = Signal(n_units, reset_less=True) # op is ST
 105             self.adr_rel_o = Signal(n_units, reset_less=True)
 106             self.sto_rel_o = Signal(n_units, reset_less=True)
 107             self.load_mem_o = Signal(n_units, reset_less=True)
 108             self.stwd_mem_o = Signal(n_units, reset_less=True)
 109             self.addr_o = Signal(rwid, reset_less=True)
 110
 111         # in/out register data (note: not register#, actual data)
 112         self.data_o = Signal(rwid, reset_less=True)
 113         self.src1_i = Signal(rwid, reset_less=True)
 114         self.src2_i = Signal(rwid, reset_less=True)
 115         # input operand
 116
 117     def elaborate(self, platform):
 118         m = Module()
 119         comb = m.d.comb
 120
 121         for i, alu in enumerate(self.units):
 122             setattr(m.submodules, "comp%d" % i, alu)
 123
 124         go_rd_l = []
 125         go_wr_l = []
 126         issue_l = []
 127         busy_l = []
 128         req_rel_l = []
 129         done_l = []
 130         rd_rel_l = []
 131         shadow_l = []
 132         godie_l = []
 133         for alu in self.units:
 134             req_rel_l.append(alu.req_rel_o)
 135             done_l.append(alu.done_o)
 136             rd_rel_l.append(alu.rd_rel_o)
 137             shadow_l.append(alu.shadown_i)
 138             godie_l.append(alu.go_die_i)
 139             go_wr_l.append(alu.go_wr_i)
 140             go_rd_l.append(alu.go_rd_i)
 141             issue_l.append(alu.issue_i)
 142             busy_l.append(alu.busy_o)
 143         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 144         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 145         comb += self.done_o.eq(Cat(*done_l))
 146         comb += self.busy_o.eq(Cat(*busy_l))
 147         comb += Cat(*godie_l).eq(self.go_die_i)
 148         comb += Cat(*shadow_l).eq(self.shadown_i)
 149         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 150         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 151         comb += Cat(*issue_l).eq(self.issue_i)
 152
 153         # connect data register input/output
 154
 155         # merge (OR) all integer FU / ALU outputs to a single value
 156         if self.units:
 157             data_o = treereduce(self.units, "data_o")
 158             comb += self.data_o.eq(data_o)
 159             if self.ldstmode:
 160                 addr_o = treereduce(self.units, "addr_o")
 161                 comb += self.addr_o.eq(addr_o)
 162
 163         for i, alu in enumerate(self.units):
 164             comb += alu.src1_i.eq(self.src1_i)
 165             comb += alu.src2_i.eq(self.src2_i)
 166
 167         if not self.ldstmode:
 168             return m
 169
 170         ldmem_l = []
 171         stmem_l = []
 172         go_ad_l = []
 173         go_st_l = []
 174         ld_l = []
 175         st_l = []
 176         adr_rel_l = []
 177         sto_rel_l = []
 178         for alu in self.units:
 179             ld_l.append(alu.ld_o)
 180             st_l.append(alu.st_o)
 181             adr_rel_l.append(alu.adr_rel_o)
 182             sto_rel_l.append(alu.sto_rel_o)
 183             ldmem_l.append(alu.load_mem_o)
 184             stmem_l.append(alu.stwd_mem_o)
 185             go_ad_l.append(alu.go_ad_i)
 186             go_st_l.append(alu.go_st_i)
 187         comb += self.ld_o.eq(Cat(*ld_l))
 188         comb += self.st_o.eq(Cat(*st_l))
 189         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 190         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 191         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 192         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 193         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 194         comb += Cat(*go_st_l).eq(self.go_st_i)
 195
 196         return m
 197
 198
 199 class CompUnitLDSTs(CompUnitsBase):
 200
 201     def __init__(self, rwid, opwid, n_ldsts, mem):
 202         """ Inputs:
 203
 204             * :rwid:   bit width of register file(s) - both FP and INT
 205             * :opwid:  operand bit width
 206         """
 207         self.opwid = opwid
 208
 209         # inputs
 210         self.oper_i = Signal(opwid, reset_less=True)
 211         self.imm_i = Signal(rwid, reset_less=True)
 212
 213         # Int ALUs
 214         self.alus = []
 215         for i in range(n_ldsts):
 216             self.alus.append(ALU(rwid))
 217
 218         units = []
 219         for alu in self.alus:
 220             aluopwid = 4 # see compldst.py for "internal" opcode
 221             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 222
 223         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 224
 225     def elaborate(self, platform):
 226         m = CompUnitsBase.elaborate(self, platform)
 227         comb = m.d.comb
 228
 229         # hand the same operation to all units, 4 lower bits though
 230         for alu in self.units:
 231             comb += alu.oper_i[0:4].eq(self.oper_i)
 232             comb += alu.imm_i.eq(self.imm_i)
 233             comb += alu.isalu_i.eq(0)
 234
 235         return m
 236
 237
 238 class CompUnitALUs(CompUnitsBase):
 239
 240     def __init__(self, rwid, opwid, n_alus):
 241         """ Inputs:
 242
 243             * :rwid:   bit width of register file(s) - both FP and INT
 244             * :opwid:  operand bit width
 245         """
 246         self.opwid = opwid
 247
 248         # inputs
 249         self.oper_i = Signal(opwid, reset_less=True)
 250         self.imm_i = Signal(rwid, reset_less=True)
 251
 252         # Int ALUs
 253         alus = []
 254         for i in range(n_alus):
 255             alus.append(ALU(rwid))
 256
 257         units = []
 258         for alu in alus:
 259             aluopwid = 3 # extra bit for immediate mode
 260             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 261
 262         CompUnitsBase.__init__(self, rwid, units)
 263
 264     def elaborate(self, platform):
 265         m = CompUnitsBase.elaborate(self, platform)
 266         comb = m.d.comb
 267
 268         # hand the same operation to all units, only lower 3 bits though
 269         for alu in self.units:
 270             comb += alu.oper_i[0:3].eq(self.oper_i)
 271             comb += alu.imm_i.eq(self.imm_i)
 272
 273         return m
 274
 275
 276 class CompUnitBR(CompUnitsBase):
 277
 278     def __init__(self, rwid, opwid):
 279         """ Inputs:
 280
 281             * :rwid:   bit width of register file(s) - both FP and INT
 282             * :opwid:  operand bit width
 283
 284             Note: bgt unit is returned so that a shadow unit can be created
 285             for it
 286         """
 287         self.opwid = opwid
 288
 289         # inputs
 290         self.oper_i = Signal(opwid, reset_less=True)
 291         self.imm_i = Signal(rwid, reset_less=True)
 292
 293         # Branch ALU and CU
 294         self.bgt = BranchALU(rwid)
 295         aluopwid = 3 # extra bit for immediate mode
 296         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 297         CompUnitsBase.__init__(self, rwid, [self.br1])
 298
 299     def elaborate(self, platform):
 300         m = CompUnitsBase.elaborate(self, platform)
 301         comb = m.d.comb
 302
 303         # hand the same operation to all units
 304         for alu in self.units:
 305             comb += alu.oper_i.eq(self.oper_i)
 306             comb += alu.imm_i.eq(self.imm_i)
 307
 308         return m
 309
 310
 311 class FunctionUnits(Elaboratable):
 312
 313     def __init__(self, n_regs, n_int_alus):
 314         self.n_regs = n_regs
 315         self.n_int_alus = n_int_alus
 316
 317         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 318         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 319         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 320
 321         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 322         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 323
 324         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 325         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 326         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 327
 328         self.readable_o = Signal(n_int_alus, reset_less=True)
 329         self.writable_o = Signal(n_int_alus, reset_less=True)
 330
 331         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 332         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 333         self.go_die_i = Signal(n_int_alus, reset_less=True)
 334         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 335
 336         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 337
 338     def elaborate(self, platform):
 339         m = Module()
 340         comb = m.d.comb
 341         sync = m.d.sync
 342
 343         n_intfus = self.n_int_alus
 344
 345         # Integer FU-FU Dep Matrix
 346         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 347         m.submodules.intfudeps = intfudeps
 348         # Integer FU-Reg Dep Matrix
 349         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 350         m.submodules.intregdeps = intregdeps
 351
 352         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 353         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 354
 355         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 356         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 357
 358         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 359         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 360         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 361
 362         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 363         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 364         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 365         comb += intfudeps.go_die_i.eq(self.go_die_i)
 366         comb += self.readable_o.eq(intfudeps.readable_o)
 367         comb += self.writable_o.eq(intfudeps.writable_o)
 368
 369         # Connect function issue / arrays, and dest/src1/src2
 370         comb += intregdeps.dest_i.eq(self.dest_i)
 371         comb += intregdeps.src_i[0].eq(self.src1_i)
 372         comb += intregdeps.src_i[1].eq(self.src2_i)
 373
 374         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 375         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 376         comb += intregdeps.go_die_i.eq(self.go_die_i)
 377         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 378
 379         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 380         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 381         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 382
 383         return m
 384
 385
 386 class Scoreboard(Elaboratable):
 387     def __init__(self, rwid, n_regs):
 388         """ Inputs:
 389
 390             * :rwid:   bit width of register file(s) - both FP and INT
 391             * :n_regs: depth of register file(s) - number of FP and INT regs
 392         """
 393         self.rwid = rwid
 394         self.n_regs = n_regs
 395
 396         # Register Files
 397         self.intregs = RegFileArray(rwid, n_regs)
 398         self.fpregs = RegFileArray(rwid, n_regs)
 399
 400         # Memory (test for now)
 401         self.mem = TestMemory(self.rwid, 8) # not too big, takes too long
 402
 403         # issue q needs to get at these
 404         self.aluissue = IssueUnitGroup(2)
 405         self.lsissue = IssueUnitGroup(2)
 406         self.brissue = IssueUnitGroup(1)
 407         # and these
 408         self.alu_oper_i = Signal(4, reset_less=True)
 409         self.alu_imm_i = Signal(rwid, reset_less=True)
 410         self.br_oper_i = Signal(4, reset_less=True)
 411         self.br_imm_i = Signal(rwid, reset_less=True)
 412         self.ls_oper_i = Signal(4, reset_less=True)
 413         self.ls_imm_i = Signal(rwid, reset_less=True)
 414
 415         # inputs
 416         self.int_dest_i = Signal(range(n_regs), reset_less=True) # Dest R# in
 417         self.int_src1_i = Signal(range(n_regs), reset_less=True) # oper1 R# in
 418         self.int_src2_i = Signal(range(n_regs), reset_less=True) # oper2 R# in
 419         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 420
 421         # outputs
 422         self.issue_o = Signal(reset_less=True) # instruction was accepted
 423         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 424
 425         # for branch speculation experiment.  branch_direction = 0 if
 426         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 427         # branch_succ and branch_fail are requests to have the current
 428         # instruction be dependent on the branch unit "shadow" capability.
 429         self.branch_succ_i = Signal(reset_less=True)
 430         self.branch_fail_i = Signal(reset_less=True)
 431         self.branch_direction_o = Signal(2, reset_less=True)
 432
 433     def elaborate(self, platform):
 434         m = Module()
 435         comb = m.d.comb
 436         sync = m.d.sync
 437
 438         m.submodules.intregs = self.intregs
 439         m.submodules.fpregs = self.fpregs
 440         m.submodules.mem = mem = self.mem
 441
 442         # register ports
 443         int_dest = self.intregs.write_port("dest")
 444         int_src1 = self.intregs.read_port("src1")
 445         int_src2 = self.intregs.read_port("src2")
 446
 447         fp_dest = self.fpregs.write_port("dest")
 448         fp_src1 = self.fpregs.read_port("src1")
 449         fp_src2 = self.fpregs.read_port("src2")
 450
 451         # Int ALUs and BR ALUs
 452         n_int_alus = 5
 453         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 454         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 455
 456         # LDST Comp Units
 457         n_ldsts = 2
 458         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)
 459
 460         # Comp Units
 461         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 462         bgt = cub.bgt # get at the branch computation unit
 463         br1 = cub.br1
 464
 465         # Int FUs
 466         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 467
 468         # Memory FUs
 469         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 470
 471         # Memory Priority Picker 1: one gateway per memory port
 472         mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
 473         m.submodules.mempick1 = mempick1
 474
 475         # Count of number of FUs
 476         n_intfus = n_int_alus
 477         n_fp_fus = 0 # for now
 478
 479         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 480         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 481         m.submodules.intpick1 = intpick1
 482
 483         # INT/FP Issue Unit
 484         regdecode = RegDecode(self.n_regs)
 485         m.submodules.regdecode = regdecode
 486         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 487         m.submodules.issueunit = issueunit
 488
 489         # Shadow Matrix.  currently n_intfus shadows, to be used for
 490         # write-after-write hazards.  NOTE: there is one extra for branches,
 491         # so the shadow width is increased by 1
 492         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 493         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 494
 495         # record previous instruction to cast shadow on current instruction
 496         prev_shadow = Signal(n_intfus)
 497
 498         # Branch Speculation recorder.  tracks the success/fail state as
 499         # each instruction is issued, so that when the branch occurs the
 500         # allow/cancel can be issued as appropriate.
 501         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 502
 503         #---------
 504         # ok start wiring things together...
 505         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 506         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 507         #---------
 508
 509         #---------
 510         # Issue Unit is where it starts.  set up some in/outs for this module
 511         #---------
 512         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 513                      regdecode.src1_i.eq(self.int_src1_i),
 514                      regdecode.src2_i.eq(self.int_src2_i),
 515                      regdecode.enable_i.eq(self.reg_enable_i),
 516                      self.issue_o.eq(issueunit.issue_o)
 517                     ]
 518
 519         # take these to outside (issue needs them)
 520         comb += cua.oper_i.eq(self.alu_oper_i)
 521         comb += cua.imm_i.eq(self.alu_imm_i)
 522         comb += cub.oper_i.eq(self.br_oper_i)
 523         comb += cub.imm_i.eq(self.br_imm_i)
 524         comb += cul.oper_i.eq(self.ls_oper_i)
 525         comb += cul.imm_i.eq(self.ls_imm_i)
 526
 527         # TODO: issueunit.f (FP)
 528
 529         # and int function issue / busy arrays, and dest/src1/src2
 530         comb += intfus.dest_i.eq(regdecode.dest_o)
 531         comb += intfus.src1_i.eq(regdecode.src1_o)
 532         comb += intfus.src2_i.eq(regdecode.src2_o)
 533
 534         fn_issue_o = issueunit.fn_issue_o
 535
 536         comb += intfus.fn_issue_i.eq(fn_issue_o)
 537         comb += issueunit.busy_i.eq(cu.busy_o)
 538         comb += self.busy_o.eq(cu.busy_o.bool())
 539
 540         #---------
 541         # Memory Function Unit
 542         #---------
 543         reset_b = Signal(cul.n_units, reset_less=True)
 544         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 545
 546         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 547         comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
 548         comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
 549
 550         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 551         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 552         # issue_i.  multi-issue gets a bit more complex but not a lot.
 553         prior_ldsts = Signal(cul.n_units, reset_less=True)
 554         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 555         with m.If(self.ls_oper_i[3]): # LD bit of operand
 556             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 557         with m.If(self.ls_oper_i[2]): # ST bit of operand
 558             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 559
 560         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 561         # just immediately activate go_adr
 562         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 563
 564         # connect up address data
 565         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 566         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 567
 568         # connect loadable / storable to go_ld/go_st.
 569         # XXX should only be done when the memory ld/st has actually happened!
 570         go_st_i = Signal(cul.n_units, reset_less=True)
 571         go_ld_i = Signal(cul.n_units, reset_less=True)
 572         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &\
 573                                   cul.adr_rel_o & cul.ld_o)
 574         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
 575                                   cul.sto_rel_o & cul.st_o)
 576         comb += memfus.go_ld_i.eq(go_ld_i)
 577         comb += memfus.go_st_i.eq(go_st_i)
 578         #comb += cul.go_wr_i.eq(go_ld_i)
 579         comb += cul.go_st_i.eq(go_st_i)
 580
 581         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 582         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 583         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 584
 585         #---------
 586         # merge shadow matrices outputs
 587         #---------
 588
 589         # these are explained in ShadowMatrix docstring, and are to be
 590         # connected to the FUReg and FUFU Matrices, to get them to reset
 591         anydie = Signal(n_intfus, reset_less=True)
 592         allshadown = Signal(n_intfus, reset_less=True)
 593         shreset = Signal(n_intfus, reset_less=True)
 594         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 595         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 596         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 597
 598         #---------
 599         # connect fu-fu matrix
 600         #---------
 601
 602         # Group Picker... done manually for now.
 603         go_rd_o = intpick1.go_rd_o
 604         go_wr_o = intpick1.go_wr_o
 605         go_rd_i = intfus.go_rd_i
 606         go_wr_i = intfus.go_wr_i
 607         go_die_i = intfus.go_die_i
 608         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 609         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 610         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 611         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 612
 613         # Connect Picker
 614         #---------
 615         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 616         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.done_o[0:n_intfus])
 617         int_rd_o = intfus.readable_o
 618         int_wr_o = intfus.writable_o
 619         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 620         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 621
 622         #---------
 623         # Shadow Matrix
 624         #---------
 625
 626         comb += shadows.issue_i.eq(fn_issue_o)
 627         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 628         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 629         #---------
 630         # NOTE; this setup is for the instruction order preservation...
 631
 632         # connect shadows / go_dies to Computation Units
 633         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 634         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 635
 636         # ok connect first n_int_fu shadows to busy lines, to create an
 637         # instruction-order linked-list-like arrangement, using a bit-matrix
 638         # (instead of e.g. a ring buffer).
 639
 640         # when written, the shadow can be cancelled (and was good)
 641         for i in range(n_intfus):
 642             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 643
 644         # *previous* instruction shadows *current* instruction, and, obviously,
 645         # if the previous is completed (!busy) don't cast the shadow!
 646         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 647         for i in range(n_intfus):
 648             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 649
 650         #---------
 651         # ... and this is for branch speculation.  it uses the extra bit
 652         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 653         # only needs to set shadow_i, s_fail_i and s_good_i
 654
 655         # issue captures shadow_i (if enabled)
 656         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 657
 658         bactive = Signal(reset_less=True)
 659         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 660
 661         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 662         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 663             comb += bshadow.issue_i.eq(fn_issue_o)
 664             for i in range(n_intfus):
 665                 with m.If(fn_issue_o & (Const(1<<i))):
 666                     comb += bshadow.shadow_i[i][0].eq(1)
 667
 668         # finally, we need an indicator to the test infrastructure as to
 669         # whether the branch succeeded or failed, plus, link up to the
 670         # "recorder" of whether the instruction was under shadow or not
 671
 672         with m.If(br1.issue_i):
 673             sync += bspec.active_i.eq(1)
 674         with m.If(self.branch_succ_i):
 675             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 676         with m.If(self.branch_fail_i):
 677             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 678
 679         # branch is active (TODO: a better signal: this is over-using the
 680         # go_write signal - actually the branch should not be "writing")
 681         with m.If(br1.go_wr_i):
 682             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 683             sync += bspec.active_i.eq(0)
 684             comb += bspec.br_i.eq(1)
 685             # branch occurs if data == 1, failed if data == 0
 686             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 687             for i in range(n_intfus):
 688                 # *expected* direction of the branch matched against *actual*
 689                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 690                 # ... or it didn't
 691                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 692
 693         #---------
 694         # Connect Register File(s)
 695         #---------
 696         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 697         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 698         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 699
 700         # connect ALUs to regfule
 701         comb += int_dest.data_i.eq(cu.data_o)
 702         comb += cu.src1_i.eq(int_src1.data_o)
 703         comb += cu.src2_i.eq(int_src2.data_o)
 704
 705         # connect ALU Computation Units
 706         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 707         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 708         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 709
 710         return m
 711
 712     def __iter__(self):
 713         yield from self.intregs
 714         yield from self.fpregs
 715         yield self.int_dest_i
 716         yield self.int_src1_i
 717         yield self.int_src2_i
 718         yield self.issue_o
 719         yield self.branch_succ_i
 720         yield self.branch_fail_i
 721         yield self.branch_direction_o
 722
 723     def ports(self):
 724         return list(self)
 725
 726
 727 class IssueToScoreboard(Elaboratable):
 728
 729     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 730         self.qlen = qlen
 731         self.n_in = n_in
 732         self.n_out = n_out
 733         self.rwid = rwid
 734         self.opw = opwid
 735         self.n_regs = n_regs
 736
 737         mqbits = unsigned(int(log(qlen) / log(2))+2)
 738         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 739         self.p_ready_o = Signal() # instructions were added
 740         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 741
 742         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 743         self.qlen_o = Signal(mqbits, reset_less=True)
 744
 745     def elaborate(self, platform):
 746         m = Module()
 747         comb = m.d.comb
 748         sync = m.d.sync
 749
 750         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 751         sc = Scoreboard(self.rwid, self.n_regs)
 752         m.submodules.iq = iq
 753         m.submodules.sc = sc
 754
 755         # get at the regfile for testing
 756         self.intregs = sc.intregs
 757
 758         # and the "busy" signal and instruction queue length
 759         comb += self.busy_o.eq(sc.busy_o)
 760         comb += self.qlen_o.eq(iq.qlen_o)
 761
 762         # link up instruction queue
 763         comb += iq.p_add_i.eq(self.p_add_i)
 764         comb += self.p_ready_o.eq(iq.p_ready_o)
 765         for i in range(self.n_in):
 766             comb += eq(iq.data_i[i], self.data_i[i])
 767
 768         # take instruction and process it.  note that it's possible to
 769         # "inspect" the queue contents *without* actually removing the
 770         # items.  items are only removed when the
 771
 772         # in "waiting" state
 773         wait_issue_br = Signal()
 774         wait_issue_alu = Signal()
 775         wait_issue_ls = Signal()
 776
 777         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 778             # set instruction pop length to 1 if the unit accepted
 779             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 780                 with m.If(iq.qlen_o != 0):
 781                     comb += iq.n_sub_i.eq(1)
 782             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 783                 with m.If(iq.qlen_o != 0):
 784                     comb += iq.n_sub_i.eq(1)
 785             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 786                 with m.If(iq.qlen_o != 0):
 787                     comb += iq.n_sub_i.eq(1)
 788
 789         # see if some instruction(s) are here.  note that this is
 790         # "inspecting" the in-place queue.  note also that on the
 791         # cycle following "waiting" for fn_issue_o to be set, the
 792         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 793         with m.If(iq.qlen_o != 0):
 794             # get the operands and operation
 795             imm = iq.data_o[0].imm_i
 796             dest = iq.data_o[0].dest_i
 797             src1 = iq.data_o[0].src1_i
 798             src2 = iq.data_o[0].src2_i
 799             op = iq.data_o[0].oper_i
 800             opi = iq.data_o[0].opim_i # immediate set
 801
 802             # set the src/dest regs
 803             comb += sc.int_dest_i.eq(dest)
 804             comb += sc.int_src1_i.eq(src1)
 805             comb += sc.int_src2_i.eq(src2)
 806             comb += sc.reg_enable_i.eq(1) # enable the regfile
 807
 808             # choose a Function-Unit-Group
 809             with m.If((op & (0x3<<2)) != 0): # branch
 810                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 811                 comb += sc.br_imm_i.eq(imm)
 812                 comb += sc.brissue.insn_i.eq(1)
 813                 comb += wait_issue_br.eq(1)
 814             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 815                 # see compldst.py
 816                 # bit 0: ADD/SUB
 817                 # bit 1: immed
 818                 # bit 4: LD
 819                 # bit 5: ST
 820                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 821                 comb += sc.ls_imm_i.eq(imm)
 822                 comb += sc.lsissue.insn_i.eq(1)
 823                 comb += wait_issue_ls.eq(1)
 824             with m.Else(): # alu
 825                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 826                 comb += sc.alu_imm_i.eq(imm)
 827                 comb += sc.aluissue.insn_i.eq(1)
 828                 comb += wait_issue_alu.eq(1)
 829
 830             # XXX TODO
 831             # these indicate that the instruction is to be made
 832             # shadow-dependent on
 833             # (either) branch success or branch fail
 834             #yield sc.branch_fail_i.eq(branch_fail)
 835             #yield sc.branch_succ_i.eq(branch_success)
 836
 837         return m
 838
 839     def __iter__(self):
 840         yield self.p_ready_o
 841         for o in self.data_i:
 842             yield from list(o)
 843         yield self.p_add_i
 844
 845     def ports(self):
 846         return list(self)
 847
 848
 849 IADD = 0
 850 ISUB = 1
 851 IMUL = 2
 852 ISHF = 3
 853 IBGT = 4
 854 IBLT = 5
 855 IBEQ = 6
 856 IBNE = 7
 857
 858
 859 class RegSim:
 860     def __init__(self, rwidth, nregs):
 861         self.rwidth = rwidth
 862         self.regs = [0] * nregs
 863
 864     def op(self, op, op_imm, imm, src1, src2, dest):
 865         maxbits = (1 << self.rwidth) - 1
 866         src1 = self.regs[src1] & maxbits
 867         if op_imm:
 868             src2 = imm
 869         else:
 870             src2 = self.regs[src2] & maxbits
 871         if op == IADD:
 872             val = src1 + src2
 873         elif op == ISUB:
 874             val = src1 - src2
 875         elif op == IMUL:
 876             val = src1 * src2
 877         elif op == ISHF:
 878             val = src1 >> (src2 & maxbits)
 879         elif op == IBGT:
 880             val = int(src1 > src2)
 881         elif op == IBLT:
 882             val = int(src1 < src2)
 883         elif op == IBEQ:
 884             val = int(src1 == src2)
 885         elif op == IBNE:
 886             val = int(src1 != src2)
 887         else:
 888             return 0 # LD/ST TODO
 889         val &= maxbits
 890         self.setval(dest, val)
 891         return val
 892
 893     def setval(self, dest, val):
 894         print ("sim setval", dest, hex(val))
 895         self.regs[dest] = val
 896
 897     def dump(self, dut):
 898         for i, val in enumerate(self.regs):
 899             reg = yield dut.intregs.regs[i].reg
 900             okstr = "OK" if reg == val else "!ok"
 901             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 902
 903     def check(self, dut):
 904         for i, val in enumerate(self.regs):
 905             reg = yield dut.intregs.regs[i].reg
 906             if reg != val:
 907                 print("reg %d expected %x received %x\n" % (i, val, reg))
 908                 yield from self.dump(dut)
 909                 assert False
 910
 911 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 912             branch_success, branch_fail):
 913     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 914                'src1_i': src1, 'src2_i': src2}]
 915
 916     sendlen = 1
 917     for idx in range(sendlen):
 918         yield from eq(dut.data_i[idx], instrs[idx])
 919         di = yield dut.data_i[idx]
 920         print ("senddata %d %x" % (idx, di))
 921     yield dut.p_add_i.eq(sendlen)
 922     yield
 923     o_p_ready = yield dut.p_ready_o
 924     while not o_p_ready:
 925         yield
 926         o_p_ready = yield dut.p_ready_o
 927
 928     yield dut.p_add_i.eq(0)
 929
 930
 931 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 932     yield from disable_issue(dut)
 933     yield dut.int_dest_i.eq(dest)
 934     yield dut.int_src1_i.eq(src1)
 935     yield dut.int_src2_i.eq(src2)
 936     if (op & (0x3<<2)) != 0: # branch
 937         yield dut.brissue.insn_i.eq(1)
 938         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 939         yield dut.br_imm_i.eq(imm)
 940         dut_issue = dut.brissue
 941     else:
 942         yield dut.aluissue.insn_i.eq(1)
 943         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 944         yield dut.alu_imm_i.eq(imm)
 945         dut_issue = dut.aluissue
 946     yield dut.reg_enable_i.eq(1)
 947
 948     # these indicate that the instruction is to be made shadow-dependent on
 949     # (either) branch success or branch fail
 950     yield dut.branch_fail_i.eq(branch_fail)
 951     yield dut.branch_succ_i.eq(branch_success)
 952
 953     yield
 954     yield from wait_for_issue(dut, dut_issue)
 955
 956
 957 def print_reg(dut, rnums):
 958     rs = []
 959     for rnum in rnums:
 960         reg = yield dut.intregs.regs[rnum].reg
 961         rs.append("%x" % reg)
 962     rnums = map(str, rnums)
 963     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 964
 965
 966 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 967     insts = []
 968     for i in range(n_ops):
 969         src1 = randint(1, dut.n_regs-1)
 970         src2 = randint(1, dut.n_regs-1)
 971         imm = randint(1, (1<<dut.rwid)-1)
 972         dest = randint(1, dut.n_regs-1)
 973         op = randint(0, max_opnums)
 974         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 975
 976         if shadowing:
 977             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 978         else:
 979             insts.append((src1, src2, dest, op, opi, imm))
 980     return insts
 981
 982
 983 def wait_for_busy_clear(dut):
 984     while True:
 985         busy_o = yield dut.busy_o
 986         if not busy_o:
 987             break
 988         print ("busy",)
 989         yield
 990
 991 def disable_issue(dut):
 992     yield dut.aluissue.insn_i.eq(0)
 993     yield dut.brissue.insn_i.eq(0)
 994     yield dut.lsissue.insn_i.eq(0)
 995
 996
 997 def wait_for_issue(dut, dut_issue):
 998     while True:
 999         issue_o = yield dut_issue.fn_issue_o
1000         if issue_o:
1001             yield from disable_issue(dut)
1002             yield dut.reg_enable_i.eq(0)
1003             break
1004         print ("busy",)
1005         #yield from print_reg(dut, [1,2,3])
1006         yield
1007     #yield from print_reg(dut, [1,2,3])
1008
def scoreboard_branch_sim(dut, alusim):
    """ Simulation generator: issue a branch together with instructions
        shadowed on its outcome, then check the registers.

        * dut: driven directly through int_instr, so it must expose the
          issue units and register/branch inputs that int_instr uses,
          as well as branch_direction_o, intregs, n_regs and busy_o.
        * alusim: RegSim reference model, used to predict the registers.

        Instructions here are 5-tuples (src1, src2, dest, op,
        (shadow_on, shadow_off)).  For a branch (op >= 4) the dest slot
        instead holds a (branch_ok, branch_fail) pair of instruction
        lists, and src2 doubles as the destination register.

        These instructions carry no immediate, so 0 is passed for the
        immediate arguments of int_instr and RegSim.op.
    """

    iseed = 3

    for _ in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # NOTE: disabled.  create_random_ops (with shadowing) yields
            # 7-tuples, which do not match the 5-tuple layout unpacked
            # below: update this path before re-enabling it.
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # untouched copy for the reference model: insts is consumed below
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        while insts:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: no shadowing required
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        insts.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        insts.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate after op: none here, pass 0
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the instructions on the reference model, following
        # whichever side of each branch the model says was taken
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # RegSim.op takes (op, op_imm, imm, ...): register operands
            # only, so op_imm and imm are both 0
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1120         yield from alusim.dump(dut)
1121
1122
def scoreboard_sim(dut, alusim):
    """ Simulation generator: queue a list of instructions and check
        the resulting register contents.

        Registers 1..n_regs-1 are preloaded (in both the dut and the
        alusim reference model), each enabled group of instructions is
        applied to alusim and pushed into the dut through instr_q, and
        once the queue has drained and busy_o has cleared the dut's
        registers are compared against alusim.

        * dut: must provide n_regs, intregs, qlen_o and busy_o, plus
          whatever instr_q drives.
        * alusim: reference model providing setval, op, check and dump.

        The "if True/if False" groups below are hand-selected
        regression cases.  NOTE(review): several of the disabled groups
        use 4- or 6-element tuples, which do not match the 7-element
        unpacking in the issue loop - they would need updating before
        being re-enabled.
    """

    seed(0)

    for i in range(1):

        # preload each register with its own index (the random and
        # 31+i*3 alternatives are left commented out)
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        # layout: (src1, src2, dest, op, opi, imm, (br_ok, br_fail))
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            # NOTE(review): the comments in IssueToScoreboard describe
            # op bit 4 as LD and bit 5 as ST, which would make 0x20 a
            # ST rather than a LD - confirm which is intended.
            instrs.append( (1, 2, 0, 0x20, 1, 1, (0, 0)) ) # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if True:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if True:
            instrs.append( (7, 3, 2, 4, 0, 0, (0, 0)) )
            instrs.append( (7, 6, 6, 2, 0, 0, (0, 0)) )
            instrs.append( (1, 7, 2, 2, 0, 0, (0, 0)) )

        if True:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # update the reference model first, then queue on the dut
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first until the instruction queue reports empty...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # ...then a few extra clocks before polling busy_o
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1269
1270
def test_scoreboard():
    """ Build an IssueToScoreboard, write it out as RTLIL to
        test_scoreboard6600.il, then run scoreboard_sim against it,
        recording a VCD trace in test_scoreboard6600.vcd.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 8)

    # emit the design as RTLIL
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    # run the queue-driven simulation
    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative: branch/shadow simulation
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1284
1285
# run the scoreboard simulation when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()