src/soc/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5 from nmigen.back.pysim import Delay
   6
   7 from soc.regfile.regfile import RegFileArray, treereduce
   8 from soc.scoreboard.fu_fu_matrix import FUFUDepMatrix
   9 from soc.scoreboard.fu_reg_matrix import FURegDepMatrix
  10 from soc.scoreboard.global_pending import GlobalPending
  11 from soc.scoreboard.group_picker import GroupPicker
  12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  15 from soc.scoreboard.memfu import MemFunctionUnits
  16
  17 from soc.experiment.compalu import ComputationUnitNoDelay
  18 from soc.experiment.compldst_multi import LDSTCompUnit
  19 from soc.experiment.testmem import TestMemory
  20
  21 from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
  22
  23 from openpower.decoder.power_enums import MicrOp, Function
  24 from openpower.decoder.power_decoder import (create_pdecode)
  25 from openpower.decoder.power_decoder2 import (PowerDecode2)
  26 from openpower.simulator.program import Program
  27
  28
  29 from nmutil.latch import SRLatch
  30 from nmutil.nmoperator import eq
  31
  32 from random import randint, seed
  33 from copy import deepcopy
  34 from math import log
  35
  36 from soc.experiment.sim import RegSim, MemSim
  37 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
  38
  39
  40 class CompUnitsBase(Elaboratable):
  41     """ Computation Unit Base class.
  42
  43         Amazingly, this class works recursively.  It's supposed to just
  44         look after some ALUs (that can handle the same operations),
  45         grouping them together, however it turns out that the same code
  46         can also group *groups* of Computation Units together as well.
  47
  48         Basically it was intended just to concatenate the ALU's issue,
  49         go_rd etc. signals together, which start out as bits and become
  50         sequences.  Turns out that the same trick works just as well
  51         on Computation Units!
  52
  53         So this class may be used recursively to present a top-level
  54         sequential concatenation of all the signals in and out of
  55         ALUs, whilst at the same time making it convenient to group
  56         ALUs together.
  57
  58         At the lower level, the intent is that groups of (identical)
  59         ALUs may be passed the same operation.  Even beyond that,
  60         the intent is that that group of (identical) ALUs actually
  61         share the *same pipeline* and as such become a "Concurrent
  62         Computation Unit" as defined by Mitch Alsup (see section
  63         11.4.9.3)
  64     """
  65
  66     def __init__(self, rwid, units, ldstmode=False):
  67         """ Inputs:
  68
  69             * :rwid:   bit width of register file(s) - both FP and INT
  70             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  71         """
  72         self.units = units
  73         self.ldstmode = ldstmode
  74         self.rwid = rwid
  75         self.rwid = rwid
  76         if units and isinstance(units[0], CompUnitsBase):
  77             self.n_units = 0
  78             for u in self.units:
  79                 self.n_units += u.n_units
  80         else:
  81             self.n_units = len(units)
  82
  83         n_units = self.n_units
  84
  85         # inputs
  86         self.issue_i = Signal(n_units, reset_less=True)
  87         self.go_rd_i = Signal(n_units, reset_less=True)
  88         self.go_wr_i = Signal(n_units, reset_less=True)
  89         self.shadown_i = Signal(n_units, reset_less=True)
  90         self.go_die_i = Signal(n_units, reset_less=True)
  91         if ldstmode:
  92             self.go_ad_i = Signal(n_units, reset_less=True)
  93             self.go_st_i = Signal(n_units, reset_less=True)
  94
  95         # outputs
  96         self.busy_o = Signal(n_units, reset_less=True)
  97         self.rd_rel_o = Signal(n_units, reset_less=True)
  98         self.req_rel_o = Signal(n_units, reset_less=True)
  99         self.done_o = Signal(n_units, reset_less=True)
 100         if ldstmode:
 101             self.ld_o = Signal(n_units, reset_less=True)  # op is LD
 102             self.st_o = Signal(n_units, reset_less=True)  # op is ST
 103             self.adr_rel_o = Signal(n_units, reset_less=True)
 104             self.sto_rel_o = Signal(n_units, reset_less=True)
 105             self.load_mem_o = Signal(n_units, reset_less=True)
 106             self.stwd_mem_o = Signal(n_units, reset_less=True)
 107             self.addr_o = Signal(rwid, reset_less=True)
 108
 109         # in/out register data (note: not register#, actual data)
 110         self.o_data = Signal(rwid, reset_less=True)
 111         self.src1_i = Signal(rwid, reset_less=True)
 112         self.src2_i = Signal(rwid, reset_less=True)
 113         # input operand
 114
 115     def elaborate(self, platform):
 116         m = Module()
 117         comb = m.d.comb
 118
 119         for i, alu in enumerate(self.units):
 120             setattr(m.submodules, "comp%d" % i, alu)
 121
 122         go_rd_l = []
 123         go_wr_l = []
 124         issue_l = []
 125         busy_l = []
 126         req_rel_l = []
 127         done_l = []
 128         rd_rel_l = []
 129         shadow_l = []
 130         godie_l = []
 131         for alu in self.units:
 132             req_rel_l.append(alu.req_rel_o)
 133             done_l.append(alu.done_o)
 134             rd_rel_l.append(alu.rd_rel_o)
 135             shadow_l.append(alu.shadown_i)
 136             godie_l.append(alu.go_die_i)
 137             go_wr_l.append(alu.go_wr_i)
 138             go_rd_l.append(alu.go_rd_i)
 139             issue_l.append(alu.issue_i)
 140             busy_l.append(alu.busy_o)
 141         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 142         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 143         comb += self.done_o.eq(Cat(*done_l))
 144         comb += self.busy_o.eq(Cat(*busy_l))
 145         comb += Cat(*godie_l).eq(self.go_die_i)
 146         comb += Cat(*shadow_l).eq(self.shadown_i)
 147         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 148         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 149         comb += Cat(*issue_l).eq(self.issue_i)
 150
 151         # connect data register input/output
 152
 153         # merge (OR) all integer FU / ALU outputs to a single value
 154         if self.units:
 155             o_data = treereduce(self.units, "o_data")
 156             comb += self.o_data.eq(o_data)
 157             if self.ldstmode:
 158                 addr_o = treereduce(self.units, "addr_o")
 159                 comb += self.addr_o.eq(addr_o)
 160
 161         for i, alu in enumerate(self.units):
 162             comb += alu.src1_i.eq(self.src1_i)
 163             comb += alu.src2_i.eq(self.src2_i)
 164
 165         if not self.ldstmode:
 166             return m
 167
 168         ldmem_l = []
 169         stmem_l = []
 170         go_ad_l = []
 171         go_st_l = []
 172         ld_l = []
 173         st_l = []
 174         adr_rel_l = []
 175         sto_rel_l = []
 176         for alu in self.units:
 177             ld_l.append(alu.ld_o)
 178             st_l.append(alu.st_o)
 179             adr_rel_l.append(alu.adr_rel_o)
 180             sto_rel_l.append(alu.sto_rel_o)
 181             ldmem_l.append(alu.load_mem_o)
 182             stmem_l.append(alu.stwd_mem_o)
 183             go_ad_l.append(alu.go_ad_i)
 184             go_st_l.append(alu.go_st_i)
 185         comb += self.ld_o.eq(Cat(*ld_l))
 186         comb += self.st_o.eq(Cat(*st_l))
 187         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 188         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 189         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 190         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 191         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 192         comb += Cat(*go_st_l).eq(self.go_st_i)
 193
 194         return m
 195
 196
 197 class CompUnitLDSTs(CompUnitsBase):
 198
 199     def __init__(self, rwid, opwid, n_ldsts, mem):
 200         """ Inputs:
 201
 202             * :rwid:   bit width of register file(s) - both FP and INT
 203             * :opwid:  operand bit width
 204         """
 205         self.opwid = opwid
 206
 207         # inputs
 208         self.oper_i = Signal(opwid, reset_less=True)
 209         self.imm_i = Signal(rwid, reset_less=True)
 210
 211         # Int ALUs
 212         self.alus = []
 213         for i in range(n_ldsts):
 214             self.alus.append(ALU(rwid))
 215
 216         units = []
 217         for alu in self.alus:
 218             aluopwid = 4  # see compldst.py for "internal" opcode
 219             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 220
 221         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 222
 223     def elaborate(self, platform):
 224         m = CompUnitsBase.elaborate(self, platform)
 225         comb = m.d.comb
 226
 227         # hand the same operation to all units, 4 lower bits though
 228         for alu in self.units:
 229             comb += alu.oper_i[0:4].eq(self.oper_i)
 230             comb += alu.imm_i.eq(self.imm_i)
 231             comb += alu.isalu_i.eq(0)
 232
 233         return m
 234
 235
 236 class CompUnitALUs(CompUnitsBase):
 237
 238     def __init__(self, rwid, opwid, n_alus):
 239         """ Inputs:
 240
 241             * :rwid:   bit width of register file(s) - both FP and INT
 242             * :opwid:  operand bit width
 243         """
 244         self.opwid = opwid
 245
 246         # inputs
 247         self.op = CompALUOpSubset("cua_i")
 248         self.oper_i = Signal(opwid, reset_less=True)
 249         self.imm_i = Signal(rwid, reset_less=True)
 250
 251         # Int ALUs
 252         alus = []
 253         for i in range(n_alus):
 254             alus.append(ALU(rwid))
 255
 256         units = []
 257         for alu in alus:
 258             aluopwid = 3  # extra bit for immediate mode
 259             units.append(ComputationUnitNoDelay(rwid, alu))
 260
 261         CompUnitsBase.__init__(self, rwid, units)
 262
 263     def elaborate(self, platform):
 264         m = CompUnitsBase.elaborate(self, platform)
 265         comb = m.d.comb
 266
 267         # hand the subset of operation to ALUs
 268         for alu in self.units:
 269             comb += alu.oper_i.eq(self.op)
 270             #comb += alu.oper_i[0:3].eq(self.oper_i)
 271             #comb += alu.imm_i.eq(self.imm_i)
 272
 273         return m
 274
 275
 276 class CompUnitBR(CompUnitsBase):
 277
 278     def __init__(self, rwid, opwid):
 279         """ Inputs:
 280
 281             * :rwid:   bit width of register file(s) - both FP and INT
 282             * :opwid:  operand bit width
 283
 284             Note: bgt unit is returned so that a shadow unit can be created
 285             for it
 286         """
 287         self.opwid = opwid
 288
 289         # inputs
 290         self.oper_i = Signal(opwid, reset_less=True)
 291         self.imm_i = Signal(rwid, reset_less=True)
 292
 293         # Branch ALU and CU
 294         self.bgt = BranchALU(rwid)
 295         aluopwid = 3  # extra bit for immediate mode
 296         self.br1 = ComputationUnitNoDelay(rwid, self.bgt)
 297         CompUnitsBase.__init__(self, rwid, [self.br1])
 298
 299     def elaborate(self, platform):
 300         m = CompUnitsBase.elaborate(self, platform)
 301         comb = m.d.comb
 302
 303         # hand the same operation to all units
 304         for alu in self.units:
 305             comb += alu.oper_i.eq(self.oper_i)
 306             #comb += alu.imm_i.eq(self.imm_i)
 307
 308         return m
 309
 310
 311 class FunctionUnits(Elaboratable):
 312
 313     def __init__(self, n_regs, n_int_alus):
 314         self.n_regs = n_regs
 315         self.n_int_alus = n_int_alus
 316
 317         self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
 318         self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
 319         self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in
 320
 321         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 322         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 323
 324         self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
 325         self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
 326         self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)
 327
 328         self.readable_o = Signal(n_int_alus, reset_less=True)
 329         self.writable_o = Signal(n_int_alus, reset_less=True)
 330
 331         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 332         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 333         self.go_die_i = Signal(n_int_alus, reset_less=True)
 334         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 335
 336         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 337
 338     def elaborate(self, platform):
 339         m = Module()
 340         comb = m.d.comb
 341         sync = m.d.sync
 342
 343         n_intfus = self.n_int_alus
 344
 345         # Integer FU-FU Dep Matrix
 346         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 347         m.submodules.intfudeps = intfudeps
 348         # Integer FU-Reg Dep Matrix
 349         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 350         m.submodules.intregdeps = intregdeps
 351
 352         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 353         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 354
 355         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 356         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 357
 358         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 359         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 360         self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid
 361
 362         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 363         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 364         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 365         comb += intfudeps.go_die_i.eq(self.go_die_i)
 366         comb += self.readable_o.eq(intfudeps.readable_o)
 367         comb += self.writable_o.eq(intfudeps.writable_o)
 368
 369         # Connect function issue / arrays, and dest/src1/src2
 370         comb += intregdeps.dest_i.eq(self.dest_i)
 371         comb += intregdeps.src_i[0].eq(self.src1_i)
 372         comb += intregdeps.src_i[1].eq(self.src2_i)
 373
 374         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 375         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 376         comb += intregdeps.go_die_i.eq(self.go_die_i)
 377         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 378
 379         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 380         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 381         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 382
 383         return m
 384
 385
 386 class Scoreboard(Elaboratable):
 387     def __init__(self, rwid, n_regs):
 388         """ Inputs:
 389
 390             * :rwid:   bit width of register file(s) - both FP and INT
 391             * :n_regs: depth of register file(s) - number of FP and INT regs
 392         """
 393         self.rwid = rwid
 394         self.n_regs = n_regs
 395
 396         # Register Files
 397         self.intregs = RegFileArray(rwid, n_regs)
 398         self.fpregs = RegFileArray(rwid, n_regs)
 399
 400         # Memory (test for now)
 401         self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long
 402
 403         # issue q needs to get at these
 404         self.aluissue = IssueUnitGroup(2)
 405         self.lsissue = IssueUnitGroup(2)
 406         self.brissue = IssueUnitGroup(1)
 407         # and these
 408         self.alu_op = CompALUOpSubset("alu")
 409         self.br_oper_i = Signal(4, reset_less=True)
 410         self.br_imm_i = Signal(rwid, reset_less=True)
 411         self.ls_oper_i = Signal(4, reset_less=True)
 412         self.ls_imm_i = Signal(rwid, reset_less=True)
 413
 414         # inputs
 415         self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
 416         self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
 417         self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
 418         self.reg_enable_i = Signal(reset_less=True)  # enable reg decode
 419
 420         # outputs
 421         self.issue_o = Signal(reset_less=True)  # instruction was accepted
 422         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 423
 424         # for branch speculation experiment.  branch_direction = 0 if
 425         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 426         # branch_succ and branch_fail are requests to have the current
 427         # instruction be dependent on the branch unit "shadow" capability.
 428         self.branch_succ_i = Signal(reset_less=True)
 429         self.branch_fail_i = Signal(reset_less=True)
 430         self.branch_direction_o = Signal(2, reset_less=True)
 431
 432     def elaborate(self, platform):
 433         m = Module()
 434         comb = m.d.comb
 435         sync = m.d.sync
 436
 437         m.submodules.intregs = self.intregs
 438         m.submodules.fpregs = self.fpregs
 439         m.submodules.mem = mem = self.mem
 440
 441         # register ports
 442         int_dest = self.intregs.write_port("dest")
 443         int_src1 = self.intregs.read_port("src1")
 444         int_src2 = self.intregs.read_port("src2")
 445
 446         fp_dest = self.fpregs.write_port("dest")
 447         fp_src1 = self.fpregs.read_port("src1")
 448         fp_src2 = self.fpregs.read_port("src2")
 449
 450         # Int ALUs and BR ALUs
 451         n_int_alus = 5
 452         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 453         cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs
 454
 455         # LDST Comp Units
 456         n_ldsts = 2
 457         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)
 458
 459         # Comp Units
 460         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 461         bgt = cub.bgt  # get at the branch computation unit
 462         br1 = cub.br1
 463
 464         # Int FUs
 465         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 466
 467         # Memory FUs
 468         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 469
 470         # Memory Priority Picker 1: one gateway per memory port
 471         # picks 1 reader and 1 writer to intreg
 472         mempick1 = GroupPicker(n_ldsts)
 473         m.submodules.mempick1 = mempick1
 474
 475         # Count of number of FUs
 476         n_intfus = n_int_alus
 477         n_fp_fus = 0  # for now
 478
 479         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 480         # picks 1 reader and 1 writer to intreg
 481         intpick1 = GroupPicker(n_intfus)
 482         m.submodules.intpick1 = intpick1
 483
 484         # INT/FP Issue Unit
 485         regdecode = RegDecode(self.n_regs)
 486         m.submodules.regdecode = regdecode
 487         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 488         m.submodules.issueunit = issueunit
 489
 490         # Shadow Matrix.  currently n_intfus shadows, to be used for
 491         # write-after-write hazards.  NOTE: there is one extra for branches,
 492         # so the shadow width is increased by 1
 493         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 494         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 495
 496         # record previous instruction to cast shadow on current instruction
 497         prev_shadow = Signal(n_intfus)
 498
 499         # Branch Speculation recorder.  tracks the success/fail state as
 500         # each instruction is issued, so that when the branch occurs the
 501         # allow/cancel can be issued as appropriate.
 502         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 503
 504         # ---------
 505         # ok start wiring things together...
 506         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 507         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 508         # ---------
 509
 510         # ---------
 511         # Issue Unit is where it starts.  set up some in/outs for this module
 512         # ---------
 513         comb += [regdecode.dest_i.eq(self.int_dest_i),
 514                  regdecode.src1_i.eq(self.int_src1_i),
 515                  regdecode.src2_i.eq(self.int_src2_i),
 516                  regdecode.enable_i.eq(self.reg_enable_i),
 517                  self.issue_o.eq(issueunit.issue_o)
 518                  ]
 519
 520         # take these to outside (issue needs them)
 521         comb += cua.op.eq(self.alu_op)
 522         comb += cub.oper_i.eq(self.br_oper_i)
 523         comb += cub.imm_i.eq(self.br_imm_i)
 524         comb += cul.oper_i.eq(self.ls_oper_i)
 525         comb += cul.imm_i.eq(self.ls_imm_i)
 526
 527         # TODO: issueunit.f (FP)
 528
 529         # and int function issue / busy arrays, and dest/src1/src2
 530         comb += intfus.dest_i.eq(regdecode.dest_o)
 531         comb += intfus.src1_i.eq(regdecode.src1_o)
 532         comb += intfus.src2_i.eq(regdecode.src2_o)
 533
 534         fn_issue_o = issueunit.fn_issue_o
 535
 536         comb += intfus.fn_issue_i.eq(fn_issue_o)
 537         comb += issueunit.busy_i.eq(cu.busy_o)
 538         comb += self.busy_o.eq(cu.busy_o.bool())
 539
 540         # ---------
 541         # Memory Function Unit
 542         # ---------
 543         reset_b = Signal(cul.n_units, reset_less=True)
 544         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 545
 546         comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
 547         comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
 548         comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit
 549
 550         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 551         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 552         # issue_i.  multi-issue gets a bit more complex but not a lot.
 553         prior_ldsts = Signal(cul.n_units, reset_less=True)
 554         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 555         with m.If(self.ls_oper_i[3]):  # LD bit of operand
 556             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 557         with m.If(self.ls_oper_i[2]):  # ST bit of operand
 558             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 559
 560         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 561         # just immediately activate go_adr
 562         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 563
 564         # connect up address data
 565         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 566         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 567
 568         # connect loadable / storable to go_ld/go_st.
 569         # XXX should only be done when the memory ld/st has actually happened!
 570         go_st_i = Signal(cul.n_units, reset_less=True)
 571         go_ld_i = Signal(cul.n_units, reset_less=True)
 572         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
 573                            cul.adr_rel_o & cul.ld_o)
 574         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
 575                            cul.sto_rel_o & cul.st_o)
 576         comb += memfus.go_ld_i.eq(go_ld_i)
 577         comb += memfus.go_st_i.eq(go_st_i)
 578         #comb += cul.go_wr_i.eq(go_ld_i)
 579         comb += cul.go_st_i.eq(go_st_i)
 580
 581         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 582         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 583         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 584
 585         # ---------
 586         # merge shadow matrices outputs
 587         # ---------
 588
 589         # these are explained in ShadowMatrix docstring, and are to be
 590         # connected to the FUReg and FUFU Matrices, to get them to reset
 591         anydie = Signal(n_intfus, reset_less=True)
 592         allshadown = Signal(n_intfus, reset_less=True)
 593         shreset = Signal(n_intfus, reset_less=True)
 594         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 595         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 596         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 597
 598         # ---------
 599         # connect fu-fu matrix
 600         # ---------
 601
 602         # Group Picker... done manually for now.
 603         go_rd_o = intpick1.go_rd_o
 604         go_wr_o = intpick1.go_wr_o
 605         go_rd_i = intfus.go_rd_i
 606         go_wr_i = intfus.go_wr_i
 607         go_die_i = intfus.go_die_i
 608         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 609         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])  # rd
 610         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])  # wr
 611         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die
 612
 613         # Connect Picker
 614         # ---------
 615         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 616         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.done_o[0:n_intfus])
 617         int_rd_o = intfus.readable_o
 618         int_wr_o = intfus.writable_o
 619         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 620         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 621
 622         # ---------
 623         # Shadow Matrix
 624         # ---------
 625
 626         comb += shadows.issue_i.eq(fn_issue_o)
 627         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 628         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 629         # ---------
 630         # NOTE; this setup is for the instruction order preservation...
 631
 632         # connect shadows / go_dies to Computation Units
 633         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 634         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 635
 636         # ok connect first n_int_fu shadows to busy lines, to create an
 637         # instruction-order linked-list-like arrangement, using a bit-matrix
 638         # (instead of e.g. a ring buffer).
 639
 640         # when written, the shadow can be cancelled (and was good)
 641         for i in range(n_intfus):
 642             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 643
 644         # *previous* instruction shadows *current* instruction, and, obviously,
 645         # if the previous is completed (!busy) don't cast the shadow!
 646         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 647         for i in range(n_intfus):
 648             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 649
 650         # ---------
 651         # ... and this is for branch speculation.  it uses the extra bit
 652         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 653         # only needs to set shadow_i, s_fail_i and s_good_i
 654
 655         # issue captures shadow_i (if enabled)
 656         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 657
 658         bactive = Signal(reset_less=True)
 659         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 660
 661         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 662         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 663             comb += bshadow.issue_i.eq(fn_issue_o)
 664             for i in range(n_intfus):
 665                 with m.If(fn_issue_o & (Const(1 << i))):
 666                     comb += bshadow.shadow_i[i][0].eq(1)
 667
 668         # finally, we need an indicator to the test infrastructure as to
 669         # whether the branch succeeded or failed, plus, link up to the
 670         # "recorder" of whether the instruction was under shadow or not
 671
 672         with m.If(br1.issue_i):
 673             sync += bspec.active_i.eq(1)
 674         with m.If(self.branch_succ_i):
 675             comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 676         with m.If(self.branch_fail_i):
 677             comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 678
 679         # branch is active (TODO: a better signal: this is over-using the
 680         # go_write signal - actually the branch should not be "writing")
 681         with m.If(br1.go_wr_i):
 682             sync += self.branch_direction_o.eq(br1.o_data+Const(1, 2))
 683             sync += bspec.active_i.eq(0)
 684             comb += bspec.br_i.eq(1)
 685             # branch occurs if data == 1, failed if data == 0
 686             comb += bspec.br_ok_i.eq(br1.o_data == 1)
 687             for i in range(n_intfus):
 688                 # *expected* direction of the branch matched against *actual*
 689                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 690                 # ... or it didn't
 691                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 692
 693         # ---------
 694         # Connect Register File(s)
 695         # ---------
 696         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 697         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 698         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 699
 700         # connect ALUs to regfile
 701         comb += int_dest.i_data.eq(cu.o_data)
 702         comb += cu.src1_i.eq(int_src1.o_data)
 703         comb += cu.src2_i.eq(int_src2.o_data)
 704
 705         # connect ALU Computation Units
 706         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 707         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 708         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 709
 710         return m
 711
 712     def __iter__(self):
 713         yield from self.intregs
 714         yield from self.fpregs
 715         yield self.int_dest_i
 716         yield self.int_src1_i
 717         yield self.int_src2_i
 718         yield self.issue_o
 719         yield self.branch_succ_i
 720         yield self.branch_fail_i
 721         yield self.branch_direction_o
 722
 723     def ports(self):
 724         return list(self)
 725
 726
class IssueToScoreboard(Elaboratable):
    """Connects an InstructionQ to a Scoreboard.

    Instructions presented on ``i_data`` are added to the queue under
    control of ``p_add_i``.  The entry at the head of the queue is
    inspected in place, its registers and operation are presented to the
    Scoreboard, and it is removed from the queue once the selected issue
    unit reports a non-zero ``fn_issue_o``.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """Record the configuration and create the external signals.

        * qlen   - passed to InstructionQ; also sizes p_add_i and qlen_o
        * n_in   - number of entries in i_data (passed to InstructionQ)
        * n_out  - passed to InstructionQ
        * rwid   - passed to both InstructionQ and Scoreboard
        * opwid  - passed to InstructionQ (stored as self.opw)
        * n_regs - passed to Scoreboard
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # width of the "how many" counters: int(log2(qlen)) + 2 bits
        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from i_data)
        self.p_o_ready = Signal()  # instructions were added
        self.i_data = Instruction._nq(n_in, "i_data")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """Build the queue and scoreboard submodules and wire them up."""
        m = Module()
        comb = m.d.comb
        sync = m.d.sync  # (currently unused in this method)

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_o_ready.eq(iq.p_o_ready)
        for i in range(self.n_in):
            comb += eq(iq.i_data[i], self.i_data[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when the issue unit being waited
        # on reports a non-zero fn_issue_o (see the n_sub_i logic below).

        # in "waiting" state: one flag per Function-Unit-Group, set
        # (combinatorially) further down when an instruction is presented
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            # (and only if the queue actually holds something)
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation (head-of-queue entry)
            instr = iq.o_data[0]
            imm = instr.imm_data.data
            dest = instr.write_reg.data
            src1 = instr.read_reg1.data
            src2 = instr.read_reg2.data
            op = instr.insn_type
            fu = instr.fn_unit
            opi = instr.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile

            # choose a Function-Unit-Group.  the first matching test wins:
            # fn_unit == ALU, else bits 2-3 of insn_type, else bits 4-5.
            with m.If(fu == Function.ALU):  # alu
                comb += sc.alu_op.eq_from_execute1(instr)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)
            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                # branch operation: low two bits of insn_type plus the
                # immediate-valid flag
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3 << 4)) != 0):  # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """Yield p_o_ready, then the contents of each i_data entry,
        then p_add_i.
        """
        yield self.p_o_ready
        for o in self.i_data:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """Return the ports (everything yielded by iteration) as a list."""
        return list(self)
 849
 850
 851 def power_instr_q(dut, pdecode2, ins, code):
 852     instrs = [pdecode2.e]
 853
 854     sendlen = 1
 855     for idx, instr in enumerate(instrs):
 856         yield dut.i_data[idx].eq(instr)
 857         insn_type = yield instr.insn_type
 858         fn_unit = yield instr.fn_unit
 859         print("senddata ", idx, insn_type, fn_unit, instr)
 860     yield dut.p_add_i.eq(sendlen)
 861     yield
 862     o_p_ready = yield dut.p_o_ready
 863     while not o_p_ready:
 864         yield
 865         o_p_ready = yield dut.p_o_ready
 866
 867     yield dut.p_add_i.eq(0)
 868
 869
 870 def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
 871             branch_success, branch_fail):
 872     instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
 873                'imm_data': (imm, op_imm),
 874                'read_reg1': src1, 'read_reg2': src2}]
 875
 876     sendlen = 1
 877     for idx, instr in enumerate(instrs):
 878         imm, op_imm = instr['imm_data']
 879         reg1 = instr['read_reg1']
 880         reg2 = instr['read_reg2']
 881         dest = instr['write_reg']
 882         insn_type = instr['insn_type']
 883         fn_unit = instr['fn_unit']
 884         yield dut.i_data[idx].insn_type.eq(insn_type)
 885         yield dut.i_data[idx].fn_unit.eq(fn_unit)
 886         yield dut.i_data[idx].read_reg1.data.eq(reg1)
 887         yield dut.i_data[idx].read_reg1.ok.eq(1)  # XXX TODO
 888         yield dut.i_data[idx].read_reg2.data.eq(reg2)
 889         yield dut.i_data[idx].read_reg2.ok.eq(1)  # XXX TODO
 890         yield dut.i_data[idx].write_reg.data.eq(dest)
 891         yield dut.i_data[idx].write_reg.ok.eq(1)  # XXX TODO
 892         yield dut.i_data[idx].imm_data.data.eq(imm)
 893         yield dut.i_data[idx].imm_data.ok.eq(op_imm)
 894         di = yield dut.i_data[idx]
 895         print("senddata %d %x" % (idx, di))
 896     yield dut.p_add_i.eq(sendlen)
 897     yield
 898     o_p_ready = yield dut.p_o_ready
 899     while not o_p_ready:
 900         yield
 901         o_p_ready = yield dut.p_o_ready
 902
 903     yield dut.p_add_i.eq(0)
 904
 905
 906 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 907     yield from disable_issue(dut)
 908     yield dut.int_dest_i.eq(dest)
 909     yield dut.int_src1_i.eq(src1)
 910     yield dut.int_src2_i.eq(src2)
 911     if (op & (0x3 << 2)) != 0:  # branch
 912         yield dut.brissue.insn_i.eq(1)
 913         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 914         yield dut.br_imm_i.eq(imm)
 915         dut_issue = dut.brissue
 916     else:
 917         yield dut.aluissue.insn_i.eq(1)
 918         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 919         yield dut.alu_imm_i.eq(imm)
 920         dut_issue = dut.aluissue
 921     yield dut.reg_enable_i.eq(1)
 922
 923     # these indicate that the instruction is to be made shadow-dependent on
 924     # (either) branch success or branch fail
 925     yield dut.branch_fail_i.eq(branch_fail)
 926     yield dut.branch_succ_i.eq(branch_success)
 927
 928     yield
 929     yield from wait_for_issue(dut, dut_issue)
 930
 931
 932 def print_reg(dut, rnums):
 933     rs = []
 934     for rnum in rnums:
 935         reg = yield dut.intregs.regs[rnum].reg
 936         rs.append("%x" % reg)
 937     rnums = map(str, rnums)
 938     print("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 939
 940
 941 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 942     insts = []
 943     for i in range(n_ops):
 944         src1 = randint(1, dut.n_regs-1)
 945         src2 = randint(1, dut.n_regs-1)
 946         imm = randint(1, (1 << dut.rwid)-1)
 947         dest = randint(1, dut.n_regs-1)
 948         op = randint(0, max_opnums)
 949         opi = 0 if randint(0, 2) else 1  # set true if random is nonzero
 950
 951         if shadowing:
 952             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 953         else:
 954             insts.append((src1, src2, dest, op, opi, imm))
 955     return insts
 956
 957
 958 def wait_for_busy_clear(dut):
 959     while True:
 960         busy_o = yield dut.busy_o
 961         if not busy_o:
 962             break
 963         print("busy",)
 964         yield
 965
 966
 967 def disable_issue(dut):
 968     yield dut.aluissue.insn_i.eq(0)
 969     yield dut.brissue.insn_i.eq(0)
 970     yield dut.lsissue.insn_i.eq(0)
 971
 972
 973 def wait_for_issue(dut, dut_issue):
 974     while True:
 975         issue_o = yield dut_issue.fn_issue_o
 976         if issue_o:
 977             yield from disable_issue(dut)
 978             yield dut.reg_enable_i.eq(0)
 979             break
 980         print("busy",)
 981         # yield from print_reg(dut, [1,2,3])
 982         yield
 983     # yield from print_reg(dut, [1,2,3])
 984
 985
 986 def scoreboard_branch_sim(dut, alusim):
 987
 988     iseed = 3
 989
 990     for i in range(1):
 991
 992         print("rseed", iseed)
 993         seed(iseed)
 994         iseed += 1
 995
 996         yield dut.branch_direction_o.eq(0)
 997
 998         # set random values in the registers
 999         for i in range(1, dut.n_regs):
1000             val = 31+i*3
1001             val = randint(0, (1 << alusim.rwidth)-1)
1002             yield dut.intregs.regs[i].reg.eq(val)
1003             alusim.setval(i, val)
1004
1005         if False:
1006             # create some instructions: branches create a tree
1007             insts = create_random_ops(dut, 1, True, 1)
1008             #insts.append((6, 6, 1, 2, (0, 0)))
1009             #insts.append((4, 3, 3, 0, (0, 0)))
1010
1011             src1 = randint(1, dut.n_regs-1)
1012             src2 = randint(1, dut.n_regs-1)
1013             #op = randint(4, 7)
1014             op = 4  # only BGT at the moment
1015
1016             branch_ok = create_random_ops(dut, 1, True, 1)
1017             branch_fail = create_random_ops(dut, 1, True, 1)
1018
1019             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
1020
1021         if True:
1022             insts = []
1023             insts.append((3, 5, 2, 0, (0, 0)))
1024             branch_ok = []
1025             branch_fail = []
1026             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
1027             branch_ok.append(None)
1028             branch_fail.append((1, 1, 2, 0, (0, 1)))
1029             #branch_fail.append( None )
1030             insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))
1031
1032         siminsts = deepcopy(insts)
1033
1034         # issue instruction(s)
1035         i = -1
1036         instrs = insts
1037         branch_direction = 0
1038         while instrs:
1039             yield
1040             yield
1041             i += 1
1042             branch_direction = yield dut.branch_direction_o  # way branch went
1043             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
1044             if branch_direction == 1 and shadow_on:
1045                 print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1046                 continue  # branch was "success" and this is a "failed"... skip
1047             if branch_direction == 2 and shadow_off:
1048                 print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1049                 continue  # branch was "fail" and this is a "success"... skip
1050             if branch_direction != 0:
1051                 shadow_on = 0
1052                 shadow_off = 0
1053             is_branch = op >= 4
1054             if is_branch:
1055                 branch_ok, branch_fail = dest
1056                 dest = src2
1057                 # ok zip up the branch success / fail instructions and
1058                 # drop them into the queue, one marked "to have branch success"
1059                 # the other to be marked shadow branch "fail".
1060                 # one out of each of these will be cancelled
1061                 for ok, fl in zip(branch_ok, branch_fail):
1062                     if ok:
1063                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1064                     if fl:
1065                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1066             print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
1067                   (i, src1, src2, dest, op, shadow_on, shadow_off))
1068             yield from int_instr(dut, op, src1, src2, dest,
1069                                  shadow_on, shadow_off)
1070
1071         # wait for all instructions to stop before checking
1072         yield
1073         yield from wait_for_busy_clear(dut)
1074
1075         i = -1
1076         while siminsts:
1077             instr = siminsts.pop(0)
1078             if instr is None:
1079                 continue
1080             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
1081             i += 1
1082             is_branch = op >= 4
1083             if is_branch:
1084                 branch_ok, branch_fail = dest
1085                 dest = src2
1086             print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
1087                   (i, src1, src2, dest, op, shadow_on, shadow_off))
1088             branch_res = alusim.op(op, src1, src2, dest)
1089             if is_branch:
1090                 if branch_res:
1091                     siminsts += branch_ok
1092                 else:
1093                     siminsts += branch_fail
1094
1095         # check status
1096         yield from alusim.check(dut)
1097         yield from alusim.dump(dut)
1098
1099
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: run a small assembled program through ``dut``.

    Registers 1..n_regs-1 are preloaded with their own index (mirrored
    into ``alusim``).  Each assembled instruction word is driven onto
    ``instruction`` and, after a short delay, queued with
    ``power_instr_q``.  Once the queue has drained and ``dut`` is no
    longer busy, ``alusim.check`` and ``alusim.dump`` are run against it.

    ``m`` is accepted but not used here.
    """
    seed(0)

    # set register values (XXX actually, not random at all)
    for rnum in range(1, dut.n_regs):
        yield dut.intregs.regs[rnum].reg.eq(rnum)
        alusim.setval(rnum, rnum)

    # create some instructions
    asm = ["addi 3, 0, 0x1234",
           "addi 2, 0, 0x4321",
           "add  1, 3, 2"]
    with Program(asm) as program:
        words = program.generate_instructions()
        listing = program.assembly.splitlines()

        # issue instruction(s), wait for issue to be free before proceeding
        for ins, code in zip(words, listing):
            yield instruction.eq(ins)          # raw binary instr.
            yield Delay(1e-6)

            print("binary 0x{:X}".format(ins & 0xffffffff))
            print("assembly", code)

            #alusim.op(op, opi, imm, src1, src2, dest)
            yield from power_instr_q(dut, pdecode2, ins, code)

    # wait for all instructions to stop before checking:
    # first until the queue is empty...
    while (yield dut.qlen_o) != 0:
        yield
    # ...then a few more cycles, and finally until nothing is busy
    for _ in range(4):
        yield
    yield from wait_for_busy_clear(dut)

    # check status
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
1147
1148
def scoreboard_sim(dut, alusim):
    """Simulation process: queue a hand-picked instruction list on ``dut``.

    Registers 1..n_regs-1 are preloaded with their own index (mirrored
    into ``alusim``).  Every instruction is first applied to ``alusim``
    and then queued with ``instr_q``.  Once the queue has drained and
    ``dut`` is no longer busy, ``alusim.check`` and ``alusim.dump`` are
    run against it.

    The issue loop unpacks each entry as
    ``(src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))``.
    Only the ``if True:`` group below uses that eight-element layout; the
    disabled (``if False:``) groups hold shorter tuples and would need
    converting before they could be re-enabled.
    """

    seed(0)

    for i in range(1):

        # preload registers 1..n_regs-1 (currently with their own index,
        # the random alternatives are commented out)
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x20, 1, 1, (0, 0)))  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, 0, 0, (0, 0)))
            instrs.append((7, 6, 6, 2, 0, 0, (0, 0)))
            instrs.append((1, 7, 2, 2, 0, 0, (0, 0)))

        # the only group currently enabled: two ALU adds that both read
        # and write register 3
        if True:
            instrs.append((2, 3, 3, MicrOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
            instrs.append((5, 3, 3, MicrOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
        if False:
            instrs.append((3, 5, 5, MicrOp.OP_MUL_L64, Function.ALU,
                           1, 7, (0, 0)))
        if False:
            instrs.append((2, 3, 3, MicrOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            print(i, instr)
            # requires the eight-element layout described in the docstring
            src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (i, src1, src2, dest, op, fn_unit, opi, imm))
            # apply to the software model first, then queue on the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first until the queue reports empty...
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # ...then four more cycles, and finally until nothing is busy
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1308
1309
def test_scoreboard():
    """Build the decoder + IssueToScoreboard test bench and simulate it.

    The design is first written out as RTLIL to "test_scoreboard6600.il",
    then ``power_sim`` is run against it with a VCD trace going to
    "test_powerboard6600.vcd".
    """
    regwidth = 64
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=regwidth, opwid=8, n_regs=8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)

    m = Module()
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)
    pdecode2 = PowerDecode2(pdecode)

    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    # feed the raw instruction word into the decoder
    m.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    m.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    # dump the design as RTLIL
    rtlil_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as ilfile:
        ilfile.write(rtlil_text)

    process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, process, vcd_name='test_powerboard6600.vcd')

    # run_simulation(dut, scoreboard_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1342
1343
# run the scoreboard test bench when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()