src/soc/experiment/score6600_multi.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5 from nmigen.back.pysim import Delay
   6
   7 from soc.regfile.regfile import RegFileArray, ortreereduce
   8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
   9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
  10 from soc.scoreboard.global_pending import GlobalPending
  11 from soc.scoreboard.group_picker import GroupPicker
  12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  15 from soc.scoreboard.memfu import MemFunctionUnits
  16
  17 from soc.experiment.compalu import ComputationUnitNoDelay
  18 from soc.experiment.compalu_multi import MultiCompUnit, go_record
  19 from soc.experiment.compldst_multi import LDSTCompUnit
  20 from soc.experiment.compldst_multi import CompLDSTOpSubset
  21 from soc.experiment.l0_cache import TstL0CacheBuffer
  22
  23 from soc.experiment.alu_hier import ALU, BranchALU
  24 from soc.fu.alu.alu_input_record import CompALUOpSubset
  25
  26 from soc.decoder.power_enums import MicrOp, Function
  27 from soc.decoder.power_decoder import (create_pdecode)
  28 from soc.decoder.power_decoder2 import (PowerDecode2)
  29 from soc.decoder.power_decoder2 import Decode2ToExecute1Type
  30
  31 from soc.simulator.program import Program
  32
  33
  34 from nmutil.latch import SRLatch
  35 from nmutil.nmoperator import eq
  36
  37 from random import randint, seed
  38 from copy import deepcopy
  39 from math import log
  40
  41 from soc.experiment.sim import RegSim, MemSim
  42 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
  43
  44
  45 class CompUnitsBase(Elaboratable):
  46     """ Computation Unit Base class.
  47
  48         Amazingly, this class works recursively.  It's supposed to just
  49         look after some ALUs (that can handle the same operations),
  50         grouping them together, however it turns out that the same code
  51         can also group *groups* of Computation Units together as well.
  52
  53         Basically it was intended just to concatenate the ALU's issue,
  54         go_rd etc. signals together, which start out as bits and become
  55         sequences.  Turns out that the same trick works just as well
  56         on Computation Units!
  57
  58         So this class may be used recursively to present a top-level
  59         sequential concatenation of all the signals in and out of
  60         ALUs, whilst at the same time making it convenient to group
  61         ALUs together.
  62
  63         At the lower level, the intent is that groups of (identical)
  64         ALUs may be passed the same operation.  Even beyond that,
  65         the intent is that that group of (identical) ALUs actually
  66         share the *same pipeline* and as such become a "Concurrent
  67         Computation Unit" as defined by Mitch Alsup (see section
  68         11.4.9.3)
  69     """
  70
  71     def __init__(self, rwid, units, ldstmode=False):
  72         """ Inputs:
  73
  74             * :rwid:   bit width of register file(s) - both FP and INT
  75             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  76         """
  77         self.units = units
  78         self.ldstmode = ldstmode
  79         self.rwid = rwid
  80         self.rwid = rwid
  81         if units and isinstance(units[0], CompUnitsBase):
  82             self.n_units = 0
  83             for u in self.units:
  84                 self.n_units += u.n_units
  85         else:
  86             self.n_units = len(units)
  87
  88         n_units = self.n_units
  89
  90         # inputs
  91         self.issue_i = Signal(n_units, reset_less=True)
  92         self.rd0 = go_record(n_units, "rd0")
  93         self.rd1 = go_record(n_units, "rd1")
  94         self.go_rd_i = [self.rd0.go, self.rd1.go] # XXX HACK!
  95         self.wr0 = go_record(n_units, "wr0")
  96         self.go_wr_i = [self.wr0.go]
  97         self.shadown_i = Signal(n_units, reset_less=True)
  98         self.go_die_i = Signal(n_units, reset_less=True)
  99         if ldstmode:
 100             self.go_ad_i = Signal(n_units, reset_less=True)
 101             self.go_st_i = Signal(n_units, reset_less=True)
 102
 103         # outputs
 104         self.busy_o = Signal(n_units, reset_less=True)
 105         self.rd_rel_o = [self.rd0.rel, self.rd1.rel] # HACK!
 106         self.req_rel_o = self.wr0.rel
 107         self.done_o = Signal(n_units, reset_less=True)
 108         if ldstmode:
 109             self.ld_o = Signal(n_units, reset_less=True)  # op is LD
 110             self.st_o = Signal(n_units, reset_less=True)  # op is ST
 111             self.adr_rel_o = Signal(n_units, reset_less=True)
 112             self.sto_rel_o = Signal(n_units, reset_less=True)
 113             self.load_mem_o = Signal(n_units, reset_less=True)
 114             self.stwd_mem_o = Signal(n_units, reset_less=True)
 115             self.addr_o = Signal(rwid, reset_less=True)
 116
 117         # in/out register data (note: not register#, actual data)
 118         self.data_o = Signal(rwid, reset_less=True)
 119         self.src1_i = Signal(rwid, reset_less=True)
 120         self.src2_i = Signal(rwid, reset_less=True)
 121         # input operand
 122
 123     def elaborate(self, platform):
 124         m = Module()
 125         comb = m.d.comb
 126
 127         for i, alu in enumerate(self.units):
 128             setattr(m.submodules, "comp%d" % i, alu)
 129
 130         go_rd_l0 = []
 131         go_rd_l1 = []
 132         go_wr_l = []
 133         issue_l = []
 134         busy_l = []
 135         req_rel_l = []
 136         done_l = []
 137         rd_rel0_l = []
 138         rd_rel1_l = []
 139         shadow_l = []
 140         godie_l = []
 141         for alu in self.units:
 142             req_rel_l.append(alu.req_rel_o)
 143             done_l.append(alu.done_o)
 144             shadow_l.append(alu.shadown_i)
 145             godie_l.append(alu.go_die_i)
 146             print (alu, "rel", alu.req_rel_o, alu.rd_rel_o)
 147             rd_rel0_l.append(alu.rd_rel_o[0])
 148             rd_rel1_l.append(alu.rd_rel_o[1])
 149             go_wr_l.append(alu.go_wr_i)
 150             go_rd_l0.append(alu.go_rd_i[0])
 151             go_rd_l1.append(alu.go_rd_i[1])
 152             issue_l.append(alu.issue_i)
 153             busy_l.append(alu.busy_o)
 154         comb += self.rd0.rel.eq(Cat(*rd_rel0_l))
 155         comb += self.rd1.rel.eq(Cat(*rd_rel1_l))
 156         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 157         comb += self.done_o.eq(Cat(*done_l))
 158         comb += self.busy_o.eq(Cat(*busy_l))
 159         comb += Cat(*godie_l).eq(self.go_die_i)
 160         comb += Cat(*shadow_l).eq(self.shadown_i)
 161         comb += Cat(*go_wr_l).eq(self.wr0.go) # XXX TODO
 162         comb += Cat(*go_rd_l0).eq(self.rd0.go)
 163         comb += Cat(*go_rd_l1).eq(self.rd1.go)
 164         comb += Cat(*issue_l).eq(self.issue_i)
 165
 166         # connect data register input/output
 167
 168         # merge (OR) all integer FU / ALU outputs to a single value
 169         # XXX NOTE: this only works because there is a single "port"
 170         # protected by a single go_wr.  multi-issue requires a bus
 171         # to be inserted here.
 172         if self.units:
 173             data_o = ortreereduce(self.units, "data_o")
 174             comb += self.data_o.eq(data_o)
 175             if self.ldstmode:
 176                 addr_o = ortreereduce(self.units, "addr_o")
 177                 comb += self.addr_o.eq(addr_o)
 178
 179         for i, alu in enumerate(self.units):
 180             comb += alu.src1_i.eq(self.src1_i)
 181             comb += alu.src2_i.eq(self.src2_i)
 182
 183         if not self.ldstmode:
 184             return m
 185
 186         ldmem_l = []
 187         stmem_l = []
 188         go_ad_l = []
 189         go_st_l = []
 190         ld_l = []
 191         st_l = []
 192         adr_rel_l = []
 193         sto_rel_l = []
 194         for alu in self.units:
 195             ld_l.append(alu.ld_o)
 196             st_l.append(alu.st_o)
 197             adr_rel_l.append(alu.adr_rel_o)
 198             sto_rel_l.append(alu.sto_rel_o)
 199             ldmem_l.append(alu.load_mem_o)
 200             stmem_l.append(alu.stwd_mem_o)
 201             go_ad_l.append(alu.go_ad_i)
 202             go_st_l.append(alu.go_st_i)
 203         comb += self.ld_o.eq(Cat(*ld_l))
 204         comb += self.st_o.eq(Cat(*st_l))
 205         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 206         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 207         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 208         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 209         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 210         comb += Cat(*go_st_l).eq(self.go_st_i)
 211
 212         return m
 213
 214
 215 class CompUnitLDSTs(CompUnitsBase):
 216
 217     def __init__(self, rwid, opwid, n_ldsts, l0):
 218         """ Inputs:
 219
 220             * :rwid:   bit width of register file(s) - both FP and INT
 221             * :opwid:  operand bit width
 222         """
 223         self.opwid = opwid
 224
 225         # inputs
 226         self.op = CompLDSTOpSubset("cul_i")
 227
 228         # LD/ST Units
 229         units = []
 230         for i in range(n_ldsts):
 231             pi = l0.l0.dports[i].pi
 232             units.append(LDSTCompUnit(pi, rwid, awid=48))
 233
 234         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 235
 236     def elaborate(self, platform):
 237         m = CompUnitsBase.elaborate(self, platform)
 238         comb = m.d.comb
 239
 240         # hand the same operation to all units
 241         for ldst in self.units:
 242             comb += ldst.oper_i.eq(self.op)
 243
 244         return m
 245
 246
 247 class CompUnitALUs(CompUnitsBase):
 248
 249     def __init__(self, rwid, opwid, n_alus):
 250         """ Inputs:
 251
 252             * :rwid:   bit width of register file(s) - both FP and INT
 253             * :opwid:  operand bit width
 254         """
 255         self.opwid = opwid
 256
 257         # inputs
 258         self.op = CompALUOpSubset("cua_i")
 259
 260         # Int ALUs
 261         alus = []
 262         for i in range(n_alus):
 263             alus.append(ALU(rwid))
 264
 265         units = []
 266         for alu in alus:
 267             aluopwid = 3  # extra bit for immediate mode
 268             units.append(MultiCompUnit(rwid, alu, CompALUOpSubset))
 269
 270         CompUnitsBase.__init__(self, rwid, units)
 271
 272     def elaborate(self, platform):
 273         m = CompUnitsBase.elaborate(self, platform)
 274         comb = m.d.comb
 275
 276         # hand the subset of operation to ALUs
 277         for alu in self.units:
 278             comb += alu.oper_i.eq(self.op)
 279
 280         return m
 281
 282
 283 class CompUnitBR(CompUnitsBase):
 284
 285     def __init__(self, rwid, opwid):
 286         """ Inputs:
 287
 288             * :rwid:   bit width of register file(s) - both FP and INT
 289             * :opwid:  operand bit width
 290
 291             Note: bgt unit is returned so that a shadow unit can be created
 292             for it
 293         """
 294         self.opwid = opwid
 295
 296         # inputs
 297         self.op = CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
 298         self.oper_i = Signal(opwid, reset_less=True)
 299         self.imm_i = Signal(rwid, reset_less=True)
 300
 301         # Branch ALU and CU
 302         self.bgt = BranchALU(rwid)
 303         aluopwid = 3  # extra bit for immediate mode
 304         self.br1 = MultiCompUnit(rwid, self.bgt, CompALUOpSubset)
 305         CompUnitsBase.__init__(self, rwid, [self.br1])
 306
 307     def elaborate(self, platform):
 308         m = CompUnitsBase.elaborate(self, platform)
 309         comb = m.d.comb
 310
 311         # hand the same operation to all units
 312         for alu in self.units:
 313             #comb += alu.oper_i.eq(self.op) # TODO
 314             comb += alu.oper_i.eq(self.oper_i)
 315             #comb += alu.imm_i.eq(self.imm_i)
 316
 317         return m
 318
 319
 320 class FunctionUnits(Elaboratable):
 321
 322     def __init__(self, n_reg, n_int_alus, n_src, n_dst):
 323         self.n_src, self.n_dst = n_src, n_dst
 324         self.n_reg = n_reg
 325         self.n_int_alus = nf = n_int_alus
 326
 327         self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
 328         self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)
 329
 330         self.readable_o = Signal(n_int_alus, reset_less=True)
 331         self.writable_o = Signal(n_int_alus, reset_less=True)
 332
 333         # arrays
 334         src = []
 335         rsel = []
 336         rd = []
 337         for i in range(n_src):
 338             j = i + 1 # name numbering to match src1/src2
 339             src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
 340             rsel.append(Signal(n_reg, name="src%d_rsel_o" % j, reset_less=True))
 341             rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
 342         dst = []
 343         dsel = []
 344         wr = []
 345         for i in range(n_dst):
 346             j = i + 1 # name numbering to match src1/src2
 347             dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
 348             dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j, reset_less=True))
 349             wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
 350         wpnd = []
 351         pend = []
 352         for i in range(nf):
 353             j = i + 1 # name numbering to match src1/src2
 354             pend.append(Signal(nf, name="rd_src%d_pend_o" % j, reset_less=True))
 355             wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j, reset_less=True))
 356
 357         self.dest_i = Array(dst)     # Dest in (top)
 358         self.src_i = Array(src)      # oper in (top)
 359
 360         # for Register File Select Lines (horizontal), per-reg
 361         self.dst_rsel_o = Array(dsel) # dest reg (bot)
 362         self.src_rsel_o = Array(rsel)  # src reg (bot)
 363
 364         self.go_rd_i = Array(rd)
 365         self.go_wr_i = Array(wr)
 366
 367         self.go_die_i = Signal(n_int_alus, reset_less=True)
 368         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 369
 370         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 371
 372     def elaborate(self, platform):
 373         m = Module()
 374         comb = m.d.comb
 375         sync = m.d.sync
 376
 377         n_intfus = self.n_int_alus
 378
 379         # Integer FU-FU Dep Matrix
 380         intfudeps = FUFUDepMatrix(n_intfus, n_intfus, 2, 1)
 381         m.submodules.intfudeps = intfudeps
 382         # Integer FU-Reg Dep Matrix
 383         intregdeps = FURegDepMatrix(n_intfus, self.n_reg, 2, 1)
 384         m.submodules.intregdeps = intregdeps
 385
 386         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 387         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 388
 389         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 390         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 391
 392         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 393         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 394         self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid
 395
 396         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 397         comb += intfudeps.go_die_i.eq(self.go_die_i)
 398         comb += self.readable_o.eq(intfudeps.readable_o)
 399         comb += self.writable_o.eq(intfudeps.writable_o)
 400
 401         # Connect function issue / arrays, and dest/src1/src2
 402         for i in range(self.n_src):
 403             print (i, self.go_rd_i, intfudeps.go_rd_i)
 404             comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
 405             comb += intregdeps.src_i[i].eq(self.src_i[i])
 406             comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
 407             comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
 408         for i in range(self.n_dst):
 409             print (i, self.go_wr_i, intfudeps.go_wr_i)
 410             comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
 411             comb += intregdeps.dest_i[i].eq(self.dest_i[i])
 412             comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
 413             comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
 414         comb += intregdeps.go_die_i.eq(self.go_die_i)
 415         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 416
 417         return m
 418
 419
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: instantiates the register files, the
        Computation Units (ALUs, LD/STs, branch), the dependency
        matrices, pickers, issue unit and shadow matrices, and wires
        them all together in elaborate().
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.l0 = TstL0CacheBuffer()

        # issue q needs to get at these
        # (elaborate reads .n_insns from the ALU and LD/ST groups to size
        # the corresponding Computation Unit groups)
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.instr = Decode2ToExecute1Type("sc_instr")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds and returns the Module containing the whole scoreboard.

            Several unit counts are hard-coded in this method (see the
            NOTE(review) comments) and have to be kept consistent with
            the IssueUnitGroup sizes chosen in __init__.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.l0 = l0 = self.l0

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything further
        # down in this method
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): 5 is hard-coded; presumably 2 ALUs + 2 LD/STs +
        # 1 branch unit, matching the IssueUnitGroup sizes - TODO confirm
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        # NOTE(review): n_ldsts is hard-coded separately from
        # self.lsissue.n_insns, which is what actually sizes cul
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, l0)

        # Comp Units
        # (group of groups: signals are concatenated in the order ALUs,
        # LD/STs, branch)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt  # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        fu_n_src = 2
        fu_n_dst = 1
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
                                                     fu_n_src, fu_n_dst)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        # NOTE(review): mempick1 is added as a submodule but none of its
        # ports are connected in this method
        mempick1 = GroupPicker(n_ldsts, 1, 1)
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
        m.submodules.intpick1 = ipick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): the "increased by 1" remark looks stale - the branch
        # shadow is a separate ShadowMatrix (bshadow) of width 1 here
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq_from_execute1(self.instr)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.op.eq_from_execute1(self.instr)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i[0].eq(regdecode.dest_o)
        comb += intfus.src_i[0].eq(regdecode.src1_o)
        comb += intfus.src_i[1].eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        reset_b = Signal(cul.n_units, reset_less=True)
        # XXX was cul.go_wr_i not done.o
        # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
        sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        # (registered: go_ad_i follows adr_rel_o through the sync domain)
        sync += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        # NOTE(review): indices 0 and 1 are hard-coded, so this assumes
        # exactly two LD/ST units
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = ipick1.go_rd_o
        go_wr_o = ipick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        for i in range(fu_n_src):
            comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
        for i in range(fu_n_dst):
            comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        int_rd_o = intfus.readable_o
        rrel_o = cu.rd_rel_o
        rqrl_o = cu.req_rel_o
        for i in range(fu_n_src):
            comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
            comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
        int_wr_o = intfus.writable_o
        for i in range(fu_n_dst):
            # XXX FIXME: rqrl_o[i] here
            # (cu.req_rel_o is a single signal, not a per-port list, so
            # every destination port is fed the same request vector)
            comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
            comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
            # XXX experiment: use ~cu.busy_o instead.  *should* be good
            # because the comp unit is only free once completed
            comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): as built above, branch speculation uses the separate
        # 1-wide bshadow matrix, so the "extra bit" wording looks stale

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from the cycle br1 is issued (or while the
        # recorder's active_i flag is set) until br1 receives go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): 0x1f is a 5-bit mask; presumably it is meant to
        # track n_intfus (5) - TODO derive it instead of hard-coding
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1 gives the 1/2 encoding described for
            # branch_direction_o in __init__
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
        comb += int_src1.ren.eq(intfus.src_rsel_o[0])
        comb += int_src2.ren.eq(intfus.src_rsel_o[1])

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        # (the same picker outputs also drive intfus.go_rd_i / go_wr_i above)
        for i in range(fu_n_src):
            comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
        for i in range(fu_n_dst):
            comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields whatever iterating the two register files yields,
            followed by this module's register-number inputs, issue_o and
            the branch speculation signals.

            Note: busy_o and reg_enable_i are not part of the iteration.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything produced by __iter__ as a list.
        """
        return list(self)
 774
 775
class IssueToScoreboard(Elaboratable):
    """Instruction queue wired in front of a Scoreboard.

    An ``InstructionQ`` and a ``Scoreboard`` are instantiated as
    submodules.  The instruction at the head of the queue is inspected
    in place, routed to the ALU, LD/ST or branch issue unit, and popped
    from the queue once the selected issue unit reports a non-zero
    ``fn_issue_o``.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """Store the configuration and create the external signals.

        qlen, n_in, n_out, rwid and opwid are passed on to InstructionQ
        (in the order rwid, opwid, qlen, n_in, n_out); rwid and n_regs
        are passed on to Scoreboard.  n_in is also the number of
        entries in ``data_i``.
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # width of the queue-length style signals: int(log2(qlen)) + 2 bits
        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction._nq(n_in, "data_i")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """Build the module: queue, scoreboard and the issue glue logic."""
        m = Module()
        comb = m.d.comb
        sync = m.d.sync  # not used anywhere in this method

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        # (note: this attribute only exists once elaborate() has run)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed (iq.n_sub_i set to 1, below)
        # when the issue unit being waited on reports a non-zero
        # fn_issue_o.

        # in "waiting" state
        # (one flag per issue unit; each is driven combinatorially in
        # the function-unit selection further down)
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            # (and only while the queue is non-empty)
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            # (all taken from the entry at the head of the queue)
            instr = iq.data_o[0]
            imm = instr.imm_data.data
            dest = instr.write_reg.data
            src1 = instr.read_reg1.data
            src2 = instr.read_reg2.data
            op = instr.insn_type
            fu = instr.fn_unit
            opi = instr.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile
            comb += sc.instr.eq(instr)

            # choose a Function-Unit-Group
            # (a single If/Elif/Elif chain: at most one unit is selected)
            with m.If(fu == Function.ALU):  # alu
                comb += sc.aluissue.insn_i.eq(1) # enable alu issue
                comb += wait_issue_alu.eq(1)
            with m.Elif(fu == Function.LDST):  # ld/st
                comb += sc.lsissue.insn_i.eq(1) # enable ldst issue
                comb += wait_issue_ls.eq(1)

            # neither ALU nor LD/ST: treated as a branch when bit 2 or
            # bit 3 of insn_type is set.  the branch operation is the
            # low two bits of insn_type with the immediate-valid flag
            # concatenated on top.
            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """Yield p_ready_o, every field of each data_i entry, then p_add_i.

        busy_o and qlen_o are not included.
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """Return the signals produced by iterating this object, as a list."""
        return list(self)
 891
 892
 893 def power_instr_q(dut, pdecode2, ins, code):
 894     instrs = [pdecode2.e]
 895
 896     sendlen = 1
 897     for idx, instr in enumerate(instrs):
 898         yield dut.data_i[idx].eq(instr)
 899         insn_type = yield instr.insn_type
 900         fn_unit = yield instr.fn_unit
 901         print("senddata ", idx, insn_type, fn_unit, instr)
 902     yield dut.p_add_i.eq(sendlen)
 903     yield
 904     o_p_ready = yield dut.p_ready_o
 905     while not o_p_ready:
 906         yield
 907         o_p_ready = yield dut.p_ready_o
 908
 909     yield dut.p_add_i.eq(0)
 910
 911
 912 def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
 913             branch_success, branch_fail):
 914     instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
 915                 'imm_data': (imm, op_imm),
 916                'read_reg1': src1, 'read_reg2': src2}]
 917
 918     sendlen = 1
 919     for idx, instr in enumerate(instrs):
 920         imm, op_imm = instr['imm_data']
 921         reg1 = instr['read_reg1']
 922         reg2 = instr['read_reg2']
 923         dest = instr['write_reg']
 924         insn_type = instr['insn_type']
 925         fn_unit = instr['fn_unit']
 926         yield dut.data_i[idx].insn_type.eq(insn_type)
 927         yield dut.data_i[idx].fn_unit.eq(fn_unit)
 928         yield dut.data_i[idx].read_reg1.data.eq(reg1)
 929         yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO
 930         yield dut.data_i[idx].read_reg2.data.eq(reg2)
 931         yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO
 932         yield dut.data_i[idx].write_reg.data.eq(dest)
 933         yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO
 934         yield dut.data_i[idx].imm_data.data.eq(imm)
 935         yield dut.data_i[idx].imm_data.ok.eq(op_imm)
 936         di = yield dut.data_i[idx]
 937         print("senddata %d %x" % (idx, di))
 938     yield dut.p_add_i.eq(sendlen)
 939     yield
 940     o_p_ready = yield dut.p_ready_o
 941     while not o_p_ready:
 942         yield
 943         o_p_ready = yield dut.p_ready_o
 944
 945     yield dut.p_add_i.eq(0)
 946
 947
 948 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 949     yield from disable_issue(dut)
 950     yield dut.int_dest_i.eq(dest)
 951     yield dut.int_src1_i.eq(src1)
 952     yield dut.int_src2_i.eq(src2)
 953     if (op & (0x3 << 2)) != 0:  # branch
 954         yield dut.brissue.insn_i.eq(1)
 955         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 956         yield dut.br_imm_i.eq(imm)
 957         dut_issue = dut.brissue
 958     else:
 959         yield dut.aluissue.insn_i.eq(1)
 960         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 961         yield dut.alu_imm_i.eq(imm)
 962         dut_issue = dut.aluissue
 963     yield dut.reg_enable_i.eq(1)
 964
 965     # these indicate that the instruction is to be made shadow-dependent on
 966     # (either) branch success or branch fail
 967     yield dut.branch_fail_i.eq(branch_fail)
 968     yield dut.branch_succ_i.eq(branch_success)
 969
 970     yield
 971     yield from wait_for_issue(dut, dut_issue)
 972
 973
 974 def print_reg(dut, rnums):
 975     rs = []
 976     for rnum in rnums:
 977         reg = yield dut.intregs.regs[rnum].reg
 978         rs.append("%x" % reg)
 979     rnums = map(str, rnums)
 980     print("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 981
 982
 983 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 984     insts = []
 985     for i in range(n_ops):
 986         src1 = randint(1, dut.n_regs-1)
 987         src2 = randint(1, dut.n_regs-1)
 988         imm = randint(1, (1 << dut.rwid)-1)
 989         dest = randint(1, dut.n_regs-1)
 990         op = randint(0, max_opnums)
 991         opi = 0 if randint(0, 2) else 1  # set true if random is nonzero
 992
 993         if shadowing:
 994             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 995         else:
 996             insts.append((src1, src2, dest, op, opi, imm))
 997     return insts
 998
 999
def wait_for_busy_clear(dut):
    """Simulation process: step until ``dut.busy_o`` reads as zero.

    Prints "busy" once for every step on which the signal is still set.
    """
    while (yield dut.busy_o):
        print("busy",)
        yield
1007
1008
def disable_issue(dut):
    """Simulation process: clear ``insn_i`` on the ALU, branch and LD/ST
    issue units, in that order."""
    for issue_unit in (dut.aluissue, dut.brissue, dut.lsissue):
        yield issue_unit.insn_i.eq(0)
1013
1014
def wait_for_issue(dut, dut_issue):
    """Simulation process: step until ``dut_issue.fn_issue_o`` is non-zero.

    "busy" is printed on every step spent waiting.  Once the issue unit
    has responded, all issue units are disabled again and
    ``dut.reg_enable_i`` is cleared.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy",)
        # yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    # yield from print_reg(dut, [1,2,3])
1026
1027
def scoreboard_branch_sim(dut, alusim):
    """Simulation process exercising branch shadowing.

    Registers are loaded with seeded random values, a short program
    containing one branch is issued through ``int_instr``, and the same
    program is then replayed on ``alusim`` so that ``alusim.check`` can
    compare the results.

    Instructions are 5-tuples ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``.  An ``op`` of 4 or more is a branch; for a branch
    the ``dest`` slot instead holds ``(branch_ok, branch_fail)``, two
    lists of follow-on instructions (entries may be None).

    ``dut`` must provide ``branch_direction_o``, ``intregs``, ``n_regs``
    and everything ``int_instr`` drives (presumably a Scoreboard rather
    than an IssueToScoreboard -- TODO confirm against the caller).
    """
    iseed = 3

    for _ in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for rnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # untouched copy for the reference model: the issue loop below
        # consumes (and extends) insts
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        while insts:
            yield
            yield
            i += 1
            # way the branch went: 0 while unresolved.  the scoreboard
            # sets this to data_o+1 when the branch completes, i.e.
            # 1 = branch did not occur, 2 = branch occurred
            branch_direction = yield dut.branch_direction_o
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch did not occur: drop the "success" path
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch occurred: drop the "fail" path
            if branch_direction != 0:
                # branch already resolved: no shadowing needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        insts.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        insts.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes (dut, op, imm, src1, src2, dest, ok, fail).
            # these tuples carry no immediate, so 0 is passed; leaving
            # the argument out made the call fail with a TypeError.
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program on the reference model
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # same six-argument form (op, opi, imm, src1, src2, dest) as
            # the call in scoreboard_sim; no immediate is used here
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                # only the path the branch actually took is simulated
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1140
1141
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: run a small assembly program through the DUT.

    Every register from 1 upwards is preloaded with its own number (in
    both ``dut`` and ``alusim``).  Each assembled instruction is then
    driven onto ``instruction`` and queued via ``power_instr_q``.  Once
    the queue has drained and the DUT is no longer busy, ``alusim``
    checks and dumps the state.  ``m`` is accepted but not used.
    """
    seed(0)

    for _ in range(1):

        # register N holds the value N (earlier experiments used random
        # values, or 31+N*3)
        for rnum in range(1, dut.n_regs):
            yield dut.intregs.regs[rnum].reg.eq(rnum)
            alusim.setval(rnum, rnum)

        # assemble the program listing; toggle the blocks to choose one
        lst = []
        if False:
            lst += ["addi 2, 0, 0x4321",
                    "addi 3, 0, 0x1234",
                    "add  1, 3, 2",
                    "add  4, 3, 5"
                    ]
        if True:
            lst += ["lbzu 6, 7(2)",
                    ]

        with Program(lst) as program:
            gen = program.generate_instructions()

            # feed the instructions one at a time; power_instr_q returns
            # only once the queue has accepted the instruction
            for ins, code in zip(gen, program.assembly.splitlines()):
                yield instruction.eq(ins)  # raw binary instr.
                yield  # Delay(1e-6)

                print("binary 0x{:X}".format(ins & 0xffffffff))
                print("assembly", code)

                #alusim.op(op, opi, imm, src1, src2, dest)
                yield from power_instr_q(dut, pdecode2, ins, code)

        # let the queue drain, allow four further steps, then wait for
        # the busy signal to drop
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1198
1199
def scoreboard_sim(dut, alusim):
    """Simulation process: push hand-written instructions through the DUT.

    Every register from 1 upwards is preloaded with its own number.
    Each enabled instruction is applied to ``alusim`` and queued into
    ``dut`` with ``instr_q``.  After the queue drains and the DUT is no
    longer busy, ``alusim`` checks and dumps the state.

    The issue loop expects 8-field instructions:
    ``(src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))``.
    Most of the disabled (``if False``) regression vectors below use
    shorter layouts and would not unpack if simply switched on.
    """
    seed(0)

    for _ in range(1):

        # register N holds the value N (earlier experiments used random
        # values, or 31+N*3)
        for rnum in range(1, dut.n_regs):
            yield dut.intregs.regs[rnum].reg.eq(rnum)
            alusim.setval(rnum, rnum)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x20, 1, 1, (0, 0)))  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, 0, 0, (0, 0)))
            instrs.append((7, 6, 6, 2, 0, 0, (0, 0)))
            instrs.append((1, 7, 2, 2, 0, 0, (0, 0)))

        if True:
            instrs.append((2, 3, 3, MicrOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
            instrs.append((5, 3, 3, MicrOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
        if False:
            instrs.append((3, 5, 5, MicrOp.OP_MUL_L64, Function.ALU,
                           1, 7, (0, 0)))
        if False:
            instrs.append((2, 3, 3, MicrOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s); instr_q returns only once the queue has
        # accepted the instruction
        for num, instr in enumerate(instrs):
            print(num, instr)
            (src1, src2, dest, op, fn_unit,
             opi, imm, (br_ok, br_fail)) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (num, src1, src2, dest, op, fn_unit, opi, imm))
            # reference model first, then the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # let the queue drain, allow four further steps, then wait for
        # the busy signal to drop
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1359
1360
def test_scoreboard():
    """Build the test bench, write it out as RTLIL and simulate it.

    An ``IssueToScoreboard`` and a ``PowerDecode2`` are placed side by
    side in one top-level module, with a 32-bit ``instruction`` signal
    feeding the decoder's raw opcode input.  The design is written to
    ``test_scoreboard6600.il`` and ``power_sim`` is then run with a VCD
    trace in ``test_powerboard6600.vcd``.
    """
    regwidth = 64
    dut = IssueToScoreboard(2, 1, 1, regwidth, 8, 8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # created but not used below

    m = Module()
    comb = m.d.comb
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    pdecode2 = PowerDecode2(pdecode)
    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    # convert before opening, so a failed conversion leaves no file
    il_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, sim_process, vcd_name='test_powerboard6600.vcd')

    # alternative test processes, currently disabled:
    #run_simulation(dut, scoreboard_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1393
1394
# run the scoreboard test bench when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()