src/soc/experiment/score6600_multi.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5 from nmigen.back.pysim import Delay
   6
   7 from soc.regfile.regfile import RegFileArray, ortreereduce
   8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
   9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
  10 from soc.scoreboard.global_pending import GlobalPending
  11 from soc.scoreboard.group_picker import GroupPicker
  12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  15 from soc.scoreboard.memfu import MemFunctionUnits
  16
  17 from soc.experiment.compalu import ComputationUnitNoDelay
  18 from soc.experiment.compalu_multi import MultiCompUnit, go_record
  19 from soc.experiment.compldst_multi import LDSTCompUnit
  20 from soc.experiment.compldst_multi import CompLDSTOpSubset
  21 from soc.experiment.l0_cache import TstL0CacheBuffer
  22
  23 from soc.experiment.alu_hier import ALU, BranchALU
  24 from soc.fu.alu.alu_input_record import CompALUOpSubset
  25
  26 from openpower.decoder.power_enums import MicrOp, Function
  27 from openpower.decoder.power_decoder import (create_pdecode)
  28 from openpower.decoder.power_decoder2 import (PowerDecode2)
  29 from openpower.decoder.power_decoder2 import Decode2ToExecute1Type
  30
  31 from openpower.simulator.program import Program
  32
  33
  34 from nmutil.latch import SRLatch
  35 from nmutil.nmoperator import eq
  36
  37 from random import randint, seed
  38 from copy import deepcopy
  39 from math import log
  40
  41 from soc.experiment.sim import RegSim, MemSim
  42 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
  43
  44
  45 class CompUnitsBase(Elaboratable):
  46     """ Computation Unit Base class.
  47
  48         Amazingly, this class works recursively.  It's supposed to just
  49         look after some ALUs (that can handle the same operations),
  50         grouping them together, however it turns out that the same code
  51         can also group *groups* of Computation Units together as well.
  52
  53         Basically it was intended just to concatenate the ALU's issue,
  54         go_rd etc. signals together, which start out as bits and become
  55         sequences.  Turns out that the same trick works just as well
  56         on Computation Units!
  57
  58         So this class may be used recursively to present a top-level
  59         sequential concatenation of all the signals in and out of
  60         ALUs, whilst at the same time making it convenient to group
  61         ALUs together.
  62
  63         At the lower level, the intent is that groups of (identical)
  64         ALUs may be passed the same operation.  Even beyond that,
  65         the intent is that that group of (identical) ALUs actually
  66         share the *same pipeline* and as such become a "Concurrent
  67         Computation Unit" as defined by Mitch Alsup (see section
  68         11.4.9.3)
  69     """
  70
  71     def __init__(self, rwid, units, ldstmode=False):
  72         """ Inputs:
  73
  74             * :rwid:   bit width of register file(s) - both FP and INT
  75             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  76         """
  77         self.units = units
  78         self.ldstmode = ldstmode
  79         self.rwid = rwid
  80         self.rwid = rwid
  81         if units and isinstance(units[0], CompUnitsBase):
  82             self.n_units = 0
  83             for u in self.units:
  84                 self.n_units += u.n_units
  85         else:
  86             self.n_units = len(units)
  87
  88         n_units = self.n_units
  89
  90         # inputs
  91         self.issue_i = Signal(n_units, reset_less=True)
  92         self.rd0 = go_record(n_units, "rd0")
  93         self.rd1 = go_record(n_units, "rd1")
  94         self.go_rd_i = [self.rd0.go, self.rd1.go]  # XXX HACK!
  95         self.wr0 = go_record(n_units, "wr0")
  96         self.go_wr_i = [self.wr0.go]
  97         self.shadown_i = Signal(n_units, reset_less=True)
  98         self.go_die_i = Signal(n_units, reset_less=True)
  99         if ldstmode:
 100             self.go_ad_i = Signal(n_units, reset_less=True)
 101             self.go_st_i = Signal(n_units, reset_less=True)
 102
 103         # outputs
 104         self.busy_o = Signal(n_units, reset_less=True)
 105         self.rd_rel_o = [self.rd0.rel, self.rd1.rel]  # HACK!
 106         self.req_rel_o = self.wr0.rel
 107         self.done_o = Signal(n_units, reset_less=True)
 108         if ldstmode:
 109             self.ld_o = Signal(n_units, reset_less=True)  # op is LD
 110             self.st_o = Signal(n_units, reset_less=True)  # op is ST
 111             self.adr_rel_o = Signal(n_units, reset_less=True)
 112             self.sto_rel_o = Signal(n_units, reset_less=True)
 113             self.load_mem_o = Signal(n_units, reset_less=True)
 114             self.stwd_mem_o = Signal(n_units, reset_less=True)
 115             self.addr_o = Signal(rwid, reset_less=True)
 116
 117         # in/out register data (note: not register#, actual data)
 118         self.data_o = Signal(rwid, reset_less=True)
 119         self.src1_i = Signal(rwid, reset_less=True)
 120         self.src2_i = Signal(rwid, reset_less=True)
 121         # input operand
 122
 123     def elaborate(self, platform):
 124         m = Module()
 125         comb = m.d.comb
 126
 127         for i, alu in enumerate(self.units):
 128             setattr(m.submodules, "comp%d" % i, alu)
 129
 130         go_rd_l0 = []
 131         go_rd_l1 = []
 132         go_wr_l = []
 133         issue_l = []
 134         busy_l = []
 135         req_rel_l = []
 136         done_l = []
 137         rd_rel0_l = []
 138         rd_rel1_l = []
 139         shadow_l = []
 140         godie_l = []
 141         for alu in self.units:
 142             req_rel_l.append(alu.req_rel_o)
 143             done_l.append(alu.done_o)
 144             shadow_l.append(alu.shadown_i)
 145             godie_l.append(alu.go_die_i)
 146             print(alu, "rel", alu.req_rel_o, alu.rd_rel_o)
 147             rd_rel0_l.append(alu.rd_rel_o[0])
 148             rd_rel1_l.append(alu.rd_rel_o[1])
 149             go_wr_l.append(alu.go_wr_i)
 150             go_rd_l0.append(alu.go_rd_i[0])
 151             go_rd_l1.append(alu.go_rd_i[1])
 152             issue_l.append(alu.issue_i)
 153             busy_l.append(alu.busy_o)
 154         comb += self.rd0.rel.eq(Cat(*rd_rel0_l))
 155         comb += self.rd1.rel.eq(Cat(*rd_rel1_l))
 156         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 157         comb += self.done_o.eq(Cat(*done_l))
 158         comb += self.busy_o.eq(Cat(*busy_l))
 159         comb += Cat(*godie_l).eq(self.go_die_i)
 160         comb += Cat(*shadow_l).eq(self.shadown_i)
 161         comb += Cat(*go_wr_l).eq(self.wr0.go)  # XXX TODO
 162         comb += Cat(*go_rd_l0).eq(self.rd0.go)
 163         comb += Cat(*go_rd_l1).eq(self.rd1.go)
 164         comb += Cat(*issue_l).eq(self.issue_i)
 165
 166         # connect data register input/output
 167
 168         # merge (OR) all integer FU / ALU outputs to a single value
 169         # XXX NOTE: this only works because there is a single "port"
 170         # protected by a single go_wr.  multi-issue requires a bus
 171         # to be inserted here.
 172         if self.units:
 173             data_o = ortreereduce(self.units, "data_o")
 174             comb += self.data_o.eq(data_o)
 175             if self.ldstmode:
 176                 addr_o = ortreereduce(self.units, "addr_o")
 177                 comb += self.addr_o.eq(addr_o)
 178
 179         for i, alu in enumerate(self.units):
 180             comb += alu.src1_i.eq(self.src1_i)
 181             comb += alu.src2_i.eq(self.src2_i)
 182
 183         if not self.ldstmode:
 184             return m
 185
 186         ldmem_l = []
 187         stmem_l = []
 188         go_ad_l = []
 189         go_st_l = []
 190         ld_l = []
 191         st_l = []
 192         adr_rel_l = []
 193         sto_rel_l = []
 194         for alu in self.units:
 195             ld_l.append(alu.ld_o)
 196             st_l.append(alu.st_o)
 197             adr_rel_l.append(alu.adr_rel_o)
 198             sto_rel_l.append(alu.sto_rel_o)
 199             ldmem_l.append(alu.load_mem_o)
 200             stmem_l.append(alu.stwd_mem_o)
 201             go_ad_l.append(alu.go_ad_i)
 202             go_st_l.append(alu.go_st_i)
 203         comb += self.ld_o.eq(Cat(*ld_l))
 204         comb += self.st_o.eq(Cat(*st_l))
 205         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 206         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 207         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 208         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 209         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 210         comb += Cat(*go_st_l).eq(self.go_st_i)
 211
 212         return m
 213
 214
 215 class CompUnitLDSTs(CompUnitsBase):
 216
 217     def __init__(self, rwid, opwid, n_ldsts, l0):
 218         """ Inputs:
 219
 220             * :rwid:   bit width of register file(s) - both FP and INT
 221             * :opwid:  operand bit width
 222         """
 223         self.opwid = opwid
 224
 225         # inputs
 226         self.op = CompLDSTOpSubset("cul_i")
 227
 228         # LD/ST Units
 229         units = []
 230         for i in range(n_ldsts):
 231             pi = l0.l0.dports[i].pi
 232             units.append(LDSTCompUnit(pi, rwid, awid=48))
 233
 234         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 235
 236     def elaborate(self, platform):
 237         m = CompUnitsBase.elaborate(self, platform)
 238         comb = m.d.comb
 239
 240         # hand the same operation to all units
 241         for ldst in self.units:
 242             comb += ldst.oper_i.eq(self.op)
 243
 244         return m
 245
 246
 247 class CompUnitALUs(CompUnitsBase):
 248
 249     def __init__(self, rwid, opwid, n_alus):
 250         """ Inputs:
 251
 252             * :rwid:   bit width of register file(s) - both FP and INT
 253             * :opwid:  operand bit width
 254         """
 255         self.opwid = opwid
 256
 257         # inputs
 258         self.op = CompALUOpSubset("cua_i")
 259
 260         # Int ALUs
 261         alus = []
 262         for i in range(n_alus):
 263             alus.append(ALU(rwid))
 264
 265         units = []
 266         for alu in alus:
 267             aluopwid = 3  # extra bit for immediate mode
 268             units.append(MultiCompUnit(rwid, alu, CompALUOpSubset))
 269
 270         CompUnitsBase.__init__(self, rwid, units)
 271
 272     def elaborate(self, platform):
 273         m = CompUnitsBase.elaborate(self, platform)
 274         comb = m.d.comb
 275
 276         # hand the subset of operation to ALUs
 277         for alu in self.units:
 278             comb += alu.oper_i.eq(self.op)
 279
 280         return m
 281
 282
 283 class CompUnitBR(CompUnitsBase):
 284
 285     def __init__(self, rwid, opwid):
 286         """ Inputs:
 287
 288             * :rwid:   bit width of register file(s) - both FP and INT
 289             * :opwid:  operand bit width
 290
 291             Note: bgt unit is returned so that a shadow unit can be created
 292             for it
 293         """
 294         self.opwid = opwid
 295
 296         # inputs
 297         self.op = CompALUOpSubset("cua_i")  # TODO - CompALUBranchSubset
 298         self.oper_i = Signal(opwid, reset_less=True)
 299         self.imm_i = Signal(rwid, reset_less=True)
 300
 301         # Branch ALU and CU
 302         self.bgt = BranchALU(rwid)
 303         aluopwid = 3  # extra bit for immediate mode
 304         self.br1 = MultiCompUnit(rwid, self.bgt, CompALUOpSubset)
 305         CompUnitsBase.__init__(self, rwid, [self.br1])
 306
 307     def elaborate(self, platform):
 308         m = CompUnitsBase.elaborate(self, platform)
 309         comb = m.d.comb
 310
 311         # hand the same operation to all units
 312         for alu in self.units:
 313             # comb += alu.oper_i.eq(self.op) # TODO
 314             comb += alu.oper_i.eq(self.oper_i)
 315             #comb += alu.imm_i.eq(self.imm_i)
 316
 317         return m
 318
 319
 320 class FunctionUnits(Elaboratable):
 321
 322     def __init__(self, n_reg, n_int_alus, n_src, n_dst):
 323         self.n_src, self.n_dst = n_src, n_dst
 324         self.n_reg = n_reg
 325         self.n_int_alus = nf = n_int_alus
 326
 327         self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
 328         self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)
 329
 330         self.readable_o = Signal(n_int_alus, reset_less=True)
 331         self.writable_o = Signal(n_int_alus, reset_less=True)
 332
 333         # arrays
 334         src = []
 335         rsel = []
 336         rd = []
 337         for i in range(n_src):
 338             j = i + 1  # name numbering to match src1/src2
 339             src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
 340             rsel.append(Signal(n_reg, name="src%d_rsel_o" %
 341                                j, reset_less=True))
 342             rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
 343         dst = []
 344         dsel = []
 345         wr = []
 346         for i in range(n_dst):
 347             j = i + 1  # name numbering to match src1/src2
 348             dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
 349             dsel.append(Signal(n_reg, name="dst%d_rsel_o" %
 350                                j, reset_less=True))
 351             wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
 352         wpnd = []
 353         pend = []
 354         for i in range(nf):
 355             j = i + 1  # name numbering to match src1/src2
 356             pend.append(Signal(nf, name="rd_src%d_pend_o" %
 357                                j, reset_less=True))
 358             wpnd.append(Signal(nf, name="wr_dst%d_pend_o" %
 359                                j, reset_less=True))
 360
 361         self.dest_i = Array(dst)     # Dest in (top)
 362         self.src_i = Array(src)      # oper in (top)
 363
 364         # for Register File Select Lines (horizontal), per-reg
 365         self.dst_rsel_o = Array(dsel)  # dest reg (bot)
 366         self.src_rsel_o = Array(rsel)  # src reg (bot)
 367
 368         self.go_rd_i = Array(rd)
 369         self.go_wr_i = Array(wr)
 370
 371         self.go_die_i = Signal(n_int_alus, reset_less=True)
 372         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 373
 374         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 375
 376     def elaborate(self, platform):
 377         m = Module()
 378         comb = m.d.comb
 379         sync = m.d.sync
 380
 381         n_intfus = self.n_int_alus
 382
 383         # Integer FU-FU Dep Matrix
 384         intfudeps = FUFUDepMatrix(n_intfus, n_intfus, 2, 1)
 385         m.submodules.intfudeps = intfudeps
 386         # Integer FU-Reg Dep Matrix
 387         intregdeps = FURegDepMatrix(n_intfus, self.n_reg, 2, 1)
 388         m.submodules.intregdeps = intregdeps
 389
 390         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 391         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 392
 393         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 394         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 395
 396         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 397         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 398         self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid
 399
 400         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 401         comb += intfudeps.go_die_i.eq(self.go_die_i)
 402         comb += self.readable_o.eq(intfudeps.readable_o)
 403         comb += self.writable_o.eq(intfudeps.writable_o)
 404
 405         # Connect function issue / arrays, and dest/src1/src2
 406         for i in range(self.n_src):
 407             print(i, self.go_rd_i, intfudeps.go_rd_i)
 408             comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
 409             comb += intregdeps.src_i[i].eq(self.src_i[i])
 410             comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
 411             comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
 412         for i in range(self.n_dst):
 413             print(i, self.go_wr_i, intfudeps.go_wr_i)
 414             comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
 415             comb += intregdeps.dest_i[i].eq(self.dest_i[i])
 416             comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
 417             comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
 418         comb += intregdeps.go_die_i.eq(self.go_die_i)
 419         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 420
 421         return m
 422
 423
class Scoreboard(Elaboratable):
    """ Top-level scoreboard experiment.

        Owns an integer and an FP register file, a test L0 cache buffer
        and three issue-unit groups (ALU, LD/ST, branch).  elaborate()
        creates the Computation Units, dependency matrices, pickers and
        shadow matrices and wires all of them together.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.l0 = TstL0CacheBuffer()

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.instr = Decode2ToExecute1Type("sc_instr")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds and connects the whole scoreboard; returns the Module.

            Several sizes are hard-coded below (n_int_alus = 5,
            n_ldsts = 2, two memfus.addrs_i connections, the 0x1f mask)
            rather than derived from the issue groups made in __init__.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.l0 = l0 = self.l0

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested here but not connected anywhere else
        # in this method
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): 5 presumably has to equal the total number of
        # Computation Units in cu below (ALU + LD/ST + BR groups) - confirm
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        # NOTE(review): n_ldsts sizes memfus and mempick1, whereas cul is
        # sized from self.lsissue.n_insns - presumably these must agree
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, l0)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt  # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        fu_n_src = 2
        fu_n_dst = 1
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
                                                     fu_n_src, fu_n_dst)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        # (mempick1 is added as a submodule but none of its ports are
        # connected in this method)
        mempick1 = GroupPicker(n_ldsts, 1, 1)
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
        m.submodules.intpick1 = ipick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): the code below does not widen "shadows"; the
        # branch shadow is a separate 1-wide matrix (bshadow), so the
        # "increased by 1" remark above looks stale - confirm
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq_from_execute1(self.instr)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.op.eq_from_execute1(self.instr)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i[0].eq(regdecode.dest_o)
        comb += intfus.src_i[0].eq(regdecode.src1_o)
        comb += intfus.src_i[1].eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        reset_b = Signal(cul.n_units, reset_less=True)
        # XXX was cul.go_wr_i not done.o
        # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
        sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        # (this is a sync assignment, so go_ad_i follows adr_rel_o one
        # clock later)
        sync += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        # (exactly two LD/ST units are indexed here, hard-coded)
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = ipick1.go_rd_o
        go_wr_o = ipick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        for i in range(fu_n_src):
            comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
        for i in range(fu_n_dst):
            comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        int_rd_o = intfus.readable_o
        rrel_o = cu.rd_rel_o
        rqrl_o = cu.req_rel_o
        for i in range(fu_n_src):
            comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
            comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
        int_wr_o = intfus.writable_o
        for i in range(fu_n_dst):
            # XXX FIXME: rqrl_o[i] here
            comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
            comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
            # XXX experiment: use ~cu.busy_o instead.  *should* be good
            # because the comp unit is only free once completed
            comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): in this code the branch shadow lives in the
        # separate 1-wide bshadow matrix (bit 0 of each row), not in an
        # extra bit of "shadows" - confirm and update the comment above

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        # NOTE(review): 0x1f is a 5-bit mask, which matches the hard-coded
        # n_int_alus = 5 above; presumably it should track n_intfus - confirm
        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1 gives the 1 / 2 encoding described for
            # branch_direction_o in __init__ (0 is "not met yet")
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
        comb += int_src1.ren.eq(intfus.src_rsel_o[0])
        comb += int_src2.ren.eq(intfus.src_rsel_o[1])

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        for i in range(fu_n_src):
            comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
        for i in range(fu_n_dst):
            comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields whatever the two register files iterate over, followed
            by the register-number inputs, issue_o and the branch signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything produced by __iter__ as a list. """
        return list(self)
 778
 779
class IssueToScoreboard(Elaboratable):
    """Glue between an InstructionQ and a Scoreboard.

    Instructions presented on ``data_i`` are added to an internal
    InstructionQ.  While that queue is non-empty, the entry at its head is
    presented to the Scoreboard and one of the scoreboard's issue groups
    (ALU, LD/ST or branch) is asked to take it.  The head entry is popped
    (``n_sub_i`` set to 1) once the selected group reports a non-zero
    ``fn_issue_o``.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """Record the configuration and create the external signals.

        qlen, n_in, n_out, rwid and opwid are forwarded to InstructionQ in
        elaborate(); rwid and n_regs are forwarded to Scoreboard there.
        data_i is built by Instruction._nq(n_in, ...) and is indexed
        0..n_in-1 in elaborate().
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # width shared by p_add_i and qlen_o: int(log2(qlen)) + 2 bits
        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction._nq(n_in, "data_i")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """Build the module: instruction queue, scoreboard and issue glue.

        As a side effect, ``self.intregs`` is set to the scoreboard's
        integer register file so that test code can reach it; it therefore
        only exists after elaboration.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync  # not used in this method

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when n_sub_i is set, which is
        # done below once the selected issue group has accepted the
        # instruction (its fn_issue_o is non-zero).

        # in "waiting" state.  these flags are driven combinatorially by
        # the Function-Unit-Group selection further down: each one is set
        # while the head-of-queue instruction is being offered to the
        # corresponding issue group.
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        # NOTE(review): nothing above assigns insn_i=0 any more; the
        # sentence above looks like a leftover from an earlier version.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            instr = iq.data_o[0]
            imm = instr.imm_data.data
            dest = instr.write_reg.data
            src1 = instr.read_reg1.data
            src2 = instr.read_reg2.data
            op = instr.insn_type
            fu = instr.fn_unit
            opi = instr.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile
            comb += sc.instr.eq(instr)

            # choose a Function-Unit-Group
            with m.If(fu == Function.ALU):  # alu
                comb += sc.aluissue.insn_i.eq(1)  # enable alu issue
                comb += wait_issue_alu.eq(1)
            with m.Elif(fu == Function.LDST):  # ld/st
                comb += sc.lsissue.insn_i.eq(1)  # enable ldst issue
                comb += wait_issue_ls.eq(1)

            # NOTE(review): this tests bits 2..3 of insn_type, the same
            # test int_instr() applies to its small integer opcodes.
            # confirm it is still the intended way to recognise a branch.
            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                # branch operation: low two opcode bits plus immediate flag
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """Yield p_ready_o, every member of each data_i entry, then p_add_i.

        busy_o and qlen_o are not included.
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """Return the signals produced by __iter__ as a list."""
        return list(self)
 895
 896
 897 def power_instr_q(dut, pdecode2, ins, code):
 898     instrs = [pdecode2.e]
 899
 900     sendlen = 1
 901     for idx, instr in enumerate(instrs):
 902         yield dut.data_i[idx].eq(instr)
 903         insn_type = yield instr.insn_type
 904         fn_unit = yield instr.fn_unit
 905         print("senddata ", idx, insn_type, fn_unit, instr)
 906     yield dut.p_add_i.eq(sendlen)
 907     yield
 908     o_p_ready = yield dut.p_ready_o
 909     while not o_p_ready:
 910         yield
 911         o_p_ready = yield dut.p_ready_o
 912
 913     yield dut.p_add_i.eq(0)
 914
 915
 916 def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
 917             branch_success, branch_fail):
 918     instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
 919                'imm_data': (imm, op_imm),
 920                'read_reg1': src1, 'read_reg2': src2}]
 921
 922     sendlen = 1
 923     for idx, instr in enumerate(instrs):
 924         imm, op_imm = instr['imm_data']
 925         reg1 = instr['read_reg1']
 926         reg2 = instr['read_reg2']
 927         dest = instr['write_reg']
 928         insn_type = instr['insn_type']
 929         fn_unit = instr['fn_unit']
 930         yield dut.data_i[idx].insn_type.eq(insn_type)
 931         yield dut.data_i[idx].fn_unit.eq(fn_unit)
 932         yield dut.data_i[idx].read_reg1.data.eq(reg1)
 933         yield dut.data_i[idx].read_reg1.ok.eq(1)  # XXX TODO
 934         yield dut.data_i[idx].read_reg2.data.eq(reg2)
 935         yield dut.data_i[idx].read_reg2.ok.eq(1)  # XXX TODO
 936         yield dut.data_i[idx].write_reg.data.eq(dest)
 937         yield dut.data_i[idx].write_reg.ok.eq(1)  # XXX TODO
 938         yield dut.data_i[idx].imm_data.data.eq(imm)
 939         yield dut.data_i[idx].imm_data.ok.eq(op_imm)
 940         di = yield dut.data_i[idx]
 941         print("senddata %d %x" % (idx, di))
 942     yield dut.p_add_i.eq(sendlen)
 943     yield
 944     o_p_ready = yield dut.p_ready_o
 945     while not o_p_ready:
 946         yield
 947         o_p_ready = yield dut.p_ready_o
 948
 949     yield dut.p_add_i.eq(0)
 950
 951
 952 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 953     yield from disable_issue(dut)
 954     yield dut.int_dest_i.eq(dest)
 955     yield dut.int_src1_i.eq(src1)
 956     yield dut.int_src2_i.eq(src2)
 957     if (op & (0x3 << 2)) != 0:  # branch
 958         yield dut.brissue.insn_i.eq(1)
 959         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 960         yield dut.br_imm_i.eq(imm)
 961         dut_issue = dut.brissue
 962     else:
 963         yield dut.aluissue.insn_i.eq(1)
 964         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 965         yield dut.alu_imm_i.eq(imm)
 966         dut_issue = dut.aluissue
 967     yield dut.reg_enable_i.eq(1)
 968
 969     # these indicate that the instruction is to be made shadow-dependent on
 970     # (either) branch success or branch fail
 971     yield dut.branch_fail_i.eq(branch_fail)
 972     yield dut.branch_succ_i.eq(branch_success)
 973
 974     yield
 975     yield from wait_for_issue(dut, dut_issue)
 976
 977
 978 def print_reg(dut, rnums):
 979     rs = []
 980     for rnum in rnums:
 981         reg = yield dut.intregs.regs[rnum].reg
 982         rs.append("%x" % reg)
 983     rnums = map(str, rnums)
 984     print("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 985
 986
 987 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 988     insts = []
 989     for i in range(n_ops):
 990         src1 = randint(1, dut.n_regs-1)
 991         src2 = randint(1, dut.n_regs-1)
 992         imm = randint(1, (1 << dut.rwid)-1)
 993         dest = randint(1, dut.n_regs-1)
 994         op = randint(0, max_opnums)
 995         opi = 0 if randint(0, 2) else 1  # set true if random is nonzero
 996
 997         if shadowing:
 998             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 999         else:
1000             insts.append((src1, src2, dest, op, opi, imm))
1001     return insts
1002
1003
def wait_for_busy_clear(dut):
    """Simulation process: keep stepping until ``dut.busy_o`` reads false.

    Prints "busy" once for every step on which the flag is still set.
    """
    while (yield dut.busy_o):
        print("busy",)
        yield
1011
1012
def disable_issue(dut):
    """Simulation process: clear ``insn_i`` on the ALU, branch and LD/ST
    issue units of ``dut``, in that order."""
    for unit_attr in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_attr).insn_i.eq(0)
1017
1018
def wait_for_issue(dut, dut_issue):
    """Simulation process: step until ``dut_issue.fn_issue_o`` is non-zero.

    Prints "busy" for every step spent waiting.  Once the issue is seen,
    all issue units are disabled and ``dut.reg_enable_i`` is cleared.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy",)
        # yield from print_reg(dut, [1,2,3])
        yield
    # issued: withdraw the request
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    # yield from print_reg(dut, [1,2,3])
1030
1031
def scoreboard_branch_sim(dut, alusim):
    """Simulation process exercising branch shadowing on the scoreboard.

    Instructions are tuples ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``.  An ``op`` of 4 or more is a branch; for a branch the
    ``dest`` slot instead carries ``(branch_ok, branch_fail)``, two lists
    of instructions for the two outcomes (``None`` entries are skipped).

    The registers of ``dut`` and of the software model ``alusim`` are
    loaded with the same random values, the instructions are issued to the
    hardware, the same program is replayed on ``alusim`` following only
    the branch outcome the model computes, and finally the two register
    states are compared with ``alusim.check``.

    NOTE(review): the ``alusim.op`` call below passes four arguments while
    scoreboard_sim() passes six (op, opi, imm, src1, src2, dest); confirm
    which one matches RegSim.op.
    NOTE(review): the disabled random-test block uses create_random_ops(),
    whose tuples carry extra opi/imm fields that the five-field unpacking
    in this function does not accept.
    """

    iseed = 3

    for _run in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # the software model replays an untouched copy of the program
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            # way the branch went.  the scoreboard's elaborate() stores
            # br1.data_o + 1 here: 1 = branch failed (data 0),
            # 2 = branch occurred (data 1); 0 is the value written at the
            # start of this process, i.e. no result recorded yet.
            branch_direction = yield dut.branch_direction_o
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                # branch failed; this one was queued for the taken path
                continue
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                # branch occurred; this one was queued for the failed path
                continue
            if branch_direction != 0:
                # outcome already known: no shadowing needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes (dut, op, imm, src1, src2, dest, succ, fail).
            # these instruction tuples carry no immediate, so pass 0;
            # leaving it out shifts every argument by one and raises
            # TypeError for the missing last parameter.
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the software model, following only the path that the
        # model's own branch result selects
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1144
1145
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: assemble a small program and push it through dut.

    Every register except r0 is preloaded with its own index, in both
    ``dut.intregs`` and the software model ``alusim``.  Each assembled
    instruction word is driven onto ``instruction`` and, one step later,
    the decoded result is queued with power_instr_q().  When the queue has
    drained and the scoreboard is no longer busy the register state is
    compared against ``alusim``.  ``m`` is accepted but not used.
    """

    seed(0)

    # selectable test programs
    arith_program = ["addi 2, 0, 0x4321",
                     "addi 3, 0, 0x1234",
                     "add  1, 3, 2",
                     "add  4, 3, 5"
                     ]
    ldst_program = ["lbzu 6, 7(2)",
                    ]

    for _ in range(1):

        # register N starts out holding N (XXX actually, not random at all)
        for regnum in range(1, dut.n_regs):
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions
        lst = []
        if False:
            lst += arith_program
        if True:
            lst += ldst_program

        with Program(lst) as program:
            binaries = program.generate_instructions()
            listing = program.assembly.splitlines()

            # issue instruction(s), wait for issue to be free before proceeding
            for ins, code in zip(binaries, listing):
                yield instruction.eq(ins)          # raw binary instr.
                yield  # Delay(1e-6)

                print("binary 0x{:X}".format(ins & 0xffffffff))
                print("assembly", code)

                #alusim.op(op, opi, imm, src1, src2, dest)
                yield from power_instr_q(dut, pdecode2, ins, code)

        # wait for the instruction queue to drain ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... allow a few more steps ...
        for _ in range(4):
            yield
        # ... and for all instructions to stop before checking
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1202
1203
def scoreboard_sim(dut, alusim):
    """Simulation process: queue hand-written instructions and check them.

    Every register except r0 is preloaded with its own index, in both
    ``dut.intregs`` and the software model ``alusim``.  Each enabled
    instruction is first applied to ``alusim`` and then queued on ``dut``
    with instr_q().  When the queue has drained and the scoreboard is no
    longer busy the register state is compared against ``alusim``.

    The issue loop expects eight-field tuples
    ``(src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))``.  Most
    of the disabled regression sets below have fewer fields and would have
    to be converted before being switched on.
    """

    seed(0)

    for _ in range(1):

        # register N starts out holding N
        for regnum in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+regnum*3
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs += [(1, 2, 0, 0x20, 1, 1, (0, 0))]  # LD
            # alternative: (1, 2, 0, 0x10, 1, 1, (0, 0))

        if False:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if False:
            instrs += [(7, 3, 2, 4, 0, 0, (0, 0)),
                       (7, 6, 6, 2, 0, 0, (0, 0)),
                       (1, 7, 2, 2, 0, 0, (0, 0))]

        if True:
            instrs += [(2, 3, 3, MicrOp.OP_ADD, Function.ALU, 0, 0, (0, 0)),
                       (5, 3, 3, MicrOp.OP_ADD, Function.ALU, 0, 0, (0, 0))]
        if False:
            instrs += [(3, 5, 5, MicrOp.OP_MUL_L64, Function.ALU,
                        1, 7, (0, 0))]
        if False:
            instrs += [(2, 3, 3, MicrOp.OP_ADD, Function.ALU, 0, 0, (0, 0))]

        if False:
            instrs += [(2, 3, 3, 0, 0, 0, (0, 0)),
                       (5, 3, 3, 1, 0, 0, (0, 0)),
                       (3, 5, 5, 2, 0, 0, (0, 0)),
                       (5, 3, 3, 3, 0, 0, (0, 0)),
                       (3, 5, 5, 0, 0, 0, (0, 0))]

        if False:
            instrs += [(3, 3, 4, 0, 0, 13979, (0, 0)),
                       (6, 4, 1, 2, 0, 40976, (0, 0)),
                       (1, 4, 7, 4, 1, 23652, (0, 0))]

        if False:
            instrs += [(5, 6, 2, 1),
                       (2, 2, 4, 0)]
            # alternative extra entry: (2, 2, 3, 1)

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs += [(2, 6, 2, 1),
                       (2, 1, 2, 0)]

        if False:
            instrs += [(1, 2, 7, 2),
                       (7, 1, 5, 0),
                       (4, 4, 1, 1)]

        if False:
            instrs += [(5, 6, 2, 2),
                       (1, 1, 4, 1),
                       (6, 5, 3, 0)]

        if False:
            # Write-after-Write Hazard
            instrs += [(3, 6, 7, 2),
                       (4, 4, 7, 1)]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [(1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1)]

        if False:
            # self-read-write sandwich
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # very weird failure
            instrs += [(5, 2, 5, 2),
                       (2, 6, 3, 0),
                       (4, 2, 2, 1)]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (0, 1))]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (1, 0))]

        if False:
            instrs += [(4, 3, 5, 1, 0, (0, 0)),
                       (5, 2, 3, 1, 0, (0, 0)),
                       (7, 1, 5, 2, 0, (0, 0)),
                       (5, 6, 6, 4, 0, (0, 0)),
                       (7, 5, 2, 2, 0, (1, 0)),
                       (1, 7, 5, 0, 0, (0, 1)),
                       (1, 6, 1, 2, 0, (1, 0)),
                       (1, 6, 7, 3, 0, (0, 0)),
                       (6, 7, 7, 0, 0, (0, 0))]

        # issue instruction(s), wait for issue to be free before proceeding
        for seq, instr in enumerate(instrs):
            print(seq, instr)
            src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (seq, src1, src2, dest, op, fn_unit, opi, imm))
            # software model first, then the hardware queue
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for the instruction queue to drain ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... allow a few more steps ...
        for _ in range(4):
            yield
        # ... and for all instructions to stop before checking
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1363
1364
def test_scoreboard():
    """Build IssueToScoreboard next to a PowerDecode2 and simulate it.

    The combined design is written out as RTLIL to
    ``test_scoreboard6600.il`` and then simulated with power_sim(), with
    the trace going to ``test_powerboard6600.vcd``.
    """
    regwidth = 64
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=regwidth, opwid=8, n_regs=8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # created but not used below

    top = Module()
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)
    pdecode2 = PowerDecode2(pdecode)

    top.submodules.pdecode2 = pdecode2
    top.submodules.sim = dut

    # the raw instruction word feeds the decoder
    top.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    top.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    il_text = rtlil.convert(top, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = power_sim(top, dut, pdecode2, instruction, alusim)
    run_simulation(top, sim_process,
                   vcd_name='test_powerboard6600.vcd')

    # alternative drivers, currently disabled:
    # run_simulation(dut, scoreboard_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1397
1398
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()