src/soc/experiment/score6600_multi.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5 from nmigen.back.pysim import Delay
   6
   7 from soc.regfile.regfile import RegFileArray, treereduce
   8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
   9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
  10 from soc.scoreboard.global_pending import GlobalPending
  11 from soc.scoreboard.group_picker import GroupPicker
  12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  15 from soc.scoreboard.memfu import MemFunctionUnits
  16
  17 from soc.experiment.compalu import ComputationUnitNoDelay
  18 from soc.experiment.compalu_multi import MultiCompUnit, go_record
  19 from soc.experiment.compldst import LDSTCompUnit
  20 from soc.experiment.testmem import TestMemory
  21
  22 from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
  23
  24 from soc.decoder.power_enums import InternalOp, Function
  25 from soc.decoder.power_decoder import (create_pdecode)
  26 from soc.decoder.power_decoder2 import (PowerDecode2)
  27 from soc.simulator.program import Program
  28
  29
  30 from nmutil.latch import SRLatch
  31 from nmutil.nmoperator import eq
  32
  33 from random import randint, seed
  34 from copy import deepcopy
  35 from math import log
  36
  37 from soc.experiment.sim import RegSim, MemSim
  38 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
  39
  40
  41 class CompUnitsBase(Elaboratable):
  42     """ Computation Unit Base class.
  43
  44         Amazingly, this class works recursively.  It's supposed to just
  45         look after some ALUs (that can handle the same operations),
  46         grouping them together, however it turns out that the same code
  47         can also group *groups* of Computation Units together as well.
  48
  49         Basically it was intended just to concatenate the ALU's issue,
  50         go_rd etc. signals together, which start out as bits and become
  51         sequences.  Turns out that the same trick works just as well
  52         on Computation Units!
  53
  54         So this class may be used recursively to present a top-level
  55         sequential concatenation of all the signals in and out of
  56         ALUs, whilst at the same time making it convenient to group
  57         ALUs together.
  58
  59         At the lower level, the intent is that groups of (identical)
  60         ALUs may be passed the same operation.  Even beyond that,
  61         the intent is that that group of (identical) ALUs actually
  62         share the *same pipeline* and as such become a "Concurrent
  63         Computation Unit" as defined by Mitch Alsup (see section
  64         11.4.9.3)
  65     """
  66
  67     def __init__(self, rwid, units, ldstmode=False):
  68         """ Inputs:
  69
  70             * :rwid:   bit width of register file(s) - both FP and INT
  71             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  72         """
  73         self.units = units
  74         self.ldstmode = ldstmode
  75         self.rwid = rwid
  76         self.rwid = rwid
  77         if units and isinstance(units[0], CompUnitsBase):
  78             self.n_units = 0
  79             for u in self.units:
  80                 self.n_units += u.n_units
  81         else:
  82             self.n_units = len(units)
  83
  84         n_units = self.n_units
  85
  86         # inputs
  87         self.issue_i = Signal(n_units, reset_less=True)
  88         self.rd0 = go_record(n_units, "rd0")
  89         self.rd1 = go_record(n_units, "rd1")
  90         self.go_rd_i = [self.rd0.go, self.rd1.go] # XXX HACK!
  91         self.wr0 = go_record(n_units, "wr0")
  92         self.go_wr_i = [self.wr0.go]
  93         self.shadown_i = Signal(n_units, reset_less=True)
  94         self.go_die_i = Signal(n_units, reset_less=True)
  95         if ldstmode:
  96             self.go_ad_i = Signal(n_units, reset_less=True)
  97             self.go_st_i = Signal(n_units, reset_less=True)
  98
  99         # outputs
 100         self.busy_o = Signal(n_units, reset_less=True)
 101         self.rd_rel_o = [self.rd0.rel, self.rd1.rel] # HACK!
 102         self.req_rel_o = self.wr0.rel
 103         self.done_o = Signal(n_units, reset_less=True)
 104         if ldstmode:
 105             self.ld_o = Signal(n_units, reset_less=True)  # op is LD
 106             self.st_o = Signal(n_units, reset_less=True)  # op is ST
 107             self.adr_rel_o = Signal(n_units, reset_less=True)
 108             self.sto_rel_o = Signal(n_units, reset_less=True)
 109             self.load_mem_o = Signal(n_units, reset_less=True)
 110             self.stwd_mem_o = Signal(n_units, reset_less=True)
 111             self.addr_o = Signal(rwid, reset_less=True)
 112
 113         # in/out register data (note: not register#, actual data)
 114         self.data_o = Signal(rwid, reset_less=True)
 115         self.src1_i = Signal(rwid, reset_less=True)
 116         self.src2_i = Signal(rwid, reset_less=True)
 117         # input operand
 118
 119     def elaborate(self, platform):
 120         m = Module()
 121         comb = m.d.comb
 122
 123         for i, alu in enumerate(self.units):
 124             setattr(m.submodules, "comp%d" % i, alu)
 125
 126         go_rd_l0 = []
 127         go_rd_l1 = []
 128         go_wr_l = []
 129         issue_l = []
 130         busy_l = []
 131         req_rel_l = []
 132         done_l = []
 133         rd_rel0_l = []
 134         rd_rel1_l = []
 135         shadow_l = []
 136         godie_l = []
 137         for alu in self.units:
 138             req_rel_l.append(alu.req_rel_o)
 139             done_l.append(alu.done_o)
 140             shadow_l.append(alu.shadown_i)
 141             godie_l.append(alu.go_die_i)
 142             print (alu, "rel", alu.req_rel_o, alu.rd_rel_o)
 143             rd_rel0_l.append(alu.rd_rel_o[0])
 144             rd_rel1_l.append(alu.rd_rel_o[1])
 145             go_wr_l.append(alu.go_wr_i)
 146             go_rd_l0.append(alu.go_rd_i[0])
 147             go_rd_l1.append(alu.go_rd_i[1])
 148             issue_l.append(alu.issue_i)
 149             busy_l.append(alu.busy_o)
 150         comb += self.rd0.rel.eq(Cat(*rd_rel0_l))
 151         comb += self.rd1.rel.eq(Cat(*rd_rel1_l))
 152         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 153         comb += self.done_o.eq(Cat(*done_l))
 154         comb += self.busy_o.eq(Cat(*busy_l))
 155         comb += Cat(*godie_l).eq(self.go_die_i)
 156         comb += Cat(*shadow_l).eq(self.shadown_i)
 157         comb += Cat(*go_wr_l).eq(self.wr0.go) # XXX TODO
 158         comb += Cat(*go_rd_l0).eq(self.rd0.go)
 159         comb += Cat(*go_rd_l1).eq(self.rd1.go)
 160         comb += Cat(*issue_l).eq(self.issue_i)
 161
 162         # connect data register input/output
 163
 164         # merge (OR) all integer FU / ALU outputs to a single value
 165         # XXX NOTE: this only works because there is a single "port"
 166         # protected by a single go_wr.  multi-issue requires a bus
 167         # to be inserted here.
 168         if self.units:
 169             data_o = treereduce(self.units, "data_o")
 170             comb += self.data_o.eq(data_o)
 171             if self.ldstmode:
 172                 addr_o = treereduce(self.units, "addr_o")
 173                 comb += self.addr_o.eq(addr_o)
 174
 175         for i, alu in enumerate(self.units):
 176             comb += alu.src1_i.eq(self.src1_i)
 177             comb += alu.src2_i.eq(self.src2_i)
 178
 179         if not self.ldstmode:
 180             return m
 181
 182         ldmem_l = []
 183         stmem_l = []
 184         go_ad_l = []
 185         go_st_l = []
 186         ld_l = []
 187         st_l = []
 188         adr_rel_l = []
 189         sto_rel_l = []
 190         for alu in self.units:
 191             ld_l.append(alu.ld_o)
 192             st_l.append(alu.st_o)
 193             adr_rel_l.append(alu.adr_rel_o)
 194             sto_rel_l.append(alu.sto_rel_o)
 195             ldmem_l.append(alu.load_mem_o)
 196             stmem_l.append(alu.stwd_mem_o)
 197             go_ad_l.append(alu.go_ad_i)
 198             go_st_l.append(alu.go_st_i)
 199         comb += self.ld_o.eq(Cat(*ld_l))
 200         comb += self.st_o.eq(Cat(*st_l))
 201         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 202         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 203         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 204         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 205         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 206         comb += Cat(*go_st_l).eq(self.go_st_i)
 207
 208         return m
 209
 210
 211 class CompUnitLDSTs(CompUnitsBase):
 212
 213     def __init__(self, rwid, opwid, n_ldsts, mem):
 214         """ Inputs:
 215
 216             * :rwid:   bit width of register file(s) - both FP and INT
 217             * :opwid:  operand bit width
 218         """
 219         self.opwid = opwid
 220
 221         # inputs
 222         self.op = CompALUOpSubset("cua_i")
 223
 224         # Int ALUs
 225         self.alus = []
 226         for i in range(n_ldsts):
 227             self.alus.append(ALU(rwid))
 228
 229         units = []
 230         for i, alu in enumerate(self.alus):
 231             # XXX disable the 2nd memory temporarily
 232             if i == 0:
 233                 debugtest = False
 234             else:
 235                 debugtest = True
 236             units.append(LDSTCompUnit(rwid, alu, mem, debugtest=debugtest))
 237
 238         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 239
 240     def elaborate(self, platform):
 241         m = CompUnitsBase.elaborate(self, platform)
 242         comb = m.d.comb
 243
 244         # hand the same operation to all units, 4 lower bits though
 245         for alu in self.units:
 246             comb += alu.oper_i.eq(self.op)
 247             comb += alu.isalu_i.eq(0)
 248
 249         return m
 250
 251
 252 class CompUnitALUs(CompUnitsBase):
 253
 254     def __init__(self, rwid, opwid, n_alus):
 255         """ Inputs:
 256
 257             * :rwid:   bit width of register file(s) - both FP and INT
 258             * :opwid:  operand bit width
 259         """
 260         self.opwid = opwid
 261
 262         # inputs
 263         self.op = CompALUOpSubset("cua_i")
 264         self.oper_i = Signal(opwid, reset_less=True)
 265         self.imm_i = Signal(rwid, reset_less=True)
 266
 267         # Int ALUs
 268         alus = []
 269         for i in range(n_alus):
 270             alus.append(ALU(rwid))
 271
 272         units = []
 273         for alu in alus:
 274             aluopwid = 3  # extra bit for immediate mode
 275             units.append(MultiCompUnit(rwid, alu))
 276
 277         CompUnitsBase.__init__(self, rwid, units)
 278
 279     def elaborate(self, platform):
 280         m = CompUnitsBase.elaborate(self, platform)
 281         comb = m.d.comb
 282
 283         # hand the subset of operation to ALUs
 284         for alu in self.units:
 285             comb += alu.oper_i.eq(self.op)
 286             #comb += alu.oper_i[0:3].eq(self.oper_i)
 287             #comb += alu.imm_i.eq(self.imm_i)
 288
 289         return m
 290
 291
 292 class CompUnitBR(CompUnitsBase):
 293
 294     def __init__(self, rwid, opwid):
 295         """ Inputs:
 296
 297             * :rwid:   bit width of register file(s) - both FP and INT
 298             * :opwid:  operand bit width
 299
 300             Note: bgt unit is returned so that a shadow unit can be created
 301             for it
 302         """
 303         self.opwid = opwid
 304
 305         # inputs
 306         self.op = CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
 307         self.oper_i = Signal(opwid, reset_less=True)
 308         self.imm_i = Signal(rwid, reset_less=True)
 309
 310         # Branch ALU and CU
 311         self.bgt = BranchALU(rwid)
 312         aluopwid = 3  # extra bit for immediate mode
 313         self.br1 = MultiCompUnit(rwid, self.bgt)
 314         CompUnitsBase.__init__(self, rwid, [self.br1])
 315
 316     def elaborate(self, platform):
 317         m = CompUnitsBase.elaborate(self, platform)
 318         comb = m.d.comb
 319
 320         # hand the same operation to all units
 321         for alu in self.units:
 322             #comb += alu.oper_i.eq(self.op) # TODO
 323             comb += alu.oper_i.eq(self.oper_i)
 324             #comb += alu.imm_i.eq(self.imm_i)
 325
 326         return m
 327
 328
 329 class FunctionUnits(Elaboratable):
 330
 331     def __init__(self, n_reg, n_int_alus, n_src, n_dst):
 332         self.n_src, self.n_dst = n_src, n_dst
 333         self.n_reg = n_reg
 334         self.n_int_alus = nf = n_int_alus
 335
 336         self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
 337         self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)
 338
 339         self.readable_o = Signal(n_int_alus, reset_less=True)
 340         self.writable_o = Signal(n_int_alus, reset_less=True)
 341
 342         # arrays
 343         src = []
 344         rsel = []
 345         rd = []
 346         for i in range(n_src):
 347             j = i + 1 # name numbering to match src1/src2
 348             src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
 349             rsel.append(Signal(n_reg, name="src%d_rsel_o" % j, reset_less=True))
 350             rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
 351         dst = []
 352         dsel = []
 353         wr = []
 354         for i in range(n_dst):
 355             j = i + 1 # name numbering to match src1/src2
 356             dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
 357             dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j, reset_less=True))
 358             wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
 359         wpnd = []
 360         pend = []
 361         for i in range(nf):
 362             j = i + 1 # name numbering to match src1/src2
 363             pend.append(Signal(nf, name="rd_src%d_pend_o" % j, reset_less=True))
 364             wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j, reset_less=True))
 365
 366         self.dest_i = Array(dst)     # Dest in (top)
 367         self.src_i = Array(src)      # oper in (top)
 368
 369         # for Register File Select Lines (horizontal), per-reg
 370         self.dst_rsel_o = Array(dsel) # dest reg (bot)
 371         self.src_rsel_o = Array(rsel)  # src reg (bot)
 372
 373         self.go_rd_i = Array(rd)
 374         self.go_wr_i = Array(wr)
 375
 376         self.go_die_i = Signal(n_int_alus, reset_less=True)
 377         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 378
 379         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 380
 381     def elaborate(self, platform):
 382         m = Module()
 383         comb = m.d.comb
 384         sync = m.d.sync
 385
 386         n_intfus = self.n_int_alus
 387
 388         # Integer FU-FU Dep Matrix
 389         intfudeps = FUFUDepMatrix(n_intfus, n_intfus, 2, 1)
 390         m.submodules.intfudeps = intfudeps
 391         # Integer FU-Reg Dep Matrix
 392         intregdeps = FURegDepMatrix(n_intfus, self.n_reg, 2, 1)
 393         m.submodules.intregdeps = intregdeps
 394
 395         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 396         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 397
 398         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 399         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 400
 401         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 402         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 403         self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid
 404
 405         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 406         comb += intfudeps.go_die_i.eq(self.go_die_i)
 407         comb += self.readable_o.eq(intfudeps.readable_o)
 408         comb += self.writable_o.eq(intfudeps.writable_o)
 409
 410         # Connect function issue / arrays, and dest/src1/src2
 411         for i in range(self.n_src):
 412             print (i, self.go_rd_i, intfudeps.go_rd_i)
 413             comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
 414             comb += intregdeps.src_i[i].eq(self.src_i[i])
 415             comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
 416             comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
 417         for i in range(self.n_dst):
 418             print (i, self.go_wr_i, intfudeps.go_wr_i)
 419             comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
 420             comb += intregdeps.dest_i[i].eq(self.dest_i[i])
 421             comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
 422             comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
 423         comb += intregdeps.go_die_i.eq(self.go_die_i)
 424         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 425
 426         return m
 427
 428
 429 class Scoreboard(Elaboratable):
 430     def __init__(self, rwid, n_regs):
 431         """ Inputs:
 432
 433             * :rwid:   bit width of register file(s) - both FP and INT
 434             * :n_regs: depth of register file(s) - number of FP and INT regs
 435         """
 436         self.rwid = rwid
 437         self.n_regs = n_regs
 438
 439         # Register Files
 440         self.intregs = RegFileArray(rwid, n_regs)
 441         self.fpregs = RegFileArray(rwid, n_regs)
 442
 443         # Memory (test for now)
 444         self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long
 445
 446         # issue q needs to get at these
 447         self.aluissue = IssueUnitGroup(2)
 448         self.lsissue = IssueUnitGroup(2)
 449         self.brissue = IssueUnitGroup(1)
 450         # and these
 451         self.alu_op = CompALUOpSubset("alu")
 452         self.br_oper_i = Signal(4, reset_less=True)
 453         self.br_imm_i = Signal(rwid, reset_less=True)
 454         self.ls_oper_i = Signal(4, reset_less=True)
 455
 456         # inputs
 457         self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
 458         self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
 459         self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
 460         self.reg_enable_i = Signal(reset_less=True)  # enable reg decode
 461
 462         # outputs
 463         self.issue_o = Signal(reset_less=True)  # instruction was accepted
 464         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 465
 466         # for branch speculation experiment.  branch_direction = 0 if
 467         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 468         # branch_succ and branch_fail are requests to have the current
 469         # instruction be dependent on the branch unit "shadow" capability.
 470         self.branch_succ_i = Signal(reset_less=True)
 471         self.branch_fail_i = Signal(reset_less=True)
 472         self.branch_direction_o = Signal(2, reset_less=True)
 473
 474     def elaborate(self, platform):
 475         m = Module()
 476         comb = m.d.comb
 477         sync = m.d.sync
 478
 479         m.submodules.intregs = self.intregs
 480         m.submodules.fpregs = self.fpregs
 481         m.submodules.mem = mem = self.mem
 482
 483         # register ports
 484         int_dest = self.intregs.write_port("dest")
 485         int_src1 = self.intregs.read_port("src1")
 486         int_src2 = self.intregs.read_port("src2")
 487
 488         fp_dest = self.fpregs.write_port("dest")
 489         fp_src1 = self.fpregs.read_port("src1")
 490         fp_src2 = self.fpregs.read_port("src2")
 491
 492         # Int ALUs and BR ALUs
 493         n_int_alus = 5
 494         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 495         cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs
 496
 497         # LDST Comp Units
 498         n_ldsts = 2
 499         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)
 500
 501         # Comp Units
 502         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 503         bgt = cub.bgt  # get at the branch computation unit
 504         br1 = cub.br1
 505
 506         # Int FUs
 507         fu_n_src = 2
 508         fu_n_dst = 1
 509         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
 510                                                      fu_n_src, fu_n_dst)
 511
 512         # Memory FUs
 513         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 514
 515         # Memory Priority Picker 1: one gateway per memory port
 516         # picks 1 reader and 1 writer to intreg
 517         mempick1 = GroupPicker(n_ldsts, 1, 1)
 518         m.submodules.mempick1 = mempick1
 519
 520         # Count of number of FUs
 521         n_intfus = n_int_alus
 522         n_fp_fus = 0  # for now
 523
 524         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 525         # picks 1 reader and 1 writer to intreg
 526         ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
 527         m.submodules.intpick1 = ipick1
 528
 529         # INT/FP Issue Unit
 530         regdecode = RegDecode(self.n_regs)
 531         m.submodules.regdecode = regdecode
 532         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 533         m.submodules.issueunit = issueunit
 534
 535         # Shadow Matrix.  currently n_intfus shadows, to be used for
 536         # write-after-write hazards.  NOTE: there is one extra for branches,
 537         # so the shadow width is increased by 1
 538         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 539         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 540
 541         # record previous instruction to cast shadow on current instruction
 542         prev_shadow = Signal(n_intfus)
 543
 544         # Branch Speculation recorder.  tracks the success/fail state as
 545         # each instruction is issued, so that when the branch occurs the
 546         # allow/cancel can be issued as appropriate.
 547         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 548
 549         # ---------
 550         # ok start wiring things together...
 551         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 552         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 553         # ---------
 554
 555         # ---------
 556         # Issue Unit is where it starts.  set up some in/outs for this module
 557         # ---------
 558         comb += [regdecode.dest_i.eq(self.int_dest_i),
 559                  regdecode.src1_i.eq(self.int_src1_i),
 560                  regdecode.src2_i.eq(self.int_src2_i),
 561                  regdecode.enable_i.eq(self.reg_enable_i),
 562                  self.issue_o.eq(issueunit.issue_o)
 563                  ]
 564
 565         # take these to outside (issue needs them)
 566         comb += cua.op.eq(self.alu_op)
 567         comb += cub.oper_i.eq(self.br_oper_i)
 568         comb += cub.imm_i.eq(self.br_imm_i)
 569         comb += cul.op.eq(self.alu_op) # TODO: separate ls_op?
 570
 571         # TODO: issueunit.f (FP)
 572
 573         # and int function issue / busy arrays, and dest/src1/src2
 574         comb += intfus.dest_i[0].eq(regdecode.dest_o)
 575         comb += intfus.src_i[0].eq(regdecode.src1_o)
 576         comb += intfus.src_i[1].eq(regdecode.src2_o)
 577
 578         fn_issue_o = issueunit.fn_issue_o
 579
 580         comb += intfus.fn_issue_i.eq(fn_issue_o)
 581         comb += issueunit.busy_i.eq(cu.busy_o)
 582         comb += self.busy_o.eq(cu.busy_o.bool())
 583
 584         # ---------
 585         # Memory Function Unit
 586         # ---------
 587         reset_b = Signal(cul.n_units, reset_less=True)
 588         # XXX was cul.go_wr_i not done.o
 589         # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
 590         sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
 591
 592         comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
 593         comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
 594         comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit
 595
 596         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 597         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 598         # issue_i.  multi-issue gets a bit more complex but not a lot.
 599         prior_ldsts = Signal(cul.n_units, reset_less=True)
 600         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 601         with m.If(self.ls_oper_i[3]):  # LD bit of operand
 602             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 603         with m.If(self.ls_oper_i[2]):  # ST bit of operand
 604             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 605
 606         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 607         # just immediately activate go_adr
 608         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 609
 610         # connect up address data
 611         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 612         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 613
 614         # connect loadable / storable to go_ld/go_st.
 615         # XXX should only be done when the memory ld/st has actually happened!
 616         go_st_i = Signal(cul.n_units, reset_less=True)
 617         go_ld_i = Signal(cul.n_units, reset_less=True)
 618         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
 619                            cul.adr_rel_o & cul.ld_o)
 620         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
 621                            cul.sto_rel_o & cul.st_o)
 622         comb += memfus.go_ld_i.eq(go_ld_i)
 623         comb += memfus.go_st_i.eq(go_st_i)
 624         #comb += cul.go_wr_i.eq(go_ld_i)
 625         comb += cul.go_st_i.eq(go_st_i)
 626
 627         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 628         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 629         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 630
 631         # ---------
 632         # merge shadow matrices outputs
 633         # ---------
 634
 635         # these are explained in ShadowMatrix docstring, and are to be
 636         # connected to the FUReg and FUFU Matrices, to get them to reset
 637         anydie = Signal(n_intfus, reset_less=True)
 638         allshadown = Signal(n_intfus, reset_less=True)
 639         shreset = Signal(n_intfus, reset_less=True)
 640         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 641         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 642         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 643
 644         # ---------
 645         # connect fu-fu matrix
 646         # ---------
 647
 648         # Group Picker... done manually for now.
 649         go_rd_o = ipick1.go_rd_o
 650         go_wr_o = ipick1.go_wr_o
 651         go_rd_i = intfus.go_rd_i
 652         go_wr_i = intfus.go_wr_i
 653         go_die_i = intfus.go_die_i
 654         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 655         for i in range(fu_n_src):
 656             comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
 657         for i in range(fu_n_dst):
 658             comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
 659         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die
 660
 661         # Connect Picker
 662         # ---------
 663         int_rd_o = intfus.readable_o
 664         rrel_o = cu.rd_rel_o
 665         rqrl_o = cu.req_rel_o
 666         for i in range(fu_n_src):
 667             comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
 668             comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
 669         int_wr_o = intfus.writable_o
 670         for i in range(fu_n_dst):
 671             # XXX FIXME: rqrl_o[i] here
 672             comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
 673             comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])
 674
 675         # ---------
 676         # Shadow Matrix
 677         # ---------
 678
 679         comb += shadows.issue_i.eq(fn_issue_o)
 680         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 681         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 682         # ---------
 683         # NOTE; this setup is for the instruction order preservation...
 684
 685         # connect shadows / go_dies to Computation Units
 686         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 687         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 688
 689         # ok connect first n_int_fu shadows to busy lines, to create an
 690         # instruction-order linked-list-like arrangement, using a bit-matrix
 691         # (instead of e.g. a ring buffer).
 692
 693         # when written, the shadow can be cancelled (and was good)
 694         for i in range(n_intfus):
 695             #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 696             # XXX experiment: use ~cu.busy_o instead.  *should* be good
 697             # because the comp unit is only free once completed
 698             comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])
 699
 700         # *previous* instruction shadows *current* instruction, and, obviously,
 701         # if the previous is completed (!busy) don't cast the shadow!
 702         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 703         for i in range(n_intfus):
 704             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 705
 706         # ---------
 707         # ... and this is for branch speculation.  it uses the extra bit
 708         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 709         # only needs to set shadow_i, s_fail_i and s_good_i
 710
 711         # issue captures shadow_i (if enabled)
 712         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 713
 714         bactive = Signal(reset_less=True)
 715         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 716
 717         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 718         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 719             comb += bshadow.issue_i.eq(fn_issue_o)
 720             for i in range(n_intfus):
 721                 with m.If(fn_issue_o & (Const(1 << i))):
 722                     comb += bshadow.shadow_i[i][0].eq(1)
 723
 724         # finally, we need an indicator to the test infrastructure as to
 725         # whether the branch succeeded or failed, plus, link up to the
 726         # "recorder" of whether the instruction was under shadow or not
 727
 728         with m.If(br1.issue_i):
 729             sync += bspec.active_i.eq(1)
 730         with m.If(self.branch_succ_i):
 731             comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 732         with m.If(self.branch_fail_i):
 733             comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 734
 735         # branch is active (TODO: a better signal: this is over-using the
 736         # go_write signal - actually the branch should not be "writing")
 737         with m.If(br1.go_wr_i):
 738             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 739             sync += bspec.active_i.eq(0)
 740             comb += bspec.br_i.eq(1)
 741             # branch occurs if data == 1, failed if data == 0
 742             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 743             for i in range(n_intfus):
 744                 # *expected* direction of the branch matched against *actual*
 745                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 746                 # ... or it didn't
 747                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 748
 749         # ---------
 750         # Connect Register File(s)
 751         # ---------
 752         comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
 753         comb += int_src1.ren.eq(intfus.src_rsel_o[0])
 754         comb += int_src2.ren.eq(intfus.src_rsel_o[1])
 755
 756         # connect ALUs to regfile
 757         comb += int_dest.data_i.eq(cu.data_o)
 758         comb += cu.src1_i.eq(int_src1.data_o)
 759         comb += cu.src2_i.eq(int_src2.data_o)
 760
 761         # connect ALU Computation Units
 762         for i in range(fu_n_src):
 763             comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
 764         for i in range(fu_n_dst):
 765             comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
 766         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 767
 768         return m
 769
 770     def __iter__(self):
 771         yield from self.intregs
 772         yield from self.fpregs
 773         yield self.int_dest_i
 774         yield self.int_src1_i
 775         yield self.int_src2_i
 776         yield self.issue_o
 777         yield self.branch_succ_i
 778         yield self.branch_fail_i
 779         yield self.branch_direction_o
 780
 781     def ports(self):
 782         return list(self)
 783
 784
 785 class IssueToScoreboard(Elaboratable):
 786
 787     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 788         self.qlen = qlen
 789         self.n_in = n_in
 790         self.n_out = n_out
 791         self.rwid = rwid
 792         self.opw = opwid
 793         self.n_regs = n_regs
 794
 795         mqbits = unsigned(int(log(qlen) / log(2))+2)
 796         self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
 797         self.p_ready_o = Signal()  # instructions were added
 798         self.data_i = Instruction._nq(n_in, "data_i")
 799
 800         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 801         self.qlen_o = Signal(mqbits, reset_less=True)
 802
 803     def elaborate(self, platform):
 804         m = Module()
 805         comb = m.d.comb
 806         sync = m.d.sync
 807
 808         iq = InstructionQ(self.rwid, self.opw, self.qlen,
 809                           self.n_in, self.n_out)
 810         sc = Scoreboard(self.rwid, self.n_regs)
 811         m.submodules.iq = iq
 812         m.submodules.sc = sc
 813
 814         # get at the regfile for testing
 815         self.intregs = sc.intregs
 816
 817         # and the "busy" signal and instruction queue length
 818         comb += self.busy_o.eq(sc.busy_o)
 819         comb += self.qlen_o.eq(iq.qlen_o)
 820
 821         # link up instruction queue
 822         comb += iq.p_add_i.eq(self.p_add_i)
 823         comb += self.p_ready_o.eq(iq.p_ready_o)
 824         for i in range(self.n_in):
 825             comb += eq(iq.data_i[i], self.data_i[i])
 826
 827         # take instruction and process it.  note that it's possible to
 828         # "inspect" the queue contents *without* actually removing the
 829         # items.  items are only removed when the
 830
 831         # in "waiting" state
 832         wait_issue_br = Signal()
 833         wait_issue_alu = Signal()
 834         wait_issue_ls = Signal()
 835
 836         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 837             # set instruction pop length to 1 if the unit accepted
 838             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 839                 with m.If(iq.qlen_o != 0):
 840                     comb += iq.n_sub_i.eq(1)
 841             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 842                 with m.If(iq.qlen_o != 0):
 843                     comb += iq.n_sub_i.eq(1)
 844             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 845                 with m.If(iq.qlen_o != 0):
 846                     comb += iq.n_sub_i.eq(1)
 847
 848         # see if some instruction(s) are here.  note that this is
 849         # "inspecting" the in-place queue.  note also that on the
 850         # cycle following "waiting" for fn_issue_o to be set, the
 851         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 852         with m.If(iq.qlen_o != 0):
 853             # get the operands and operation
 854             instr = iq.data_o[0]
 855             imm = instr.imm_data.data
 856             dest = instr.write_reg.data
 857             src1 = instr.read_reg1.data
 858             src2 = instr.read_reg2.data
 859             op = instr.insn_type
 860             fu = instr.fn_unit
 861             opi = instr.imm_data.ok  # immediate set
 862
 863             # set the src/dest regs
 864             comb += sc.int_dest_i.eq(dest)
 865             comb += sc.int_src1_i.eq(src1)
 866             comb += sc.int_src2_i.eq(src2)
 867             comb += sc.reg_enable_i.eq(1)  # enable the regfile
 868
 869             # choose a Function-Unit-Group
 870             with m.If(fu == Function.ALU):  # alu
 871                 comb += sc.alu_op.eq_from_execute1(instr)
 872                 comb += sc.aluissue.insn_i.eq(1) # enable alu issue
 873                 comb += wait_issue_alu.eq(1)
 874             with m.Elif(fu == Function.LDST):  # ld/st
 875                 comb += sc.alu_op.eq_from_execute1(instr) # XXX separate ls_op?
 876                 comb += sc.lsissue.insn_i.eq(1) # enable ldst issue
 877                 comb += wait_issue_ls.eq(1)
 878
 879             with m.Elif((op & (0x3 << 2)) != 0):  # branch
 880                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 881                 comb += sc.br_imm_i.eq(imm)
 882                 comb += sc.brissue.insn_i.eq(1)
 883                 comb += wait_issue_br.eq(1)
 884             # XXX TODO
 885             # these indicate that the instruction is to be made
 886             # shadow-dependent on
 887             # (either) branch success or branch fail
 888             # yield sc.branch_fail_i.eq(branch_fail)
 889             # yield sc.branch_succ_i.eq(branch_success)
 890
 891         return m
 892
 893     def __iter__(self):
 894         yield self.p_ready_o
 895         for o in self.data_i:
 896             yield from list(o)
 897         yield self.p_add_i
 898
 899     def ports(self):
 900         return list(self)
 901
 902
 903 def power_instr_q(dut, pdecode2, ins, code):
 904     instrs = [pdecode2.e]
 905
 906     sendlen = 1
 907     for idx, instr in enumerate(instrs):
 908         yield dut.data_i[idx].eq(instr)
 909         insn_type = yield instr.insn_type
 910         fn_unit = yield instr.fn_unit
 911         print("senddata ", idx, insn_type, fn_unit, instr)
 912     yield dut.p_add_i.eq(sendlen)
 913     yield
 914     o_p_ready = yield dut.p_ready_o
 915     while not o_p_ready:
 916         yield
 917         o_p_ready = yield dut.p_ready_o
 918
 919     yield dut.p_add_i.eq(0)
 920
 921
 922 def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
 923             branch_success, branch_fail):
 924     instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
 925                 'imm_data': (imm, op_imm),
 926                'read_reg1': src1, 'read_reg2': src2}]
 927
 928     sendlen = 1
 929     for idx, instr in enumerate(instrs):
 930         imm, op_imm = instr['imm_data']
 931         reg1 = instr['read_reg1']
 932         reg2 = instr['read_reg2']
 933         dest = instr['write_reg']
 934         insn_type = instr['insn_type']
 935         fn_unit = instr['fn_unit']
 936         yield dut.data_i[idx].insn_type.eq(insn_type)
 937         yield dut.data_i[idx].fn_unit.eq(fn_unit)
 938         yield dut.data_i[idx].read_reg1.data.eq(reg1)
 939         yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO
 940         yield dut.data_i[idx].read_reg2.data.eq(reg2)
 941         yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO
 942         yield dut.data_i[idx].write_reg.data.eq(dest)
 943         yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO
 944         yield dut.data_i[idx].imm_data.data.eq(imm)
 945         yield dut.data_i[idx].imm_data.ok.eq(op_imm)
 946         di = yield dut.data_i[idx]
 947         print("senddata %d %x" % (idx, di))
 948     yield dut.p_add_i.eq(sendlen)
 949     yield
 950     o_p_ready = yield dut.p_ready_o
 951     while not o_p_ready:
 952         yield
 953         o_p_ready = yield dut.p_ready_o
 954
 955     yield dut.p_add_i.eq(0)
 956
 957
 958 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 959     yield from disable_issue(dut)
 960     yield dut.int_dest_i.eq(dest)
 961     yield dut.int_src1_i.eq(src1)
 962     yield dut.int_src2_i.eq(src2)
 963     if (op & (0x3 << 2)) != 0:  # branch
 964         yield dut.brissue.insn_i.eq(1)
 965         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 966         yield dut.br_imm_i.eq(imm)
 967         dut_issue = dut.brissue
 968     else:
 969         yield dut.aluissue.insn_i.eq(1)
 970         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 971         yield dut.alu_imm_i.eq(imm)
 972         dut_issue = dut.aluissue
 973     yield dut.reg_enable_i.eq(1)
 974
 975     # these indicate that the instruction is to be made shadow-dependent on
 976     # (either) branch success or branch fail
 977     yield dut.branch_fail_i.eq(branch_fail)
 978     yield dut.branch_succ_i.eq(branch_success)
 979
 980     yield
 981     yield from wait_for_issue(dut, dut_issue)
 982
 983
 984 def print_reg(dut, rnums):
 985     rs = []
 986     for rnum in rnums:
 987         reg = yield dut.intregs.regs[rnum].reg
 988         rs.append("%x" % reg)
 989     rnums = map(str, rnums)
 990     print("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 991
 992
 993 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 994     insts = []
 995     for i in range(n_ops):
 996         src1 = randint(1, dut.n_regs-1)
 997         src2 = randint(1, dut.n_regs-1)
 998         imm = randint(1, (1 << dut.rwid)-1)
 999         dest = randint(1, dut.n_regs-1)
1000         op = randint(0, max_opnums)
1001         opi = 0 if randint(0, 2) else 1  # set true if random is nonzero
1002
1003         if shadowing:
1004             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
1005         else:
1006             insts.append((src1, src2, dest, op, opi, imm))
1007     return insts
1008
1009
def wait_for_busy_clear(dut):
    """Simulation process: step the clock until dut.busy_o reads false.

    Prints "busy" for every cycle spent waiting.
    """
    while (yield dut.busy_o):
        print("busy")
        yield
1017
1018
def disable_issue(dut):
    """Simulation process: clear insn_i on the ALU, branch and LD/ST issue
    units, in that order.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1023
1024
def wait_for_issue(dut, dut_issue):
    """Simulation process: wait until ``dut_issue.fn_issue_o`` is non-zero.

    "busy" is printed for every cycle spent waiting.  Once the unit has
    issued, all issue requests and reg_enable_i are cleared.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy")
        # yield from print_reg(dut, [1,2,3])
        yield

    # issued: withdraw the request
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    # yield from print_reg(dut, [1,2,3])
1036
1037
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: exercise branch shadowing on the scoreboard.

    Registers are loaded with random values (mirrored into ``alusim``), a
    short program containing one branch is issued through int_instr, and
    afterwards the same program is replayed on ``alusim`` and the two
    register files are compared with alusim.check().

    Instruction tuples have the form
    ``(src1, src2, dest, op, (shadow_on, shadow_off))``.  For a branch
    (op >= 4) ``dest`` is instead a pair ``(branch_ok, branch_fail)`` of
    instruction lists for the two outcomes; entries may be None.
    """
    iseed = 3

    for _ in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops appends tuples with extra
            # opi/imm fields, which do not match the 5-field unpacking
            # used below - this path needs updating before it is enabled.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # the software model replays an untouched copy later on
        siminsts = deepcopy(insts)

        # issue instruction(s).  "instrs" is the same list object as
        # "insts": branch outcomes are appended to it while it is drained.
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            # NOTE(review): Scoreboard.elaborate sets branch_direction_o to
            # data_o+1, so 1 looks like "branch failed" and 2 like "branch
            # succeeded" - confirm.  An instruction that belongs to the
            # path which was not taken is not issued at all.
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue
            if branch_direction != 0:
                # branch already resolved: nothing left to be shadowed by
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate as its third argument; these
            # instruction tuples carry none, so pass an explicit 0 (the
            # call previously omitted it and was one argument short).
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program on the software model, following the branch
        # outcome that the model itself computes
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): scoreboard_sim calls alusim.op with two extra
            # arguments (opi, imm) - confirm which signature is current.
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1150
1151
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: assemble a short program and run it through the
    decoder and the instruction queue.

    Each binary instruction is driven onto ``instruction`` and then queued
    with power_instr_q.  Once the queue has drained and the dut is no
    longer busy, the register file is compared against ``alusim``.
    ``m`` is accepted but not used here.
    """
    seed(0)

    for _ in range(1):

        # set values in the registers: each register holds its own number
        # (XXX actually, not random at all)
        for regnum in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+regnum*3
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions
        lst = []
        if False:
            lst.extend(["addi 2, 0, 0x4321",
                        "addi 3, 0, 0x1234",
                        "add  1, 3, 2",
                        "add  4, 3, 5"])
        if True:
            lst.append("lbz 6, 7(2)")

        with Program(lst) as program:
            binaries = program.generate_instructions()
            asm_lines = program.assembly.splitlines()

            # issue instruction(s), wait for issue to be free before proceeding
            for ins, code in zip(binaries, asm_lines):
                yield instruction.eq(ins)          # raw binary instr.
                yield #Delay(1e-6)

                print("binary 0x{:X}".format(ins & 0xffffffff))
                print("assembly", code)

                #alusim.op(op, opi, imm, src1, src2, dest)
                yield from power_instr_q(dut, pdecode2, ins, code)

        # wait for the queue to drain ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... then a few extra cycles, and for everything to go idle
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1207
1208
def scoreboard_sim(dut, alusim):
    """Simulation process: queue a fixed list of instructions via instr_q
    and compare the resulting register file against ``alusim``.

    Each instruction is first applied to the software model (alusim.op)
    and then pushed into the dut's instruction queue.  Once the queue has
    drained and the dut is no longer busy, alusim.check() / alusim.dump()
    are run.

    The ``if False:`` / ``if True:`` blocks below are hand-toggled
    regression vectors.  The issue loop unpacks every entry as
    ``(src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))``;
    several of the disabled vectors have fewer fields than that and would
    fail to unpack if they were switched on as they stand.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        # (currently not random: each register holds its own number)
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x20, 1, 1, (0, 0)))  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, 0, 0, (0, 0)))
            instrs.append((7, 6, 6, 2, 0, 0, (0, 0)))
            instrs.append((1, 7, 2, 2, 0, 0, (0, 0)))

        # the only vectors currently enabled: two ALU adds that both
        # write register 3
        if True:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
            instrs.append((5, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
        if False:
            instrs.append((3, 5, 5, InternalOp.OP_MUL_L64, Function.ALU,
                           1, 7, (0, 0)))
        if False:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            # overrides registers 5 and 3 (in both dut and model) first
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # as above, with a different value in register 5
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            print (i, instr)
            # every entry must have exactly these eight fields
            src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (i, src1, src2, dest, op, fn_unit, opi, imm))
            # update the software model first, then queue on the dut
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first until the instruction queue reports zero length ...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # (... then four more cycles, and until busy_o has cleared)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1368
1369
def test_scoreboard():
    """Build the decoder + IssueToScoreboard test bench, write its RTLIL to
    test_scoreboard6600.il and run the power_sim simulation process
    (VCD output in test_powerboard6600.vcd).
    """
    regwidth = 64
    dut = IssueToScoreboard(2, 1, 1, regwidth, 8, 8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)

    m = Module()
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    pdecode2 = PowerDecode2(pdecode)
    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    # the raw 32-bit instruction word drives the decoder
    m.d.comb += [pdecode2.dec.raw_opcode_in.eq(instruction),
                 pdecode2.dec.bigendian.eq(0)]  # little / big?

    rtlil_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, sim_process, vcd_name='test_powerboard6600.vcd')

    # alternative simulation processes, currently disabled:
    #run_simulation(dut, scoreboard_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1402
1403
# run the scoreboard test bench when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()