src/soc/experiment/score6600_multi.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5 from nmigen.back.pysim import Delay
   6
   7 from soc.regfile.regfile import RegFileArray, treereduce
   8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
   9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
  10 from soc.scoreboard.global_pending import GlobalPending
  11 from soc.scoreboard.group_picker import GroupPicker
  12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  15 from soc.scoreboard.memfu import MemFunctionUnits
  16
  17 from soc.experiment.compalu import ComputationUnitNoDelay
  18 from soc.experiment.compalu_multi import MultiCompUnit, go_record
  19 from soc.experiment.compldst import LDSTCompUnit
  20 from soc.experiment.testmem import TestMemory
  21
  22 from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
  23
  24 from soc.decoder.power_enums import InternalOp, Function
  25 from soc.decoder.power_decoder import (create_pdecode)
  26 from soc.decoder.power_decoder2 import (PowerDecode2)
  27 from soc.simulator.program import Program
  28
  29
  30 from nmutil.latch import SRLatch
  31 from nmutil.nmoperator import eq
  32
  33 from random import randint, seed
  34 from copy import deepcopy
  35 from math import log
  36
  37 from soc.experiment.sim import RegSim, MemSim
  38 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
  39
  40
  41 class CompUnitsBase(Elaboratable):
  42     """ Computation Unit Base class.
  43
  44         Amazingly, this class works recursively.  It's supposed to just
  45         look after some ALUs (that can handle the same operations),
  46         grouping them together, however it turns out that the same code
  47         can also group *groups* of Computation Units together as well.
  48
  49         Basically it was intended just to concatenate the ALU's issue,
  50         go_rd etc. signals together, which start out as bits and become
  51         sequences.  Turns out that the same trick works just as well
  52         on Computation Units!
  53
  54         So this class may be used recursively to present a top-level
  55         sequential concatenation of all the signals in and out of
  56         ALUs, whilst at the same time making it convenient to group
  57         ALUs together.
  58
  59         At the lower level, the intent is that groups of (identical)
  60         ALUs may be passed the same operation.  Even beyond that,
  61         the intent is that that group of (identical) ALUs actually
  62         share the *same pipeline* and as such become a "Concurrent
  63         Computation Unit" as defined by Mitch Alsup (see section
  64         11.4.9.3)
  65     """
  66
  67     def __init__(self, rwid, units, ldstmode=False):
  68         """ Inputs:
  69
  70             * :rwid:   bit width of register file(s) - both FP and INT
  71             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  72         """
  73         self.units = units
  74         self.ldstmode = ldstmode
  75         self.rwid = rwid
  76         self.rwid = rwid
  77         if units and isinstance(units[0], CompUnitsBase):
  78             self.n_units = 0
  79             for u in self.units:
  80                 self.n_units += u.n_units
  81         else:
  82             self.n_units = len(units)
  83
  84         n_units = self.n_units
  85
  86         # inputs
  87         self.issue_i = Signal(n_units, reset_less=True)
  88         self.rd0 = go_record(n_units, "rd0")
  89         self.rd1 = go_record(n_units, "rd1")
  90         self.go_rd_i = [self.rd0.go, self.rd1.go] # XXX HACK!
  91         self.wr0 = go_record(n_units, "wr0")
  92         self.go_wr_i = [self.wr0.go]
  93         self.shadown_i = Signal(n_units, reset_less=True)
  94         self.go_die_i = Signal(n_units, reset_less=True)
  95         if ldstmode:
  96             self.go_ad_i = Signal(n_units, reset_less=True)
  97             self.go_st_i = Signal(n_units, reset_less=True)
  98
  99         # outputs
 100         self.busy_o = Signal(n_units, reset_less=True)
 101         self.rd_rel_o = [self.rd0.rel, self.rd1.rel] # HACK!
 102         self.req_rel_o = self.wr0.rel
 103         self.done_o = Signal(n_units, reset_less=True)
 104         if ldstmode:
 105             self.ld_o = Signal(n_units, reset_less=True)  # op is LD
 106             self.st_o = Signal(n_units, reset_less=True)  # op is ST
 107             self.adr_rel_o = Signal(n_units, reset_less=True)
 108             self.sto_rel_o = Signal(n_units, reset_less=True)
 109             self.load_mem_o = Signal(n_units, reset_less=True)
 110             self.stwd_mem_o = Signal(n_units, reset_less=True)
 111             self.addr_o = Signal(rwid, reset_less=True)
 112
 113         # in/out register data (note: not register#, actual data)
 114         self.data_o = Signal(rwid, reset_less=True)
 115         self.src1_i = Signal(rwid, reset_less=True)
 116         self.src2_i = Signal(rwid, reset_less=True)
 117         # input operand
 118
 119     def elaborate(self, platform):
 120         m = Module()
 121         comb = m.d.comb
 122
 123         for i, alu in enumerate(self.units):
 124             setattr(m.submodules, "comp%d" % i, alu)
 125
 126         go_rd_l0 = []
 127         go_rd_l1 = []
 128         go_wr_l = []
 129         issue_l = []
 130         busy_l = []
 131         req_rel_l = []
 132         done_l = []
 133         rd_rel0_l = []
 134         rd_rel1_l = []
 135         shadow_l = []
 136         godie_l = []
 137         for alu in self.units:
 138             req_rel_l.append(alu.req_rel_o)
 139             done_l.append(alu.done_o)
 140             shadow_l.append(alu.shadown_i)
 141             godie_l.append(alu.go_die_i)
 142             print (alu, "rel", alu.req_rel_o, alu.rd_rel_o)
 143             rd_rel0_l.append(alu.rd_rel_o[0])
 144             rd_rel1_l.append(alu.rd_rel_o[1])
 145             go_wr_l.append(alu.go_wr_i)
 146             go_rd_l0.append(alu.go_rd_i[0])
 147             go_rd_l1.append(alu.go_rd_i[1])
 148             issue_l.append(alu.issue_i)
 149             busy_l.append(alu.busy_o)
 150         comb += self.rd0.rel.eq(Cat(*rd_rel0_l))
 151         comb += self.rd1.rel.eq(Cat(*rd_rel1_l))
 152         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 153         comb += self.done_o.eq(Cat(*done_l))
 154         comb += self.busy_o.eq(Cat(*busy_l))
 155         comb += Cat(*godie_l).eq(self.go_die_i)
 156         comb += Cat(*shadow_l).eq(self.shadown_i)
 157         comb += Cat(*go_wr_l).eq(self.wr0.go) # XXX TODO
 158         comb += Cat(*go_rd_l0).eq(self.rd0.go)
 159         comb += Cat(*go_rd_l1).eq(self.rd1.go)
 160         comb += Cat(*issue_l).eq(self.issue_i)
 161
 162         # connect data register input/output
 163
 164         # merge (OR) all integer FU / ALU outputs to a single value
 165         # XXX NOTE: this only works because there is a single "port"
 166         # protected by a single go_wr.  multi-issue requires a bus
 167         # to be inserted here.
 168         if self.units:
 169             data_o = treereduce(self.units, "data_o")
 170             comb += self.data_o.eq(data_o)
 171             if self.ldstmode:
 172                 addr_o = treereduce(self.units, "addr_o")
 173                 comb += self.addr_o.eq(addr_o)
 174
 175         for i, alu in enumerate(self.units):
 176             comb += alu.src1_i.eq(self.src1_i)
 177             comb += alu.src2_i.eq(self.src2_i)
 178
 179         if not self.ldstmode:
 180             return m
 181
 182         ldmem_l = []
 183         stmem_l = []
 184         go_ad_l = []
 185         go_st_l = []
 186         ld_l = []
 187         st_l = []
 188         adr_rel_l = []
 189         sto_rel_l = []
 190         for alu in self.units:
 191             ld_l.append(alu.ld_o)
 192             st_l.append(alu.st_o)
 193             adr_rel_l.append(alu.adr_rel_o)
 194             sto_rel_l.append(alu.sto_rel_o)
 195             ldmem_l.append(alu.load_mem_o)
 196             stmem_l.append(alu.stwd_mem_o)
 197             go_ad_l.append(alu.go_ad_i)
 198             go_st_l.append(alu.go_st_i)
 199         comb += self.ld_o.eq(Cat(*ld_l))
 200         comb += self.st_o.eq(Cat(*st_l))
 201         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 202         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 203         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 204         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 205         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 206         comb += Cat(*go_st_l).eq(self.go_st_i)
 207
 208         return m
 209
 210
 211 class CompUnitLDSTs(CompUnitsBase):
 212
 213     def __init__(self, rwid, opwid, n_ldsts, mem):
 214         """ Inputs:
 215
 216             * :rwid:   bit width of register file(s) - both FP and INT
 217             * :opwid:  operand bit width
 218         """
 219         self.opwid = opwid
 220
 221         # inputs
 222         self.oper_i = Signal(opwid, reset_less=True)
 223         self.imm_i = Signal(rwid, reset_less=True)
 224
 225         # Int ALUs
 226         self.alus = []
 227         for i in range(n_ldsts):
 228             self.alus.append(ALU(rwid))
 229
 230         units = []
 231         for alu in self.alus:
 232             units.append(LDSTCompUnit(rwid, alu, mem))
 233
 234         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 235
 236     def elaborate(self, platform):
 237         m = CompUnitsBase.elaborate(self, platform)
 238         comb = m.d.comb
 239
 240         # hand the same operation to all units, 4 lower bits though
 241         for alu in self.units:
 242             comb += alu.oper_i[0:4].eq(self.oper_i)
 243             #comb += alu.imm_i.eq(self.imm_i)
 244             comb += alu.isalu_i.eq(0)
 245
 246         return m
 247
 248
 249 class CompUnitALUs(CompUnitsBase):
 250
 251     def __init__(self, rwid, opwid, n_alus):
 252         """ Inputs:
 253
 254             * :rwid:   bit width of register file(s) - both FP and INT
 255             * :opwid:  operand bit width
 256         """
 257         self.opwid = opwid
 258
 259         # inputs
 260         self.op = CompALUOpSubset("cua_i")
 261         self.oper_i = Signal(opwid, reset_less=True)
 262         self.imm_i = Signal(rwid, reset_less=True)
 263
 264         # Int ALUs
 265         alus = []
 266         for i in range(n_alus):
 267             alus.append(ALU(rwid))
 268
 269         units = []
 270         for alu in alus:
 271             aluopwid = 3  # extra bit for immediate mode
 272             units.append(MultiCompUnit(rwid, alu))
 273
 274         CompUnitsBase.__init__(self, rwid, units)
 275
 276     def elaborate(self, platform):
 277         m = CompUnitsBase.elaborate(self, platform)
 278         comb = m.d.comb
 279
 280         # hand the subset of operation to ALUs
 281         for alu in self.units:
 282             comb += alu.oper_i.eq(self.op)
 283             #comb += alu.oper_i[0:3].eq(self.oper_i)
 284             #comb += alu.imm_i.eq(self.imm_i)
 285
 286         return m
 287
 288
 289 class CompUnitBR(CompUnitsBase):
 290
 291     def __init__(self, rwid, opwid):
 292         """ Inputs:
 293
 294             * :rwid:   bit width of register file(s) - both FP and INT
 295             * :opwid:  operand bit width
 296
 297             Note: bgt unit is returned so that a shadow unit can be created
 298             for it
 299         """
 300         self.opwid = opwid
 301
 302         # inputs
 303         self.op = CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
 304         self.oper_i = Signal(opwid, reset_less=True)
 305         self.imm_i = Signal(rwid, reset_less=True)
 306
 307         # Branch ALU and CU
 308         self.bgt = BranchALU(rwid)
 309         aluopwid = 3  # extra bit for immediate mode
 310         self.br1 = MultiCompUnit(rwid, self.bgt)
 311         CompUnitsBase.__init__(self, rwid, [self.br1])
 312
 313     def elaborate(self, platform):
 314         m = CompUnitsBase.elaborate(self, platform)
 315         comb = m.d.comb
 316
 317         # hand the same operation to all units
 318         for alu in self.units:
 319             #comb += alu.oper_i.eq(self.op) # TODO
 320             comb += alu.oper_i.eq(self.oper_i)
 321             #comb += alu.imm_i.eq(self.imm_i)
 322
 323         return m
 324
 325
 326 class FunctionUnits(Elaboratable):
 327
 328     def __init__(self, n_reg, n_int_alus, n_src, n_dst):
 329         self.n_src, self.n_dst = n_src, n_dst
 330         self.n_reg = n_reg
 331         self.n_int_alus = nf = n_int_alus
 332
 333         self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
 334         self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)
 335
 336         self.readable_o = Signal(n_int_alus, reset_less=True)
 337         self.writable_o = Signal(n_int_alus, reset_less=True)
 338
 339         # arrays
 340         src = []
 341         rsel = []
 342         rd = []
 343         for i in range(n_src):
 344             j = i + 1 # name numbering to match src1/src2
 345             src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
 346             rsel.append(Signal(n_reg, name="src%d_rsel_o" % j, reset_less=True))
 347             rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
 348         dst = []
 349         dsel = []
 350         wr = []
 351         for i in range(n_dst):
 352             j = i + 1 # name numbering to match src1/src2
 353             dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
 354             dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j, reset_less=True))
 355             wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
 356         wpnd = []
 357         pend = []
 358         for i in range(nf):
 359             j = i + 1 # name numbering to match src1/src2
 360             pend.append(Signal(nf, name="rd_src%d_pend_o" % j, reset_less=True))
 361             wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j, reset_less=True))
 362
 363         self.dest_i = Array(dst)     # Dest in (top)
 364         self.src_i = Array(src)      # oper in (top)
 365
 366         # for Register File Select Lines (horizontal), per-reg
 367         self.dst_rsel_o = Array(dsel) # dest reg (bot)
 368         self.src_rsel_o = Array(rsel)  # src reg (bot)
 369
 370         self.go_rd_i = Array(rd)
 371         self.go_wr_i = Array(wr)
 372
 373         self.go_die_i = Signal(n_int_alus, reset_less=True)
 374         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 375
 376         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 377
 378     def elaborate(self, platform):
 379         m = Module()
 380         comb = m.d.comb
 381         sync = m.d.sync
 382
 383         n_intfus = self.n_int_alus
 384
 385         # Integer FU-FU Dep Matrix
 386         intfudeps = FUFUDepMatrix(n_intfus, n_intfus, 2, 1)
 387         m.submodules.intfudeps = intfudeps
 388         # Integer FU-Reg Dep Matrix
 389         intregdeps = FURegDepMatrix(n_intfus, self.n_reg, 2, 1)
 390         m.submodules.intregdeps = intregdeps
 391
 392         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 393         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 394
 395         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 396         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 397
 398         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 399         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 400         self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid
 401
 402         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 403         comb += intfudeps.go_die_i.eq(self.go_die_i)
 404         comb += self.readable_o.eq(intfudeps.readable_o)
 405         comb += self.writable_o.eq(intfudeps.writable_o)
 406
 407         # Connect function issue / arrays, and dest/src1/src2
 408         for i in range(self.n_src):
 409             print (i, self.go_rd_i, intfudeps.go_rd_i)
 410             comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
 411             comb += intregdeps.src_i[i].eq(self.src_i[i])
 412             comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
 413             comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
 414         for i in range(self.n_dst):
 415             print (i, self.go_wr_i, intfudeps.go_wr_i)
 416             comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
 417             comb += intregdeps.dest_i[i].eq(self.dest_i[i])
 418             comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
 419             comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
 420         comb += intregdeps.go_die_i.eq(self.go_die_i)
 421         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 422
 423         return m
 424
 425
 426 class Scoreboard(Elaboratable):
 427     def __init__(self, rwid, n_regs):
 428         """ Inputs:
 429
 430             * :rwid:   bit width of register file(s) - both FP and INT
 431             * :n_regs: depth of register file(s) - number of FP and INT regs
 432         """
 433         self.rwid = rwid
 434         self.n_regs = n_regs
 435
 436         # Register Files
 437         self.intregs = RegFileArray(rwid, n_regs)
 438         self.fpregs = RegFileArray(rwid, n_regs)
 439
 440         # Memory (test for now)
 441         self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long
 442
 443         # issue q needs to get at these
 444         self.aluissue = IssueUnitGroup(2)
 445         self.lsissue = IssueUnitGroup(2)
 446         self.brissue = IssueUnitGroup(1)
 447         # and these
 448         self.alu_op = CompALUOpSubset("alu")
 449         self.br_oper_i = Signal(4, reset_less=True)
 450         self.br_imm_i = Signal(rwid, reset_less=True)
 451         self.ls_oper_i = Signal(4, reset_less=True)
 452         self.ls_imm_i = Signal(rwid, reset_less=True)
 453
 454         # inputs
 455         self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
 456         self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
 457         self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
 458         self.reg_enable_i = Signal(reset_less=True)  # enable reg decode
 459
 460         # outputs
 461         self.issue_o = Signal(reset_less=True)  # instruction was accepted
 462         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 463
 464         # for branch speculation experiment.  branch_direction = 0 if
 465         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 466         # branch_succ and branch_fail are requests to have the current
 467         # instruction be dependent on the branch unit "shadow" capability.
 468         self.branch_succ_i = Signal(reset_less=True)
 469         self.branch_fail_i = Signal(reset_less=True)
 470         self.branch_direction_o = Signal(2, reset_less=True)
 471
 472     def elaborate(self, platform):
 473         m = Module()
 474         comb = m.d.comb
 475         sync = m.d.sync
 476
 477         m.submodules.intregs = self.intregs
 478         m.submodules.fpregs = self.fpregs
 479         m.submodules.mem = mem = self.mem
 480
 481         # register ports
 482         int_dest = self.intregs.write_port("dest")
 483         int_src1 = self.intregs.read_port("src1")
 484         int_src2 = self.intregs.read_port("src2")
 485
 486         fp_dest = self.fpregs.write_port("dest")
 487         fp_src1 = self.fpregs.read_port("src1")
 488         fp_src2 = self.fpregs.read_port("src2")
 489
 490         # Int ALUs and BR ALUs
 491         n_int_alus = 5
 492         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 493         cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs
 494
 495         # LDST Comp Units
 496         n_ldsts = 2
 497         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)
 498
 499         # Comp Units
 500         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 501         bgt = cub.bgt  # get at the branch computation unit
 502         br1 = cub.br1
 503
 504         # Int FUs
 505         fu_n_src = 2
 506         fu_n_dst = 1
 507         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
 508                                                      fu_n_src, fu_n_dst)
 509
 510         # Memory FUs
 511         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 512
 513         # Memory Priority Picker 1: one gateway per memory port
 514         # picks 1 reader and 1 writer to intreg
 515         mempick1 = GroupPicker(n_ldsts, 1, 1)
 516         m.submodules.mempick1 = mempick1
 517
 518         # Count of number of FUs
 519         n_intfus = n_int_alus
 520         n_fp_fus = 0  # for now
 521
 522         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 523         # picks 1 reader and 1 writer to intreg
 524         ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
 525         m.submodules.intpick1 = ipick1
 526
 527         # INT/FP Issue Unit
 528         regdecode = RegDecode(self.n_regs)
 529         m.submodules.regdecode = regdecode
 530         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 531         m.submodules.issueunit = issueunit
 532
 533         # Shadow Matrix.  currently n_intfus shadows, to be used for
 534         # write-after-write hazards.  NOTE: there is one extra for branches,
 535         # so the shadow width is increased by 1
 536         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 537         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 538
 539         # record previous instruction to cast shadow on current instruction
 540         prev_shadow = Signal(n_intfus)
 541
 542         # Branch Speculation recorder.  tracks the success/fail state as
 543         # each instruction is issued, so that when the branch occurs the
 544         # allow/cancel can be issued as appropriate.
 545         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 546
 547         # ---------
 548         # ok start wiring things together...
 549         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 550         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 551         # ---------
 552
 553         # ---------
 554         # Issue Unit is where it starts.  set up some in/outs for this module
 555         # ---------
 556         comb += [regdecode.dest_i.eq(self.int_dest_i),
 557                  regdecode.src1_i.eq(self.int_src1_i),
 558                  regdecode.src2_i.eq(self.int_src2_i),
 559                  regdecode.enable_i.eq(self.reg_enable_i),
 560                  self.issue_o.eq(issueunit.issue_o)
 561                  ]
 562
 563         # take these to outside (issue needs them)
 564         comb += cua.op.eq(self.alu_op)
 565         comb += cub.oper_i.eq(self.br_oper_i)
 566         comb += cub.imm_i.eq(self.br_imm_i)
 567         comb += cul.oper_i.eq(self.ls_oper_i)
 568         comb += cul.imm_i.eq(self.ls_imm_i)
 569
 570         # TODO: issueunit.f (FP)
 571
 572         # and int function issue / busy arrays, and dest/src1/src2
 573         comb += intfus.dest_i[0].eq(regdecode.dest_o)
 574         comb += intfus.src_i[0].eq(regdecode.src1_o)
 575         comb += intfus.src_i[1].eq(regdecode.src2_o)
 576
 577         fn_issue_o = issueunit.fn_issue_o
 578
 579         comb += intfus.fn_issue_i.eq(fn_issue_o)
 580         comb += issueunit.busy_i.eq(cu.busy_o)
 581         comb += self.busy_o.eq(cu.busy_o.bool())
 582
 583         # ---------
 584         # Memory Function Unit
 585         # ---------
 586         reset_b = Signal(cul.n_units, reset_less=True)
 587         # XXX was cul.go_wr_i not done.o
 588         # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
 589         sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
 590
 591         comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
 592         comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
 593         comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit
 594
 595         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 596         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 597         # issue_i.  multi-issue gets a bit more complex but not a lot.
 598         prior_ldsts = Signal(cul.n_units, reset_less=True)
 599         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 600         with m.If(self.ls_oper_i[3]):  # LD bit of operand
 601             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 602         with m.If(self.ls_oper_i[2]):  # ST bit of operand
 603             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 604
 605         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 606         # just immediately activate go_adr
 607         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 608
 609         # connect up address data
 610         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 611         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 612
 613         # connect loadable / storable to go_ld/go_st.
 614         # XXX should only be done when the memory ld/st has actually happened!
 615         go_st_i = Signal(cul.n_units, reset_less=True)
 616         go_ld_i = Signal(cul.n_units, reset_less=True)
 617         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
 618                            cul.adr_rel_o & cul.ld_o)
 619         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
 620                            cul.sto_rel_o & cul.st_o)
 621         comb += memfus.go_ld_i.eq(go_ld_i)
 622         comb += memfus.go_st_i.eq(go_st_i)
 623         #comb += cul.go_wr_i.eq(go_ld_i)
 624         comb += cul.go_st_i.eq(go_st_i)
 625
 626         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 627         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 628         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 629
 630         # ---------
 631         # merge shadow matrices outputs
 632         # ---------
 633
 634         # these are explained in ShadowMatrix docstring, and are to be
 635         # connected to the FUReg and FUFU Matrices, to get them to reset
 636         anydie = Signal(n_intfus, reset_less=True)
 637         allshadown = Signal(n_intfus, reset_less=True)
 638         shreset = Signal(n_intfus, reset_less=True)
 639         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 640         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 641         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 642
 643         # ---------
 644         # connect fu-fu matrix
 645         # ---------
 646
 647         # Group Picker... done manually for now.
 648         go_rd_o = ipick1.go_rd_o
 649         go_wr_o = ipick1.go_wr_o
 650         go_rd_i = intfus.go_rd_i
 651         go_wr_i = intfus.go_wr_i
 652         go_die_i = intfus.go_die_i
 653         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 654         for i in range(fu_n_src):
 655             comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
 656         for i in range(fu_n_dst):
 657             comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
 658         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die
 659
 660         # Connect Picker
 661         # ---------
 662         int_rd_o = intfus.readable_o
 663         rrel_o = cu.rd_rel_o
 664         rqrl_o = cu.req_rel_o
 665         for i in range(fu_n_src):
 666             comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
 667             comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
 668         int_wr_o = intfus.writable_o
 669         for i in range(fu_n_dst):
 670             # XXX FIXME: rqrl_o[i] here
 671             comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
 672             comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])
 673
 674         # ---------
 675         # Shadow Matrix
 676         # ---------
 677
 678         comb += shadows.issue_i.eq(fn_issue_o)
 679         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 680         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 681         # ---------
 682         # NOTE; this setup is for the instruction order preservation...
 683
 684         # connect shadows / go_dies to Computation Units
 685         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 686         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 687
 688         # ok connect first n_int_fu shadows to busy lines, to create an
 689         # instruction-order linked-list-like arrangement, using a bit-matrix
 690         # (instead of e.g. a ring buffer).
 691
 692         # when written, the shadow can be cancelled (and was good)
 693         for i in range(n_intfus):
 694             #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 695             # XXX experiment: use ~cu.busy_o instead.  *should* be good
 696             # because the comp unit is only free once completed
 697             comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])
 698
 699         # *previous* instruction shadows *current* instruction, and, obviously,
 700         # if the previous is completed (!busy) don't cast the shadow!
 701         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 702         for i in range(n_intfus):
 703             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 704
 705         # ---------
 706         # ... and this is for branch speculation.  it uses the extra bit
 707         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 708         # only needs to set shadow_i, s_fail_i and s_good_i
 709
 710         # issue captures shadow_i (if enabled)
 711         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 712
 713         bactive = Signal(reset_less=True)
 714         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 715
 716         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 717         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 718             comb += bshadow.issue_i.eq(fn_issue_o)
 719             for i in range(n_intfus):
 720                 with m.If(fn_issue_o & (Const(1 << i))):
 721                     comb += bshadow.shadow_i[i][0].eq(1)
 722
 723         # finally, we need an indicator to the test infrastructure as to
 724         # whether the branch succeeded or failed, plus, link up to the
 725         # "recorder" of whether the instruction was under shadow or not
 726
 727         with m.If(br1.issue_i):
 728             sync += bspec.active_i.eq(1)
 729         with m.If(self.branch_succ_i):
 730             comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 731         with m.If(self.branch_fail_i):
 732             comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 733
 734         # branch is active (TODO: a better signal: this is over-using the
 735         # go_write signal - actually the branch should not be "writing")
 736         with m.If(br1.go_wr_i):
 737             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 738             sync += bspec.active_i.eq(0)
 739             comb += bspec.br_i.eq(1)
 740             # branch occurs if data == 1, failed if data == 0
 741             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 742             for i in range(n_intfus):
 743                 # *expected* direction of the branch matched against *actual*
 744                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 745                 # ... or it didn't
 746                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 747
 748         # ---------
 749         # Connect Register File(s)
 750         # ---------
 751         comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
 752         comb += int_src1.ren.eq(intfus.src_rsel_o[0])
 753         comb += int_src2.ren.eq(intfus.src_rsel_o[1])
 754
 755         # connect ALUs to regfile
 756         comb += int_dest.data_i.eq(cu.data_o)
 757         comb += cu.src1_i.eq(int_src1.data_o)
 758         comb += cu.src2_i.eq(int_src2.data_o)
 759
 760         # connect ALU Computation Units
 761         for i in range(fu_n_src):
 762             comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
 763         for i in range(fu_n_dst):
 764             comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
 765         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 766
 767         return m
 768
 769     def __iter__(self):
 770         yield from self.intregs
 771         yield from self.fpregs
 772         yield self.int_dest_i
 773         yield self.int_src1_i
 774         yield self.int_src2_i
 775         yield self.issue_o
 776         yield self.branch_succ_i
 777         yield self.branch_fail_i
 778         yield self.branch_direction_o
 779
 780     def ports(self):
 781         return list(self)
 782
 783
class IssueToScoreboard(Elaboratable):
    """Feeds instructions from an InstructionQ into a Scoreboard.

    elaborate() instantiates both as submodules ("iq" and "sc"), looks
    at the instruction at the head of the queue (iq.data_o[0]) without
    removing it, drives the scoreboard's register selects and the
    matching issue unit from it, and requests that one entry be popped
    from the queue once the selected issue unit reports a non-zero
    fn_issue_o.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """Store the configuration and create the external signals.

        qlen, n_in and n_out are forwarded to InstructionQ; rwid is
        forwarded to both InstructionQ and Scoreboard; opwid is forwarded
        to InstructionQ; n_regs is forwarded to Scoreboard.
        (presumably: queue depth, instructions in/out per cycle, register
        width, opcode width, number of registers - confirm against those
        classes)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # counter width: int(log2(qlen)) + 2 bits, unsigned
        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction._nq(n_in, "data_i")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """Build the module: instruction queue wired to the scoreboard."""
        m = Module()
        comb = m.d.comb
        sync = m.d.sync  # not used in this method

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        # (note: this attribute only exists after elaborate() has run)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i is set, which
        # is done below once the selected issue unit has accepted the
        # instruction (its fn_issue_o is non-zero).

        # in "waiting" state
        # (each flag is set further down, when the head-of-queue
        # instruction has been routed to the corresponding issue unit)
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        # NOTE(review): no explicit insn_i=0 assignment is visible in
        # this method; the comment above may describe an earlier version.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            instr = iq.data_o[0]
            imm = instr.imm_data.data
            dest = instr.write_reg.data
            src1 = instr.read_reg1.data
            src2 = instr.read_reg2.data
            op = instr.insn_type
            fu = instr.fn_unit
            opi = instr.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile

            # choose a Function-Unit-Group
            # (ALU is selected by fn_unit; branch and ld/st are still
            # selected by testing bits of the operation field)
            with m.If(fu == Function.ALU):  # alu
                comb += sc.alu_op.eq_from_execute1(instr)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)
            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                # branch operation: low two op bits plus the immediate flag
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3 << 4)) != 0):  # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """Yield p_ready_o, every element of each data_i entry, then p_add_i."""
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """Return the signals produced by __iter__ as a list."""
        return list(self)
 906
 907
 908 def power_instr_q(dut, pdecode2, ins, code):
 909     instrs = [pdecode2.e]
 910
 911     sendlen = 1
 912     for idx, instr in enumerate(instrs):
 913         yield dut.data_i[idx].eq(instr)
 914         insn_type = yield instr.insn_type
 915         fn_unit = yield instr.fn_unit
 916         print("senddata ", idx, insn_type, fn_unit, instr)
 917     yield dut.p_add_i.eq(sendlen)
 918     yield
 919     o_p_ready = yield dut.p_ready_o
 920     while not o_p_ready:
 921         yield
 922         o_p_ready = yield dut.p_ready_o
 923
 924     yield dut.p_add_i.eq(0)
 925
 926
 927 def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
 928             branch_success, branch_fail):
 929     instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
 930                 'imm_data': (imm, op_imm),
 931                'read_reg1': src1, 'read_reg2': src2}]
 932
 933     sendlen = 1
 934     for idx, instr in enumerate(instrs):
 935         imm, op_imm = instr['imm_data']
 936         reg1 = instr['read_reg1']
 937         reg2 = instr['read_reg2']
 938         dest = instr['write_reg']
 939         insn_type = instr['insn_type']
 940         fn_unit = instr['fn_unit']
 941         yield dut.data_i[idx].insn_type.eq(insn_type)
 942         yield dut.data_i[idx].fn_unit.eq(fn_unit)
 943         yield dut.data_i[idx].read_reg1.data.eq(reg1)
 944         yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO
 945         yield dut.data_i[idx].read_reg2.data.eq(reg2)
 946         yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO
 947         yield dut.data_i[idx].write_reg.data.eq(dest)
 948         yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO
 949         yield dut.data_i[idx].imm_data.data.eq(imm)
 950         yield dut.data_i[idx].imm_data.ok.eq(op_imm)
 951         di = yield dut.data_i[idx]
 952         print("senddata %d %x" % (idx, di))
 953     yield dut.p_add_i.eq(sendlen)
 954     yield
 955     o_p_ready = yield dut.p_ready_o
 956     while not o_p_ready:
 957         yield
 958         o_p_ready = yield dut.p_ready_o
 959
 960     yield dut.p_add_i.eq(0)
 961
 962
 963 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 964     yield from disable_issue(dut)
 965     yield dut.int_dest_i.eq(dest)
 966     yield dut.int_src1_i.eq(src1)
 967     yield dut.int_src2_i.eq(src2)
 968     if (op & (0x3 << 2)) != 0:  # branch
 969         yield dut.brissue.insn_i.eq(1)
 970         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 971         yield dut.br_imm_i.eq(imm)
 972         dut_issue = dut.brissue
 973     else:
 974         yield dut.aluissue.insn_i.eq(1)
 975         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 976         yield dut.alu_imm_i.eq(imm)
 977         dut_issue = dut.aluissue
 978     yield dut.reg_enable_i.eq(1)
 979
 980     # these indicate that the instruction is to be made shadow-dependent on
 981     # (either) branch success or branch fail
 982     yield dut.branch_fail_i.eq(branch_fail)
 983     yield dut.branch_succ_i.eq(branch_success)
 984
 985     yield
 986     yield from wait_for_issue(dut, dut_issue)
 987
 988
 989 def print_reg(dut, rnums):
 990     rs = []
 991     for rnum in rnums:
 992         reg = yield dut.intregs.regs[rnum].reg
 993         rs.append("%x" % reg)
 994     rnums = map(str, rnums)
 995     print("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 996
 997
 998 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 999     insts = []
1000     for i in range(n_ops):
1001         src1 = randint(1, dut.n_regs-1)
1002         src2 = randint(1, dut.n_regs-1)
1003         imm = randint(1, (1 << dut.rwid)-1)
1004         dest = randint(1, dut.n_regs-1)
1005         op = randint(0, max_opnums)
1006         opi = 0 if randint(0, 2) else 1  # set true if random is nonzero
1007
1008         if shadowing:
1009             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
1010         else:
1011             insts.append((src1, src2, dest, op, opi, imm))
1012     return insts
1013
1014
def wait_for_busy_clear(dut):
    """Simulation helper: clock until ``dut.busy_o`` reads as zero.

    Prints "busy" and advances one clock for every poll that finds the
    signal still set.
    """
    while (yield dut.busy_o):
        print("busy",)
        yield
1022
1023
def disable_issue(dut):
    """Simulation helper: clear ``insn_i`` on the ALU, branch and LD/ST
    issue units, in that order."""
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1028
1029
def wait_for_issue(dut, dut_issue):
    """Simulation helper: clock until ``dut_issue.fn_issue_o`` is non-zero.

    While the issue unit has not issued, prints "busy" and advances one
    clock per poll.  Once it has, all issue requests are de-asserted and
    the regfile enable is cleared.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy",)
        # yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    # yield from print_reg(dut, [1,2,3])
1041
1042
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: issue a small branch test program and check it.

    Seeds the random generator, loads random values into the integer
    registers (mirrored into ``alusim``), issues the instruction list
    through int_instr() while following ``dut.branch_direction_o`` to skip
    instructions on the path not taken, waits for the hardware to go idle,
    replays the same program on ``alusim`` and finally compares and dumps
    the register state via ``alusim.check`` / ``alusim.dump``.

    Instruction tuples are ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``.  For a branch (``op >= 4``) the ``dest`` slot instead
    holds ``(branch_ok, branch_fail)``, two lists of follow-on
    instructions (entries may be None).
    """

    iseed = 3

    for _ in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for rnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops(..., True, ...) returns
            # 7-field tuples, which do not match the 5-field layout
            # unpacked by the loops below - fix before enabling.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # independent copy for the software model: the issue loop below
        # consumes (and extends) insts
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        while insts:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: no shadowing needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        insts.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        insts.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr() takes (dut, op, imm, src1, src2, dest, ...): the
            # tuples here carry no immediate, so pass 0 for it (the call
            # previously omitted the argument and raised TypeError)
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # use the same six-argument form (op, opi, imm, src1, src2,
            # dest) as the other alusim.op() call in this file, with no
            # immediate.  NOTE(review): confirm against RegSim.op.
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                # follow only the path the software model says was taken
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1155
1156
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: assemble a short POWER program and queue it.

    Register ``i`` (for i in 1..dut.n_regs-1) is initialised to the value
    ``i`` in both the hardware regfile and ``alusim``.  Each assembled
    instruction word is then driven onto ``instruction`` and, one clock
    later, pushed into the queue with power_instr_q().  Once the queue has
    drained and the scoreboard is idle, ``alusim.check`` and
    ``alusim.dump`` are run against the hardware.

    ``m`` is accepted but not used here.
    """
    seed(0)

    # deterministic register contents (not random at all)
    for regnum in range(1, dut.n_regs):
        yield dut.intregs.regs[regnum].reg.eq(regnum)
        alusim.setval(regnum, regnum)

    # the program to run
    asm_lines = [
        "addi 2, 0, 0x4321",
        "addi 3, 0, 0x1234",
        "add  1, 3, 2",
        "add  4, 3, 5",
    ]
    with Program(asm_lines) as program:
        words = program.generate_instructions()
        listing = program.assembly.splitlines()

        # issue instruction(s), wait for issue to be free before proceeding
        for ins, code in zip(words, listing):
            yield instruction.eq(ins)          # raw binary instr.
            yield

            print("binary 0x{:X}".format(ins & 0xffffffff))
            print("assembly", code)

            #alusim.op(op, opi, imm, src1, src2, dest)
            yield from power_instr_q(dut, pdecode2, ins, code)

    # wait for the instruction queue to empty...
    while (yield dut.qlen_o) != 0:
        yield
    # ...then a few more clocks before polling busy
    for _ in range(4):
        yield
    yield from wait_for_busy_clear(dut)

    # check status
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
1206
1207
def scoreboard_sim(dut, alusim):
    """Simulation process: queue a list of instructions and check results.

    Initialises register i to the value i (in both the hardware regfile
    and alusim), builds the instruction list from whichever of the
    "if True/False" groups below are enabled, runs each instruction on
    alusim and pushes it into the hardware queue via instr_q(), waits for
    the queue to drain and the scoreboard to go idle, then calls
    alusim.check() and alusim.dump().

    Each enabled entry must have the layout unpacked by the issue loop:
    (src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail)).
    """

    seed(0)

    for i in range(1):

        # set register contents: currently register i simply holds i
        # (the random / patterned alternatives are commented out)
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        # NOTE(review): most of the disabled groups below use older
        # 4/5/6/7-field tuple layouts (as does create_random_ops) and
        # would fail to unpack in the issue loop if simply switched on.
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x20, 1, 1, (0, 0)))  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, 0, 0, (0, 0)))
            instrs.append((7, 6, 6, 2, 0, 0, (0, 0)))
            instrs.append((1, 7, 2, 2, 0, 0, (0, 0)))

        # the only group currently enabled: two ADDs on the ALU
        if True:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
            instrs.append((5, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
        if False:
            instrs.append((3, 5, 5, InternalOp.OP_MUL_L64, Function.ALU,
                           1, 7, (0, 0)))
        if False:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            print (i, instr)
            src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (i, src1, src2, dest, op, fn_unit, opi, imm))
            # run the software model first, then queue for the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first until the instruction queue reports length zero...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # (...then four more clocks before polling busy_o)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1367
1368
def test_scoreboard():
    """Build the queue+scoreboard together with a POWER decoder, write
    the design out as RTLIL and run the power_sim() simulation on it."""
    regwidth = 64
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=regwidth, opwid=8, n_regs=8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # created but not used by the current test

    m = Module()
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    pdecode2 = PowerDecode2(pdecode)
    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    # feed the raw instruction word into the decoder
    m.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    m.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    # dump the design as RTLIL
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil.convert(m, ports=dut.ports()))

    sim_process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, sim_process, vcd_name='test_powerboard6600.vcd')

    # alternative simulations, currently disabled:
    #run_simulation(dut, scoreboard_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1401
1402
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()