src/soc/experiment/score6600_multi.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5 from nmigen.back.pysim import Delay
   6
   7 from soc.regfile.regfile import RegFileArray, treereduce
   8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
   9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
  10 from soc.scoreboard.global_pending import GlobalPending
  11 from soc.scoreboard.group_picker import GroupPicker
  12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  15 from soc.scoreboard.memfu import MemFunctionUnits
  16
  17 from soc.experiment.compalu import ComputationUnitNoDelay
  18 from soc.experiment.compalu_multi import MultiCompUnit, go_record
  19 from soc.experiment.compldst_multi import LDSTCompUnit
  20 from soc.experiment.compldst_multi import CompLDSTOpSubset
  21 from soc.experiment.l0_cache import TstL0CacheBuffer
  22
  23 from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
  24
  25 from soc.decoder.power_enums import InternalOp, Function
  26 from soc.decoder.power_decoder import (create_pdecode)
  27 from soc.decoder.power_decoder2 import (PowerDecode2)
  28 from soc.decoder.power_decoder2 import Decode2ToExecute1Type
  29
  30 from soc.simulator.program import Program
  31
  32
  33 from nmutil.latch import SRLatch
  34 from nmutil.nmoperator import eq
  35
  36 from random import randint, seed
  37 from copy import deepcopy
  38 from math import log
  39
  40 from soc.experiment.sim import RegSim, MemSim
  41 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
  42
  43
  44 class CompUnitsBase(Elaboratable):
  45     """ Computation Unit Base class.
  46
  47         Amazingly, this class works recursively.  It's supposed to just
  48         look after some ALUs (that can handle the same operations),
  49         grouping them together, however it turns out that the same code
  50         can also group *groups* of Computation Units together as well.
  51
  52         Basically it was intended just to concatenate the ALU's issue,
  53         go_rd etc. signals together, which start out as bits and become
  54         sequences.  Turns out that the same trick works just as well
  55         on Computation Units!
  56
  57         So this class may be used recursively to present a top-level
  58         sequential concatenation of all the signals in and out of
  59         ALUs, whilst at the same time making it convenient to group
  60         ALUs together.
  61
  62         At the lower level, the intent is that groups of (identical)
  63         ALUs may be passed the same operation.  Even beyond that,
  64         the intent is that that group of (identical) ALUs actually
  65         share the *same pipeline* and as such become a "Concurrent
  66         Computation Unit" as defined by Mitch Alsup (see section
  67         11.4.9.3)
  68     """
  69
  70     def __init__(self, rwid, units, ldstmode=False):
  71         """ Inputs:
  72
  73             * :rwid:   bit width of register file(s) - both FP and INT
  74             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  75         """
  76         self.units = units
  77         self.ldstmode = ldstmode
  78         self.rwid = rwid
  79         self.rwid = rwid
  80         if units and isinstance(units[0], CompUnitsBase):
  81             self.n_units = 0
  82             for u in self.units:
  83                 self.n_units += u.n_units
  84         else:
  85             self.n_units = len(units)
  86
  87         n_units = self.n_units
  88
  89         # inputs
  90         self.issue_i = Signal(n_units, reset_less=True)
  91         self.rd0 = go_record(n_units, "rd0")
  92         self.rd1 = go_record(n_units, "rd1")
  93         self.go_rd_i = [self.rd0.go, self.rd1.go] # XXX HACK!
  94         self.wr0 = go_record(n_units, "wr0")
  95         self.go_wr_i = [self.wr0.go]
  96         self.shadown_i = Signal(n_units, reset_less=True)
  97         self.go_die_i = Signal(n_units, reset_less=True)
  98         if ldstmode:
  99             self.go_ad_i = Signal(n_units, reset_less=True)
 100             self.go_st_i = Signal(n_units, reset_less=True)
 101
 102         # outputs
 103         self.busy_o = Signal(n_units, reset_less=True)
 104         self.rd_rel_o = [self.rd0.rel, self.rd1.rel] # HACK!
 105         self.req_rel_o = self.wr0.rel
 106         self.done_o = Signal(n_units, reset_less=True)
 107         if ldstmode:
 108             self.ld_o = Signal(n_units, reset_less=True)  # op is LD
 109             self.st_o = Signal(n_units, reset_less=True)  # op is ST
 110             self.adr_rel_o = Signal(n_units, reset_less=True)
 111             self.sto_rel_o = Signal(n_units, reset_less=True)
 112             self.load_mem_o = Signal(n_units, reset_less=True)
 113             self.stwd_mem_o = Signal(n_units, reset_less=True)
 114             self.addr_o = Signal(rwid, reset_less=True)
 115
 116         # in/out register data (note: not register#, actual data)
 117         self.data_o = Signal(rwid, reset_less=True)
 118         self.src1_i = Signal(rwid, reset_less=True)
 119         self.src2_i = Signal(rwid, reset_less=True)
 120         # input operand
 121
 122     def elaborate(self, platform):
 123         m = Module()
 124         comb = m.d.comb
 125
 126         for i, alu in enumerate(self.units):
 127             setattr(m.submodules, "comp%d" % i, alu)
 128
 129         go_rd_l0 = []
 130         go_rd_l1 = []
 131         go_wr_l = []
 132         issue_l = []
 133         busy_l = []
 134         req_rel_l = []
 135         done_l = []
 136         rd_rel0_l = []
 137         rd_rel1_l = []
 138         shadow_l = []
 139         godie_l = []
 140         for alu in self.units:
 141             req_rel_l.append(alu.req_rel_o)
 142             done_l.append(alu.done_o)
 143             shadow_l.append(alu.shadown_i)
 144             godie_l.append(alu.go_die_i)
 145             print (alu, "rel", alu.req_rel_o, alu.rd_rel_o)
 146             rd_rel0_l.append(alu.rd_rel_o[0])
 147             rd_rel1_l.append(alu.rd_rel_o[1])
 148             go_wr_l.append(alu.go_wr_i)
 149             go_rd_l0.append(alu.go_rd_i[0])
 150             go_rd_l1.append(alu.go_rd_i[1])
 151             issue_l.append(alu.issue_i)
 152             busy_l.append(alu.busy_o)
 153         comb += self.rd0.rel.eq(Cat(*rd_rel0_l))
 154         comb += self.rd1.rel.eq(Cat(*rd_rel1_l))
 155         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 156         comb += self.done_o.eq(Cat(*done_l))
 157         comb += self.busy_o.eq(Cat(*busy_l))
 158         comb += Cat(*godie_l).eq(self.go_die_i)
 159         comb += Cat(*shadow_l).eq(self.shadown_i)
 160         comb += Cat(*go_wr_l).eq(self.wr0.go) # XXX TODO
 161         comb += Cat(*go_rd_l0).eq(self.rd0.go)
 162         comb += Cat(*go_rd_l1).eq(self.rd1.go)
 163         comb += Cat(*issue_l).eq(self.issue_i)
 164
 165         # connect data register input/output
 166
 167         # merge (OR) all integer FU / ALU outputs to a single value
 168         # XXX NOTE: this only works because there is a single "port"
 169         # protected by a single go_wr.  multi-issue requires a bus
 170         # to be inserted here.
 171         if self.units:
 172             data_o = treereduce(self.units, "data_o")
 173             comb += self.data_o.eq(data_o)
 174             if self.ldstmode:
 175                 addr_o = treereduce(self.units, "addr_o")
 176                 comb += self.addr_o.eq(addr_o)
 177
 178         for i, alu in enumerate(self.units):
 179             comb += alu.src1_i.eq(self.src1_i)
 180             comb += alu.src2_i.eq(self.src2_i)
 181
 182         if not self.ldstmode:
 183             return m
 184
 185         ldmem_l = []
 186         stmem_l = []
 187         go_ad_l = []
 188         go_st_l = []
 189         ld_l = []
 190         st_l = []
 191         adr_rel_l = []
 192         sto_rel_l = []
 193         for alu in self.units:
 194             ld_l.append(alu.ld_o)
 195             st_l.append(alu.st_o)
 196             adr_rel_l.append(alu.adr_rel_o)
 197             sto_rel_l.append(alu.sto_rel_o)
 198             ldmem_l.append(alu.load_mem_o)
 199             stmem_l.append(alu.stwd_mem_o)
 200             go_ad_l.append(alu.go_ad_i)
 201             go_st_l.append(alu.go_st_i)
 202         comb += self.ld_o.eq(Cat(*ld_l))
 203         comb += self.st_o.eq(Cat(*st_l))
 204         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 205         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 206         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 207         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 208         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 209         comb += Cat(*go_st_l).eq(self.go_st_i)
 210
 211         return m
 212
 213
 214 class CompUnitLDSTs(CompUnitsBase):
 215
 216     def __init__(self, rwid, opwid, n_ldsts, l0):
 217         """ Inputs:
 218
 219             * :rwid:   bit width of register file(s) - both FP and INT
 220             * :opwid:  operand bit width
 221         """
 222         self.opwid = opwid
 223
 224         # inputs
 225         self.op = CompLDSTOpSubset("cul_i")
 226
 227         # LD/ST Units
 228         units = []
 229         for i in range(n_ldsts):
 230             pi = l0.l0.dports[i].pi
 231             units.append(LDSTCompUnit(pi, rwid, awid=48))
 232
 233         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 234
 235     def elaborate(self, platform):
 236         m = CompUnitsBase.elaborate(self, platform)
 237         comb = m.d.comb
 238
 239         # hand the same operation to all units
 240         for ldst in self.units:
 241             comb += ldst.oper_i.eq(self.op)
 242
 243         return m
 244
 245
 246 class CompUnitALUs(CompUnitsBase):
 247
 248     def __init__(self, rwid, opwid, n_alus):
 249         """ Inputs:
 250
 251             * :rwid:   bit width of register file(s) - both FP and INT
 252             * :opwid:  operand bit width
 253         """
 254         self.opwid = opwid
 255
 256         # inputs
 257         self.op = CompALUOpSubset("cua_i")
 258
 259         # Int ALUs
 260         alus = []
 261         for i in range(n_alus):
 262             alus.append(ALU(rwid))
 263
 264         units = []
 265         for alu in alus:
 266             aluopwid = 3  # extra bit for immediate mode
 267             units.append(MultiCompUnit(rwid, alu))
 268
 269         CompUnitsBase.__init__(self, rwid, units)
 270
 271     def elaborate(self, platform):
 272         m = CompUnitsBase.elaborate(self, platform)
 273         comb = m.d.comb
 274
 275         # hand the subset of operation to ALUs
 276         for alu in self.units:
 277             comb += alu.oper_i.eq(self.op)
 278
 279         return m
 280
 281
 282 class CompUnitBR(CompUnitsBase):
 283
 284     def __init__(self, rwid, opwid):
 285         """ Inputs:
 286
 287             * :rwid:   bit width of register file(s) - both FP and INT
 288             * :opwid:  operand bit width
 289
 290             Note: bgt unit is returned so that a shadow unit can be created
 291             for it
 292         """
 293         self.opwid = opwid
 294
 295         # inputs
 296         self.op = CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
 297         self.oper_i = Signal(opwid, reset_less=True)
 298         self.imm_i = Signal(rwid, reset_less=True)
 299
 300         # Branch ALU and CU
 301         self.bgt = BranchALU(rwid)
 302         aluopwid = 3  # extra bit for immediate mode
 303         self.br1 = MultiCompUnit(rwid, self.bgt)
 304         CompUnitsBase.__init__(self, rwid, [self.br1])
 305
 306     def elaborate(self, platform):
 307         m = CompUnitsBase.elaborate(self, platform)
 308         comb = m.d.comb
 309
 310         # hand the same operation to all units
 311         for alu in self.units:
 312             #comb += alu.oper_i.eq(self.op) # TODO
 313             comb += alu.oper_i.eq(self.oper_i)
 314             #comb += alu.imm_i.eq(self.imm_i)
 315
 316         return m
 317
 318
 319 class FunctionUnits(Elaboratable):
 320
 321     def __init__(self, n_reg, n_int_alus, n_src, n_dst):
 322         self.n_src, self.n_dst = n_src, n_dst
 323         self.n_reg = n_reg
 324         self.n_int_alus = nf = n_int_alus
 325
 326         self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
 327         self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)
 328
 329         self.readable_o = Signal(n_int_alus, reset_less=True)
 330         self.writable_o = Signal(n_int_alus, reset_less=True)
 331
 332         # arrays
 333         src = []
 334         rsel = []
 335         rd = []
 336         for i in range(n_src):
 337             j = i + 1 # name numbering to match src1/src2
 338             src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
 339             rsel.append(Signal(n_reg, name="src%d_rsel_o" % j, reset_less=True))
 340             rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
 341         dst = []
 342         dsel = []
 343         wr = []
 344         for i in range(n_dst):
 345             j = i + 1 # name numbering to match src1/src2
 346             dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
 347             dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j, reset_less=True))
 348             wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
 349         wpnd = []
 350         pend = []
 351         for i in range(nf):
 352             j = i + 1 # name numbering to match src1/src2
 353             pend.append(Signal(nf, name="rd_src%d_pend_o" % j, reset_less=True))
 354             wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j, reset_less=True))
 355
 356         self.dest_i = Array(dst)     # Dest in (top)
 357         self.src_i = Array(src)      # oper in (top)
 358
 359         # for Register File Select Lines (horizontal), per-reg
 360         self.dst_rsel_o = Array(dsel) # dest reg (bot)
 361         self.src_rsel_o = Array(rsel)  # src reg (bot)
 362
 363         self.go_rd_i = Array(rd)
 364         self.go_wr_i = Array(wr)
 365
 366         self.go_die_i = Signal(n_int_alus, reset_less=True)
 367         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 368
 369         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 370
 371     def elaborate(self, platform):
 372         m = Module()
 373         comb = m.d.comb
 374         sync = m.d.sync
 375
 376         n_intfus = self.n_int_alus
 377
 378         # Integer FU-FU Dep Matrix
 379         intfudeps = FUFUDepMatrix(n_intfus, n_intfus, 2, 1)
 380         m.submodules.intfudeps = intfudeps
 381         # Integer FU-Reg Dep Matrix
 382         intregdeps = FURegDepMatrix(n_intfus, self.n_reg, 2, 1)
 383         m.submodules.intregdeps = intregdeps
 384
 385         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 386         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 387
 388         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 389         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 390
 391         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 392         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 393         self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid
 394
 395         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 396         comb += intfudeps.go_die_i.eq(self.go_die_i)
 397         comb += self.readable_o.eq(intfudeps.readable_o)
 398         comb += self.writable_o.eq(intfudeps.writable_o)
 399
 400         # Connect function issue / arrays, and dest/src1/src2
 401         for i in range(self.n_src):
 402             print (i, self.go_rd_i, intfudeps.go_rd_i)
 403             comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
 404             comb += intregdeps.src_i[i].eq(self.src_i[i])
 405             comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
 406             comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
 407         for i in range(self.n_dst):
 408             print (i, self.go_wr_i, intfudeps.go_wr_i)
 409             comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
 410             comb += intregdeps.dest_i[i].eq(self.dest_i[i])
 411             comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
 412             comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
 413         comb += intregdeps.go_die_i.eq(self.go_die_i)
 414         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 415
 416         return m
 417
 418
 419 class Scoreboard(Elaboratable):
 420     def __init__(self, rwid, n_regs):
 421         """ Inputs:
 422
 423             * :rwid:   bit width of register file(s) - both FP and INT
 424             * :n_regs: depth of register file(s) - number of FP and INT regs
 425         """
 426         self.rwid = rwid
 427         self.n_regs = n_regs
 428
 429         # Register Files
 430         self.intregs = RegFileArray(rwid, n_regs)
 431         self.fpregs = RegFileArray(rwid, n_regs)
 432
 433         # Memory (test for now)
 434         self.l0 = TstL0CacheBuffer()
 435
 436         # issue q needs to get at these
 437         self.aluissue = IssueUnitGroup(2)
 438         self.lsissue = IssueUnitGroup(2)
 439         self.brissue = IssueUnitGroup(1)
 440         # and these
 441         self.instr = Decode2ToExecute1Type("sc_instr")
 442         self.br_oper_i = Signal(4, reset_less=True)
 443         self.br_imm_i = Signal(rwid, reset_less=True)
 444         self.ls_oper_i = Signal(4, reset_less=True)
 445
 446         # inputs
 447         self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
 448         self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
 449         self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
 450         self.reg_enable_i = Signal(reset_less=True)  # enable reg decode
 451
 452         # outputs
 453         self.issue_o = Signal(reset_less=True)  # instruction was accepted
 454         self.busy_o = Signal(reset_less=True)  # at least one CU is busy
 455
 456         # for branch speculation experiment.  branch_direction = 0 if
 457         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 458         # branch_succ and branch_fail are requests to have the current
 459         # instruction be dependent on the branch unit "shadow" capability.
 460         self.branch_succ_i = Signal(reset_less=True)
 461         self.branch_fail_i = Signal(reset_less=True)
 462         self.branch_direction_o = Signal(2, reset_less=True)
 463
 464     def elaborate(self, platform):
 465         m = Module()
 466         comb = m.d.comb
 467         sync = m.d.sync
 468
 469         m.submodules.intregs = self.intregs
 470         m.submodules.fpregs = self.fpregs
 471         m.submodules.l0 = l0 = self.l0
 472
 473         # register ports
 474         int_dest = self.intregs.write_port("dest")
 475         int_src1 = self.intregs.read_port("src1")
 476         int_src2 = self.intregs.read_port("src2")
 477
 478         fp_dest = self.fpregs.write_port("dest")
 479         fp_src1 = self.fpregs.read_port("src1")
 480         fp_src2 = self.fpregs.read_port("src2")
 481
 482         # Int ALUs and BR ALUs
 483         n_int_alus = 5
 484         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 485         cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs
 486
 487         # LDST Comp Units
 488         n_ldsts = 2
 489         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, l0)
 490
 491         # Comp Units
 492         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 493         bgt = cub.bgt  # get at the branch computation unit
 494         br1 = cub.br1
 495
 496         # Int FUs
 497         fu_n_src = 2
 498         fu_n_dst = 1
 499         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
 500                                                      fu_n_src, fu_n_dst)
 501
 502         # Memory FUs
 503         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 504
 505         # Memory Priority Picker 1: one gateway per memory port
 506         # picks 1 reader and 1 writer to intreg
 507         mempick1 = GroupPicker(n_ldsts, 1, 1)
 508         m.submodules.mempick1 = mempick1
 509
 510         # Count of number of FUs
 511         n_intfus = n_int_alus
 512         n_fp_fus = 0  # for now
 513
 514         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 515         # picks 1 reader and 1 writer to intreg
 516         ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
 517         m.submodules.intpick1 = ipick1
 518
 519         # INT/FP Issue Unit
 520         regdecode = RegDecode(self.n_regs)
 521         m.submodules.regdecode = regdecode
 522         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 523         m.submodules.issueunit = issueunit
 524
 525         # Shadow Matrix.  currently n_intfus shadows, to be used for
 526         # write-after-write hazards.  NOTE: there is one extra for branches,
 527         # so the shadow width is increased by 1
 528         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 529         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 530
 531         # record previous instruction to cast shadow on current instruction
 532         prev_shadow = Signal(n_intfus)
 533
 534         # Branch Speculation recorder.  tracks the success/fail state as
 535         # each instruction is issued, so that when the branch occurs the
 536         # allow/cancel can be issued as appropriate.
 537         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 538
 539         # ---------
 540         # ok start wiring things together...
 541         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 542         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 543         # ---------
 544
 545         # ---------
 546         # Issue Unit is where it starts.  set up some in/outs for this module
 547         # ---------
 548         comb += [regdecode.dest_i.eq(self.int_dest_i),
 549                  regdecode.src1_i.eq(self.int_src1_i),
 550                  regdecode.src2_i.eq(self.int_src2_i),
 551                  regdecode.enable_i.eq(self.reg_enable_i),
 552                  self.issue_o.eq(issueunit.issue_o)
 553                  ]
 554
 555         # take these to outside (issue needs them)
 556         comb += cua.op.eq_from_execute1(self.instr)
 557         comb += cub.oper_i.eq(self.br_oper_i)
 558         comb += cub.imm_i.eq(self.br_imm_i)
 559         comb += cul.op.eq_from_execute1(self.instr)
 560
 561         # TODO: issueunit.f (FP)
 562
 563         # and int function issue / busy arrays, and dest/src1/src2
 564         comb += intfus.dest_i[0].eq(regdecode.dest_o)
 565         comb += intfus.src_i[0].eq(regdecode.src1_o)
 566         comb += intfus.src_i[1].eq(regdecode.src2_o)
 567
 568         fn_issue_o = issueunit.fn_issue_o
 569
 570         comb += intfus.fn_issue_i.eq(fn_issue_o)
 571         comb += issueunit.busy_i.eq(cu.busy_o)
 572         comb += self.busy_o.eq(cu.busy_o.bool())
 573
 574         # ---------
 575         # Memory Function Unit
 576         # ---------
 577         reset_b = Signal(cul.n_units, reset_less=True)
 578         # XXX was cul.go_wr_i not done.o
 579         # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
 580         sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
 581
 582         comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
 583         comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
 584         comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit
 585
 586         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 587         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 588         # issue_i.  multi-issue gets a bit more complex but not a lot.
 589         prior_ldsts = Signal(cul.n_units, reset_less=True)
 590         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 591         with m.If(self.ls_oper_i[3]):  # LD bit of operand
 592             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 593         with m.If(self.ls_oper_i[2]):  # ST bit of operand
 594             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 595
 596         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 597         # just immediately activate go_adr
 598         sync += cul.go_ad_i.eq(cul.adr_rel_o)
 599
 600         # connect up address data
 601         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 602         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 603
 604         # connect loadable / storable to go_ld/go_st.
 605         # XXX should only be done when the memory ld/st has actually happened!
 606         go_st_i = Signal(cul.n_units, reset_less=True)
 607         go_ld_i = Signal(cul.n_units, reset_less=True)
 608         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
 609                            cul.adr_rel_o & cul.ld_o)
 610         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
 611                            cul.sto_rel_o & cul.st_o)
 612         comb += memfus.go_ld_i.eq(go_ld_i)
 613         comb += memfus.go_st_i.eq(go_st_i)
 614         #comb += cul.go_wr_i.eq(go_ld_i)
 615         comb += cul.go_st_i.eq(go_st_i)
 616
 617         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 618         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 619         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 620
 621         # ---------
 622         # merge shadow matrices outputs
 623         # ---------
 624
 625         # these are explained in ShadowMatrix docstring, and are to be
 626         # connected to the FUReg and FUFU Matrices, to get them to reset
 627         anydie = Signal(n_intfus, reset_less=True)
 628         allshadown = Signal(n_intfus, reset_less=True)
 629         shreset = Signal(n_intfus, reset_less=True)
 630         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 631         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 632         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 633
 634         # ---------
 635         # connect fu-fu matrix
 636         # ---------
 637
 638         # Group Picker... done manually for now.
 639         go_rd_o = ipick1.go_rd_o
 640         go_wr_o = ipick1.go_wr_o
 641         go_rd_i = intfus.go_rd_i
 642         go_wr_i = intfus.go_wr_i
 643         go_die_i = intfus.go_die_i
 644         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 645         for i in range(fu_n_src):
 646             comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
 647         for i in range(fu_n_dst):
 648             comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
 649         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die
 650
 651         # Connect Picker
 652         # ---------
 653         int_rd_o = intfus.readable_o
 654         rrel_o = cu.rd_rel_o
 655         rqrl_o = cu.req_rel_o
 656         for i in range(fu_n_src):
 657             comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
 658             comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
 659         int_wr_o = intfus.writable_o
 660         for i in range(fu_n_dst):
 661             # XXX FIXME: rqrl_o[i] here
 662             comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
 663             comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])
 664
 665         # ---------
 666         # Shadow Matrix
 667         # ---------
 668
 669         comb += shadows.issue_i.eq(fn_issue_o)
 670         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 671         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 672         # ---------
 673         # NOTE; this setup is for the instruction order preservation...
 674
 675         # connect shadows / go_dies to Computation Units
 676         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 677         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 678
 679         # ok connect first n_int_fu shadows to busy lines, to create an
 680         # instruction-order linked-list-like arrangement, using a bit-matrix
 681         # (instead of e.g. a ring buffer).
 682
 683         # when written, the shadow can be cancelled (and was good)
 684         for i in range(n_intfus):
 685             #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 686             # XXX experiment: use ~cu.busy_o instead.  *should* be good
 687             # because the comp unit is only free once completed
 688             comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])
 689
 690         # *previous* instruction shadows *current* instruction, and, obviously,
 691         # if the previous is completed (!busy) don't cast the shadow!
 692         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 693         for i in range(n_intfus):
 694             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 695
 696         # ---------
 697         # ... and this is for branch speculation.  it uses the extra bit
 698         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 699         # only needs to set shadow_i, s_fail_i and s_good_i
 700
 701         # issue captures shadow_i (if enabled)
 702         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 703
 704         bactive = Signal(reset_less=True)
 705         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 706
 707         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 708         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 709             comb += bshadow.issue_i.eq(fn_issue_o)
 710             for i in range(n_intfus):
 711                 with m.If(fn_issue_o & (Const(1 << i))):
 712                     comb += bshadow.shadow_i[i][0].eq(1)
 713
 714         # finally, we need an indicator to the test infrastructure as to
 715         # whether the branch succeeded or failed, plus, link up to the
 716         # "recorder" of whether the instruction was under shadow or not
 717
 718         with m.If(br1.issue_i):
 719             sync += bspec.active_i.eq(1)
 720         with m.If(self.branch_succ_i):
 721             comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 722         with m.If(self.branch_fail_i):
 723             comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
 724
 725         # branch is active (TODO: a better signal: this is over-using the
 726         # go_write signal - actually the branch should not be "writing")
 727         with m.If(br1.go_wr_i):
 728             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 729             sync += bspec.active_i.eq(0)
 730             comb += bspec.br_i.eq(1)
 731             # branch occurs if data == 1, failed if data == 0
 732             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 733             for i in range(n_intfus):
 734                 # *expected* direction of the branch matched against *actual*
 735                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 736                 # ... or it didn't
 737                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 738
 739         # ---------
 740         # Connect Register File(s)
 741         # ---------
 742         comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
 743         comb += int_src1.ren.eq(intfus.src_rsel_o[0])
 744         comb += int_src2.ren.eq(intfus.src_rsel_o[1])
 745
 746         # connect ALUs to regfile
 747         comb += int_dest.data_i.eq(cu.data_o)
 748         comb += cu.src1_i.eq(int_src1.data_o)
 749         comb += cu.src2_i.eq(int_src2.data_o)
 750
 751         # connect ALU Computation Units
 752         for i in range(fu_n_src):
 753             comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
 754         for i in range(fu_n_dst):
 755             comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
 756         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 757
 758         return m
 759
 760     def __iter__(self):
 761         yield from self.intregs
 762         yield from self.fpregs
 763         yield self.int_dest_i
 764         yield self.int_src1_i
 765         yield self.int_src2_i
 766         yield self.issue_o
 767         yield self.branch_succ_i
 768         yield self.branch_fail_i
 769         yield self.branch_direction_o
 770
 771     def ports(self):
 772         return list(self)
 773
 774
class IssueToScoreboard(Elaboratable):
    """Instruction queue feeding a Scoreboard.

    elaborate() instantiates an InstructionQ and a Scoreboard as
    submodules.  Whenever the queue is non-empty, the entry at its head
    (``iq.data_o[0]``) is presented to the Scoreboard, and one of the
    ALU / LDST / branch issue units is asked to issue it.  The entry is
    popped from the queue (``iq.n_sub_i`` set to 1) once that issue
    unit reports a non-zero ``fn_issue_o``.

    Constructor arguments:

    * qlen   - passed to InstructionQ; also determines the width of
               p_add_i and qlen_o
    * n_in   - passed to InstructionQ; data_i is created with
               Instruction._nq(n_in, ...)
    * n_out  - passed to InstructionQ
    * rwid   - passed to both InstructionQ and Scoreboard
    * opwid  - passed to InstructionQ (stored as self.opw)
    * n_regs - passed to Scoreboard
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # width of the queue-length signals: int(log2(qlen)) + 2 bits
        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction._nq(n_in, "data_i")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """Build the queue-to-scoreboard glue logic and return the Module.

        Side effect: sets ``self.intregs`` (the Scoreboard's ``intregs``),
        so that attribute only exists once elaborate() has been called.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync  # not used in this method

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i is set to 1
        # (below), which is done once the issue unit selected for the
        # instruction reports a non-zero fn_issue_o.

        # in "waiting" state: each of these is driven combinatorially by
        # the Function-Unit-Group selection further down
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            instr = iq.data_o[0]
            imm = instr.imm_data.data
            dest = instr.write_reg.data
            src1 = instr.read_reg1.data
            src2 = instr.read_reg2.data
            op = instr.insn_type
            fu = instr.fn_unit
            opi = instr.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile
            comb += sc.instr.eq(instr)

            # choose a Function-Unit-Group: ALU first, then LD/ST; anything
            # else is treated as a branch if bit 2 or bit 3 of op is set
            with m.If(fu == Function.ALU):  # alu
                comb += sc.aluissue.insn_i.eq(1) # enable alu issue
                comb += wait_issue_alu.eq(1)
            with m.Elif(fu == Function.LDST):  # ld/st
                comb += sc.lsissue.insn_i.eq(1) # enable ldst issue
                comb += wait_issue_ls.eq(1)

            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                # branch operation: low two bits of op plus the
                # "immediate set" flag
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """Yield p_ready_o, the contents of every data_i entry, then p_add_i.

        busy_o and qlen_o are not included.
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """Return the result of iterating this object, as a list."""
        return list(self)
 890
 891
 892 def power_instr_q(dut, pdecode2, ins, code):
 893     instrs = [pdecode2.e]
 894
 895     sendlen = 1
 896     for idx, instr in enumerate(instrs):
 897         yield dut.data_i[idx].eq(instr)
 898         insn_type = yield instr.insn_type
 899         fn_unit = yield instr.fn_unit
 900         print("senddata ", idx, insn_type, fn_unit, instr)
 901     yield dut.p_add_i.eq(sendlen)
 902     yield
 903     o_p_ready = yield dut.p_ready_o
 904     while not o_p_ready:
 905         yield
 906         o_p_ready = yield dut.p_ready_o
 907
 908     yield dut.p_add_i.eq(0)
 909
 910
 911 def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
 912             branch_success, branch_fail):
 913     instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
 914                 'imm_data': (imm, op_imm),
 915                'read_reg1': src1, 'read_reg2': src2}]
 916
 917     sendlen = 1
 918     for idx, instr in enumerate(instrs):
 919         imm, op_imm = instr['imm_data']
 920         reg1 = instr['read_reg1']
 921         reg2 = instr['read_reg2']
 922         dest = instr['write_reg']
 923         insn_type = instr['insn_type']
 924         fn_unit = instr['fn_unit']
 925         yield dut.data_i[idx].insn_type.eq(insn_type)
 926         yield dut.data_i[idx].fn_unit.eq(fn_unit)
 927         yield dut.data_i[idx].read_reg1.data.eq(reg1)
 928         yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO
 929         yield dut.data_i[idx].read_reg2.data.eq(reg2)
 930         yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO
 931         yield dut.data_i[idx].write_reg.data.eq(dest)
 932         yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO
 933         yield dut.data_i[idx].imm_data.data.eq(imm)
 934         yield dut.data_i[idx].imm_data.ok.eq(op_imm)
 935         di = yield dut.data_i[idx]
 936         print("senddata %d %x" % (idx, di))
 937     yield dut.p_add_i.eq(sendlen)
 938     yield
 939     o_p_ready = yield dut.p_ready_o
 940     while not o_p_ready:
 941         yield
 942         o_p_ready = yield dut.p_ready_o
 943
 944     yield dut.p_add_i.eq(0)
 945
 946
 947 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 948     yield from disable_issue(dut)
 949     yield dut.int_dest_i.eq(dest)
 950     yield dut.int_src1_i.eq(src1)
 951     yield dut.int_src2_i.eq(src2)
 952     if (op & (0x3 << 2)) != 0:  # branch
 953         yield dut.brissue.insn_i.eq(1)
 954         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 955         yield dut.br_imm_i.eq(imm)
 956         dut_issue = dut.brissue
 957     else:
 958         yield dut.aluissue.insn_i.eq(1)
 959         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 960         yield dut.alu_imm_i.eq(imm)
 961         dut_issue = dut.aluissue
 962     yield dut.reg_enable_i.eq(1)
 963
 964     # these indicate that the instruction is to be made shadow-dependent on
 965     # (either) branch success or branch fail
 966     yield dut.branch_fail_i.eq(branch_fail)
 967     yield dut.branch_succ_i.eq(branch_success)
 968
 969     yield
 970     yield from wait_for_issue(dut, dut_issue)
 971
 972
 973 def print_reg(dut, rnums):
 974     rs = []
 975     for rnum in rnums:
 976         reg = yield dut.intregs.regs[rnum].reg
 977         rs.append("%x" % reg)
 978     rnums = map(str, rnums)
 979     print("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 980
 981
 982 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 983     insts = []
 984     for i in range(n_ops):
 985         src1 = randint(1, dut.n_regs-1)
 986         src2 = randint(1, dut.n_regs-1)
 987         imm = randint(1, (1 << dut.rwid)-1)
 988         dest = randint(1, dut.n_regs-1)
 989         op = randint(0, max_opnums)
 990         opi = 0 if randint(0, 2) else 1  # set true if random is nonzero
 991
 992         if shadowing:
 993             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 994         else:
 995             insts.append((src1, src2, dest, op, opi, imm))
 996     return insts
 997
 998
 999 def wait_for_busy_clear(dut):
1000     while True:
1001         busy_o = yield dut.busy_o
1002         if not busy_o:
1003             break
1004         print("busy",)
1005         yield
1006
1007
def disable_issue(dut):
    """Simulation process: clear the issue request (``insn_i``) of the
    ALU, branch and LD/ST issue units, in that order."""
    for unit in (dut.aluissue, dut.brissue, dut.lsissue):
        yield unit.insn_i.eq(0)
1012
1013
def wait_for_issue(dut, dut_issue):
    """Simulation process: wait until ``dut_issue.fn_issue_o`` is non-zero.

    Prints "busy" and yields for every poll that finds it still zero.
    Once it reads non-zero, all issue requests are cleared (via
    ``disable_issue``) and ``dut.reg_enable_i`` is set back to 0.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy")
        # yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    # yield from print_reg(dut, [1,2,3])
1025
1026
def scoreboard_branch_sim(dut, alusim):
    """Simulation process exercising branch shadowing.

    Loads random values into registers 1..dut.n_regs-1 (mirrored into
    ``alusim``), then issues a short hard-coded sequence through
    ``int_instr``: one plain instruction followed by a branch that
    carries a "branch ok" and a "branch fail" list of follow-on
    instructions.  The follow-on instructions are appended to the work
    list tagged with a shadow pair, and are skipped once
    ``dut.branch_direction_o`` reads non-zero and shows that they belong
    to the path not taken.  Afterwards the same sequence is replayed on
    ``alusim``, and ``alusim.check(dut)`` / ``alusim.dump(dut)`` are run.

    Instruction tuples are ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``.  For a branch (``op >= 4``) the ``dest`` slot holds
    ``(branch_ok, branch_fail)`` instead of a register number.
    """

    iseed = 3

    for _run in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers (DUT and reference sim alike)
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): with shadowing=True create_random_ops returns
            # 7-element tuples, which do not match the 5-element tuples
            # unpacked below - this variant needs updating before it is
            # re-enabled.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # untouched copy for the reference-simulator replay further down
        siminsts = deepcopy(insts)

        # issue instruction(s).  the work list grows while it is being
        # consumed: every branch appends its follow-on instructions
        i = -1
        instrs = insts
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # direction already known: no shadow needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate between op and src1.  these
            # instruction tuples do not carry one, so pass 0 (omitting it
            # shifts every operand by one slot and raises TypeError)
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the reference simulator, following only the branch
        # path that the reference simulator itself decides on
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): scoreboard_sim in this file calls alusim.op
            # as op(op, opi, imm, src1, src2, dest); confirm which
            # signature RegSim.op actually has and align this call.
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1139
1140
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: assemble POWER instructions and queue them.

    Registers 1..dut.n_regs-1 are preset to their own index (in ``dut``
    and in ``alusim``).  Each assembled instruction word is assigned to
    ``instruction`` and, after one bare yield, pushed into ``dut`` with
    ``power_instr_q``.  The process then waits for ``dut.qlen_o`` to read
    zero, yields four more times, waits for ``dut.busy_o`` to clear and
    finally runs ``alusim.check(dut)`` and ``alusim.dump(dut)``.

    ``m`` is accepted but not used.  The instructions themselves are not
    applied to ``alusim`` here (that call is commented out below).
    """

    seed(0)

    for _run in range(1):

        # preset the registers: not random at all, each register simply
        # holds its own number.  (alternatives that have been used:
        # randint(0, (1<<alusim.rwidth)-1) and 31+regnum*3)
        for regnum in range(1, dut.n_regs):
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # assembly listing to run; flip the switches to pick a test
        asm = []
        if False:
            asm.extend(["addi 2, 0, 0x4321",
                        "addi 3, 0, 0x1234",
                        "add  1, 3, 2",
                        "add  4, 3, 5"])
        if True:
            asm.append("lbzu 6, 7(2)")

        with Program(asm) as program:
            binaries = program.generate_instructions()
            listing = program.assembly.splitlines()

            # issue instruction(s), wait for issue to be free before proceeding
            for ins, code in zip(binaries, listing):
                yield instruction.eq(ins)          # raw binary instr.
                yield #Delay(1e-6)

                print("binary 0x{:X}".format(ins & 0xffffffff))
                print("assembly", code)

                #alusim.op(op, opi, imm, src1, src2, dest)
                yield from power_instr_q(dut, pdecode2, ins, code)

        # wait for all instructions to stop before checking: first for
        # the queue to drain, then a few extra yields, then for every
        # unit to report not-busy
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1197
1198
def scoreboard_sim(dut, alusim):
    """Simulation process: queue hand-written instruction tuples.

    Registers 1..dut.n_regs-1 are preset to their own index (in ``dut``
    and in ``alusim``).  Every enabled instruction is applied to
    ``alusim`` with ``alusim.op`` and pushed into ``dut`` with
    ``instr_q``.  The process then waits for ``dut.qlen_o`` to read
    zero, yields four more times, waits for ``dut.busy_o`` to clear and
    finally runs ``alusim.check(dut)`` and ``alusim.dump(dut)``.

    Instruction tuples consumed by the issue loop have the layout
    ``(src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))``.
    Several of the disabled regression groups below use shorter tuples
    and would not unpack in that loop as they stand.
    """

    seed(0)

    for _run in range(1):

        # preset the registers: each register simply holds its own
        # number.  (alternatives that have been used:
        # randint(0, (1<<alusim.rwidth)-1) and 31+regnum*3)
        for regnum in range(1, dut.n_regs):
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions (some random, some regression tests).
        # flip the switches to select which groups are run
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs += [(1, 2, 0, 0x20, 1, 1, (0, 0))]  # LD
            #instrs += [(1, 2, 0, 0x10, 1, 1, (0, 0))]

        if False:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if False:
            instrs.extend([
                (7, 3, 2, 4, 0, 0, (0, 0)),
                (7, 6, 6, 2, 0, 0, (0, 0)),
                (1, 7, 2, 2, 0, 0, (0, 0)),
            ])

        if True:
            instrs.extend([
                (2, 3, 3, InternalOp.OP_ADD, Function.ALU, 0, 0, (0, 0)),
                (5, 3, 3, InternalOp.OP_ADD, Function.ALU, 0, 0, (0, 0)),
            ])
        if False:
            instrs += [(3, 5, 5, InternalOp.OP_MUL_L64, Function.ALU,
                        1, 7, (0, 0))]
        if False:
            instrs += [(2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                        0, 0, (0, 0))]

        if False:
            instrs.extend([
                (2, 3, 3, 0, 0, 0, (0, 0)),
                (5, 3, 3, 1, 0, 0, (0, 0)),
                (3, 5, 5, 2, 0, 0, (0, 0)),
                (5, 3, 3, 3, 0, 0, (0, 0)),
                (3, 5, 5, 0, 0, 0, (0, 0)),
            ])

        if False:
            instrs.extend([
                (3, 3, 4, 0, 0, 13979, (0, 0)),
                (6, 4, 1, 2, 0, 40976, (0, 0)),
                (1, 4, 7, 4, 1, 23652, (0, 0)),
            ])

        if False:
            instrs.extend([
                (5, 6, 2, 1),
                (2, 2, 4, 0),
                #(2, 2, 3, 1),
            ])

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs.extend([
                (2, 6, 2, 1),
                (2, 1, 2, 0),
            ])

        if False:
            instrs.extend([
                (1, 2, 7, 2),
                (7, 1, 5, 0),
                (4, 4, 1, 1),
            ])

        if False:
            instrs.extend([
                (5, 6, 2, 2),
                (1, 1, 4, 1),
                (6, 5, 3, 0),
            ])

        if False:
            # Write-after-Write Hazard
            instrs.extend([
                (3, 6, 7, 2),
                (4, 4, 7, 1),
            ])

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.extend([
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ])

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.extend([
                (5, 6, 1, 2),
                (1, 1, 1, 1),
            ])

        if False:
            # self-read-write sandwich
            instrs.extend([
                (5, 6, 1, 2),
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ])

        if False:
            # very weird failure
            instrs.extend([
                (5, 2, 5, 2),
                (2, 6, 3, 0),
                (4, 2, 2, 1),
            ])

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.extend([
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (0, 1)),
            ])

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.extend([
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (1, 0)),
            ])

        if False:
            instrs.extend([
                (4, 3, 5, 1, 0, (0, 0)),
                (5, 2, 3, 1, 0, (0, 0)),
                (7, 1, 5, 2, 0, (0, 0)),
                (5, 6, 6, 4, 0, (0, 0)),
                (7, 5, 2, 2, 0, (1, 0)),
                (1, 7, 5, 0, 0, (0, 1)),
                (1, 6, 1, 2, 0, (1, 0)),
                (1, 6, 7, 3, 0, (0, 0)),
                (6, 7, 7, 0, 0, (0, 0)),
            ])

        # issue instruction(s), wait for issue to be free before proceeding
        for seq, entry in enumerate(instrs):
            print(seq, entry)
            src1, src2, dest, op, fn_unit, opi, imm, shadow = entry
            br_ok, br_fail = shadow

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (seq, src1, src2, dest, op, fn_unit, opi, imm))
            # reference model first, then the hardware queue
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking: first for
        # the queue to drain, then a few extra yields, then for every
        # unit to report not-busy
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1358
1359
def test_scoreboard():
    """Build the decoder + IssueToScoreboard test bench and simulate it.

    Writes the RTLIL of the combined design to ``test_scoreboard6600.il``
    and runs ``power_sim`` with a VCD trace in
    ``test_powerboard6600.vcd``.
    """
    regwidth = 64
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=regwidth, opwid=8, n_regs=8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # created but not used by the active test

    m = Module()
    # NB: keep this local name as-is, the Signal's name is derived from it
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    pdecode2 = PowerDecode2(pdecode)
    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    # feed the raw instruction word into the decoder
    m.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    m.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    il_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, process, vcd_name='test_powerboard6600.vcd')

    # alternative test processes, currently disabled:
    #run_simulation(dut, scoreboard_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1392
1393
# run the test bench when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()