src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13
  14 from compalu import ComputationUnitNoDelay
  15
  16 from alu_hier import ALU, BranchALU
  17 from nmutil.latch import SRLatch
  18 from nmutil.nmoperator import eq
  19
  20 from random import randint, seed
  21 from copy import deepcopy
  22 from math import log
  23
  24
  25 class Memory(Elaboratable):
  26     def __init__(self, regwid, addrw):
  27         self.ddepth = regwid/8
  28         depth = (1<<addrw) / self.ddepth
  29         self.adr   = Signal(addrw)
  30         self.dat_r = Signal(regwid)
  31         self.dat_w = Signal(regwid)
  32         self.we    = Signal()
  33         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  34
  35     def elaborate(self, platform):
  36         m = Module()
  37         m.submodules.rdport = rdport = self.mem.read_port()
  38         m.submodules.wrport = wrport = self.mem.write_port()
  39         m.d.comb += [
  40             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  41             self.dat_r.eq(rdport.data),
  42             wrport.addr.eq(self.adr),
  43             wrport.data.eq(self.dat_w),
  44             wrport.en.eq(self.we),
  45         ]
  46         return m
  47
  48
  49 class MemSim:
  50     def __init__(self, regwid, addrw):
  51         self.regwid = regwid
  52         self.ddepth = regwid//8
  53         depth = (1<<addrw) // self.ddepth
  54         self.mem = list(range(0, depth))
  55
  56     def ld(self, addr):
  57         return self.mem[addr>>self.ddepth]
  58
  59     def st(self, addr, data):
  60         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  61
  62
  63 class CompUnitsBase(Elaboratable):
  64     """ Computation Unit Base class.
  65
  66         Amazingly, this class works recursively.  It's supposed to just
  67         look after some ALUs (that can handle the same operations),
  68         grouping them together, however it turns out that the same code
  69         can also group *groups* of Computation Units together as well.
  70
  71         Basically it was intended just to concatenate the ALU's issue,
  72         go_rd etc. signals together, which start out as bits and become
  73         sequences.  Turns out that the same trick works just as well
  74         on Computation Units!
  75
  76         So this class may be used recursively to present a top-level
  77         sequential concatenation of all the signals in and out of
  78         ALUs, whilst at the same time making it convenient to group
  79         ALUs together.
  80
  81         At the lower level, the intent is that groups of (identical)
  82         ALUs may be passed the same operation.  Even beyond that,
  83         the intent is that that group of (identical) ALUs actually
  84         share the *same pipeline* and as such become a "Concurrent
  85         Computation Unit" as defined by Mitch Alsup (see section
  86         11.4.9.3)
  87     """
  88     def __init__(self, rwid, units):
  89         """ Inputs:
  90
  91             * :rwid:   bit width of register file(s) - both FP and INT
  92             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  93         """
  94         self.units = units
  95         self.rwid = rwid
  96         self.rwid = rwid
  97         if units and isinstance(units[0], CompUnitsBase):
  98             self.n_units = 0
  99             for u in self.units:
 100                 self.n_units += u.n_units
 101         else:
 102             self.n_units = len(units)
 103
 104         n_units = self.n_units
 105
 106         # inputs
 107         self.issue_i = Signal(n_units, reset_less=True)
 108         self.go_rd_i = Signal(n_units, reset_less=True)
 109         self.go_wr_i = Signal(n_units, reset_less=True)
 110         self.shadown_i = Signal(n_units, reset_less=True)
 111         self.go_die_i = Signal(n_units, reset_less=True)
 112
 113         # outputs
 114         self.busy_o = Signal(n_units, reset_less=True)
 115         self.rd_rel_o = Signal(n_units, reset_less=True)
 116         self.req_rel_o = Signal(n_units, reset_less=True)
 117
 118         # in/out register data (note: not register#, actual data)
 119         self.data_o = Signal(rwid, reset_less=True)
 120         self.src1_i = Signal(rwid, reset_less=True)
 121         self.src2_i = Signal(rwid, reset_less=True)
 122         # input operand
 123
 124     def elaborate(self, platform):
 125         m = Module()
 126         comb = m.d.comb
 127
 128         for i, alu in enumerate(self.units):
 129             setattr(m.submodules, "comp%d" % i, alu)
 130
 131         go_rd_l = []
 132         go_wr_l = []
 133         issue_l = []
 134         busy_l = []
 135         req_rel_l = []
 136         rd_rel_l = []
 137         shadow_l = []
 138         godie_l = []
 139         for alu in self.units:
 140             req_rel_l.append(alu.req_rel_o)
 141             rd_rel_l.append(alu.rd_rel_o)
 142             shadow_l.append(alu.shadown_i)
 143             godie_l.append(alu.go_die_i)
 144             go_wr_l.append(alu.go_wr_i)
 145             go_rd_l.append(alu.go_rd_i)
 146             issue_l.append(alu.issue_i)
 147             busy_l.append(alu.busy_o)
 148         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 149         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 150         comb += self.busy_o.eq(Cat(*busy_l))
 151         comb += Cat(*godie_l).eq(self.go_die_i)
 152         comb += Cat(*shadow_l).eq(self.shadown_i)
 153         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 154         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 155         comb += Cat(*issue_l).eq(self.issue_i)
 156
 157         # connect data register input/output
 158
 159         # merge (OR) all integer FU / ALU outputs to a single value
 160         # bit of a hack: treereduce needs a list with an item named "data_o"
 161         if self.units:
 162             data_o = treereduce(self.units)
 163             comb += self.data_o.eq(data_o)
 164
 165         for i, alu in enumerate(self.units):
 166             comb += alu.src1_i.eq(self.src1_i)
 167             comb += alu.src2_i.eq(self.src2_i)
 168
 169         return m
 170
 171
 172 class CompUnitALUs(CompUnitsBase):
 173
 174     def __init__(self, rwid, opwid):
 175         """ Inputs:
 176
 177             * :rwid:   bit width of register file(s) - both FP and INT
 178             * :opwid:  operand bit width
 179         """
 180         self.opwid = opwid
 181
 182         # inputs
 183         self.oper_i = Signal(opwid, reset_less=True)
 184
 185         # Int ALUs
 186         add = ALU(rwid)
 187         sub = ALU(rwid)
 188         mul = ALU(rwid)
 189         shf = ALU(rwid)
 190
 191         units = []
 192         for alu in [add, sub, mul, shf]:
 193             units.append(ComputationUnitNoDelay(rwid, 2, alu))
 194
 195         CompUnitsBase.__init__(self, rwid, units)
 196
 197     def elaborate(self, platform):
 198         m = CompUnitsBase.elaborate(self, platform)
 199         comb = m.d.comb
 200
 201         # hand the same operation to all units
 202         for alu in self.units:
 203             comb += alu.oper_i.eq(self.oper_i)
 204         #comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add
 205         #comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub
 206         #comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul
 207         #comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf
 208
 209         return m
 210
 211
 212 class CompUnitBR(CompUnitsBase):
 213
 214     def __init__(self, rwid, opwid):
 215         """ Inputs:
 216
 217             * :rwid:   bit width of register file(s) - both FP and INT
 218             * :opwid:  operand bit width
 219
 220             Note: bgt unit is returned so that a shadow unit can be created
 221             for it
 222         """
 223         self.opwid = opwid
 224
 225         # inputs
 226         self.oper_i = Signal(opwid, reset_less=True)
 227
 228         # Branch ALU and CU
 229         self.bgt = BranchALU(rwid)
 230         self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt)
 231         CompUnitsBase.__init__(self, rwid, [self.br1])
 232
 233     def elaborate(self, platform):
 234         m = CompUnitsBase.elaborate(self, platform)
 235         comb = m.d.comb
 236
 237         # hand the same operation to all units
 238         for alu in self.units:
 239             comb += alu.oper_i.eq(self.oper_i)
 240         #comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt
 241
 242         return m
 243
 244
 245 class FunctionUnits(Elaboratable):
 246
 247     def __init__(self, n_regs, n_int_alus):
 248         self.n_regs = n_regs
 249         self.n_int_alus = n_int_alus
 250
 251         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 252         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 253         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 254
 255         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 256         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 257
 258         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 259         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 260         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 261
 262         self.req_rel_i = Signal(n_int_alus, reset_less = True)
 263         self.readable_o = Signal(n_int_alus, reset_less=True)
 264         self.writable_o = Signal(n_int_alus, reset_less=True)
 265
 266         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 267         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 268         self.go_die_i = Signal(n_int_alus, reset_less=True)
 269         self.req_rel_o = Signal(n_int_alus, reset_less=True)
 270         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 271
 272         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 273
 274     def elaborate(self, platform):
 275         m = Module()
 276         comb = m.d.comb
 277         sync = m.d.sync
 278
 279         n_intfus = self.n_int_alus
 280
 281         # Integer FU-FU Dep Matrix
 282         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 283         m.submodules.intfudeps = intfudeps
 284         # Integer FU-Reg Dep Matrix
 285         intregdeps = FURegDepMatrix(n_intfus, self.n_regs)
 286         m.submodules.intregdeps = intregdeps
 287
 288         comb += self.g_int_rd_pend_o.eq(intregdeps.rd_rsel_o)
 289         comb += self.g_int_wr_pend_o.eq(intregdeps.wr_rsel_o)
 290
 291         comb += intregdeps.rd_pend_i.eq(intregdeps.rd_rsel_o)
 292         comb += intregdeps.wr_pend_i.eq(intregdeps.wr_rsel_o)
 293
 294         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 295         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 296         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 297
 298         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 299         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 300         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 301         comb += intfudeps.go_die_i.eq(self.go_die_i)
 302         comb += self.readable_o.eq(intfudeps.readable_o)
 303         comb += self.writable_o.eq(intfudeps.writable_o)
 304
 305         # Connect function issue / arrays, and dest/src1/src2
 306         comb += intregdeps.dest_i.eq(self.dest_i)
 307         comb += intregdeps.src1_i.eq(self.src1_i)
 308         comb += intregdeps.src2_i.eq(self.src2_i)
 309
 310         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 311         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 312         comb += intregdeps.go_die_i.eq(self.go_die_i)
 313         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 314
 315         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 316         comb += self.src1_rsel_o.eq(intregdeps.src1_rsel_o)
 317         comb += self.src2_rsel_o.eq(intregdeps.src2_rsel_o)
 318
 319         return m
 320
 321
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: wires the register files, the Computation
        Units (four ALUs plus one branch unit), the Function Unit dependency
        matrices, the group picker, the issue units, the shadow matrices
        and the branch speculation recorder together.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Instantiates all submodules and connects them.

            Statement order matters here (later assignments inside m.If
            blocks override earlier ones), so the wiring is kept as a
            single linear sequence.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # NOTE: the FP ports are created but not connected below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units: 4 ALUs (cua) + 1 branch unit (cub) = 5
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 2)
        cub = CompUnitBR(self.rwid, 2)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1, n_intfus wide
        # NOTE(review): original comment said "Adder + Subtractor" / "picks
        # between add, sub, mul and shf", but the width (n_intfus = 5)
        # also covers the branch unit - confirm.
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrices.  "shadows" has n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: branches get their own,
        # separate, one-wide matrix ("bshadow")
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cub.oper_i.eq(self.br_oper_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # busy_o is a single bit: set if any CU busy bit is set
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        # shadown is only set if *both* matrices agree; die if *either* says so
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the main shadow matrix is reset by the branch shadow's go_die
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # one-wide branch ShadowMatrix (bshadow).
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from the cycle it is issued (or once the
        # recorder has latched it) until the branch unit's go_wr_i fires
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # select the unit whose issue bit is set
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # 0x1f masks the low 5 bits, matching n_int_alus = 5 above
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # direction output is the branch result plus one
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields the signals that ports() returns.

            NOTE(review): reg_enable_i, busy_o, alu_oper_i and br_oper_i
            are not included here - confirm whether that is intentional.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything __iter__ yields, as a list.
        """
        return list(self)
 591
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is presented to the scoreboard, and is popped
        from the queue once the selected issue unit group accepts it.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ as the queue length
            * :n_in:   number of data_i instruction inputs
            * :n_out:  passed to InstructionQ
            * :rwid:   register bit width (passed to queue and scoreboard)
            * :opwid:  operation bit width (passed to InstructionQ)
            * :n_regs: number of registers (passed to Scoreboard)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # (width, signed) shape: int(log2(qlen))+2 bits, unsigned
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Creates the queue and scoreboard and links them together.

            Also sets self.intregs (the scoreboard's integer regfile) so
            that tests can inspect register contents.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when the pop length (iq.n_sub_i)
        # is set, which is done below once the selected issue unit group
        # reports a non-zero fn_issue_o.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group: bits 2-3 of the op select
            # branch (non-zero) or alu (zero); bits 0-1 are passed on
            # as the operation within the group
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(op & 0x3)
                comb += wait_issue_br.eq(1)
            with m.Else():                   # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(op & 0x3)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ Yields p_ready_o, every signal of each data_i entry, then
            p_add_i.
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ Returns everything __iter__ yields, as a list.
        """
        return list(self)
 694
 695
 696 IADD = 0
 697 ISUB = 1
 698 IMUL = 2
 699 ISHF = 3
 700 IBGT = 4
 701 IBLT = 5
 702 IBEQ = 6
 703 IBNE = 7
 704
 705 class RegSim:
 706     def __init__(self, rwidth, nregs):
 707         self.rwidth = rwidth
 708         self.regs = [0] * nregs
 709
 710     def op(self, op, src1, src2, dest):
 711         maxbits = (1 << self.rwidth) - 1
 712         src1 = self.regs[src1] & maxbits
 713         src2 = self.regs[src2] & maxbits
 714         if op == IADD:
 715             val = src1 + src2
 716         elif op == ISUB:
 717             val = src1 - src2
 718         elif op == IMUL:
 719             val = src1 * src2
 720         elif op == ISHF:
 721             val = src1 >> (src2 & maxbits)
 722         elif op == IBGT:
 723             val = int(src1 > src2)
 724         elif op == IBLT:
 725             val = int(src1 < src2)
 726         elif op == IBEQ:
 727             val = int(src1 == src2)
 728         elif op == IBNE:
 729             val = int(src1 != src2)
 730         val &= maxbits
 731         self.setval(dest, val)
 732         return val
 733
 734     def setval(self, dest, val):
 735         print ("sim setval", dest, hex(val))
 736         self.regs[dest] = val
 737
 738     def dump(self, dut):
 739         for i, val in enumerate(self.regs):
 740             reg = yield dut.intregs.regs[i].reg
 741             okstr = "OK" if reg == val else "!ok"
 742             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 743
 744     def check(self, dut):
 745         for i, val in enumerate(self.regs):
 746             reg = yield dut.intregs.regs[i].reg
 747             if reg != val:
 748                 print("reg %d expected %x received %x\n" % (i, val, reg))
 749                 yield from self.dump(dut)
 750                 assert False
 751
 752 def instr_q(dut, op, src1, src2, dest, branch_success, branch_fail):
 753     instrs = [{'oper_i': op, 'dest_i': dest, 'src1_i': src1, 'src2_i': src2}]
 754
 755     sendlen = 1
 756     for idx in range(sendlen):
 757         yield from eq(dut.data_i[idx], instrs[idx])
 758         di = yield dut.data_i[idx]
 759         print ("senddata %d %x" % (idx, di))
 760     yield dut.p_add_i.eq(sendlen)
 761     yield
 762     o_p_ready = yield dut.p_ready_o
 763     while not o_p_ready:
 764         yield
 765         o_p_ready = yield dut.p_ready_o
 766
 767     yield dut.p_add_i.eq(0)
 768
 769
 770 def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail):
 771     yield from disable_issue(dut)
 772     yield dut.int_dest_i.eq(dest)
 773     yield dut.int_src1_i.eq(src1)
 774     yield dut.int_src2_i.eq(src2)
 775     if (op & (0x3<<2)) != 0: # branch
 776         yield dut.brissue.insn_i.eq(1)
 777         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 778         dut_issue = dut.brissue
 779     else:
 780         yield dut.aluissue.insn_i.eq(1)
 781         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 782         dut_issue = dut.aluissue
 783     yield dut.reg_enable_i.eq(1)
 784
 785     # these indicate that the instruction is to be made shadow-dependent on
 786     # (either) branch success or branch fail
 787     yield dut.branch_fail_i.eq(branch_fail)
 788     yield dut.branch_succ_i.eq(branch_success)
 789
 790     yield
 791     yield from wait_for_issue(dut, dut_issue)
 792
 793
 794 def print_reg(dut, rnums):
 795     rs = []
 796     for rnum in rnums:
 797         reg = yield dut.intregs.regs[rnum].reg
 798         rs.append("%x" % reg)
 799     rnums = map(str, rnums)
 800     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 801
 802
 803 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 804     insts = []
 805     for i in range(n_ops):
 806         src1 = randint(1, dut.n_regs-1)
 807         src2 = randint(1, dut.n_regs-1)
 808         dest = randint(1, dut.n_regs-1)
 809         op = randint(0, max_opnums)
 810
 811         if shadowing:
 812             insts.append((src1, src2, dest, op, (0, 0)))
 813         else:
 814             insts.append((src1, src2, dest, op))
 815     return insts
 816
 817
 818 def wait_for_busy_clear(dut):
 819     while True:
 820         busy_o = yield dut.busy_o
 821         if not busy_o:
 822             break
 823         print ("busy",)
 824         yield
 825
 826 def disable_issue(dut):
 827     yield dut.aluissue.insn_i.eq(0)
 828     yield dut.brissue.insn_i.eq(0)
 829
 830
 831 def wait_for_issue(dut, dut_issue):
 832     while True:
 833         issue_o = yield dut_issue.fn_issue_o
 834         if issue_o:
 835             yield from disable_issue(dut)
 836             yield dut.reg_enable_i.eq(0)
 837             break
 838         print ("busy",)
 839         #yield from print_reg(dut, [1,2,3])
 840         yield
 841     #yield from print_reg(dut, [1,2,3])
 842
 843 def scoreboard_branch_sim(dut, alusim):
 844
 845     iseed = 3
 846
 847     for i in range(1):
 848
 849         print ("rseed", iseed)
 850         seed(iseed)
 851         iseed += 1
 852
 853         yield dut.branch_direction_o.eq(0)
 854
 855         # set random values in the registers
 856         for i in range(1, dut.n_regs):
 857             val = 31+i*3
 858             val = randint(0, (1<<alusim.rwidth)-1)
 859             yield dut.intregs.regs[i].reg.eq(val)
 860             alusim.setval(i, val)
 861
 862         if False:
 863             # create some instructions: branches create a tree
 864             insts = create_random_ops(dut, 1, True, 1)
 865             #insts.append((6, 6, 1, 2, (0, 0)))
 866             #insts.append((4, 3, 3, 0, (0, 0)))
 867
 868             src1 = randint(1, dut.n_regs-1)
 869             src2 = randint(1, dut.n_regs-1)
 870             #op = randint(4, 7)
 871             op = 4 # only BGT at the moment
 872
 873             branch_ok = create_random_ops(dut, 1, True, 1)
 874             branch_fail = create_random_ops(dut, 1, True, 1)
 875
 876             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 877
 878         if True:
 879             insts = []
 880             insts.append( (3, 5, 2, 0, (0, 0)) )
 881             branch_ok = []
 882             branch_fail = []
 883             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
 884             branch_ok.append( None )
 885             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
 886             #branch_fail.append( None )
 887             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
 888
 889         siminsts = deepcopy(insts)
 890
 891         # issue instruction(s)
 892         i = -1
 893         instrs = insts
 894         branch_direction = 0
 895         while instrs:
 896             yield
 897             yield
 898             i += 1
 899             branch_direction = yield dut.branch_direction_o # way branch went
 900             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
 901             if branch_direction == 1 and shadow_on:
 902                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 903                 continue # branch was "success" and this is a "failed"... skip
 904             if branch_direction == 2 and shadow_off:
 905                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 906                 continue # branch was "fail" and this is a "success"... skip
 907             if branch_direction != 0:
 908                 shadow_on = 0
 909                 shadow_off = 0
 910             is_branch = op >= 4
 911             if is_branch:
 912                 branch_ok, branch_fail = dest
 913                 dest = src2
 914                 # ok zip up the branch success / fail instructions and
 915                 # drop them into the queue, one marked "to have branch success"
 916                 # the other to be marked shadow branch "fail".
 917                 # one out of each of these will be cancelled
 918                 for ok, fl in zip(branch_ok, branch_fail):
 919                     if ok:
 920                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
 921                     if fl:
 922                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
 923             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
 924                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 925             yield from int_instr(dut, op, src1, src2, dest,
 926                                  shadow_on, shadow_off)
 927
 928         # wait for all instructions to stop before checking
 929         yield
 930         yield from wait_for_busy_clear(dut)
 931
 932         i = -1
 933         while siminsts:
 934             instr = siminsts.pop(0)
 935             if instr is None:
 936                 continue
 937             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
 938             i += 1
 939             is_branch = op >= 4
 940             if is_branch:
 941                 branch_ok, branch_fail = dest
 942                 dest = src2
 943             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
 944                             (i, src1, src2, dest, op, shadow_on, shadow_off))
 945             branch_res = alusim.op(op, src1, src2, dest)
 946             if is_branch:
 947                 if branch_res:
 948                     siminsts += branch_ok
 949                 else:
 950                     siminsts += branch_fail
 951
 952         # check status
 953         yield from alusim.check(dut)
 954         yield from alusim.dump(dut)
 955
 956
 957 def scoreboard_sim(dut, alusim):
 958
 959     #seed(2)
 960
 961     for i in range(1):
 962
 963         # set random values in the registers
 964         for i in range(1, dut.n_regs):
 965             val = randint(0, (1<<alusim.rwidth)-1)
 966             #val = 31+i*3
 967             #val = i
 968             yield dut.intregs.regs[i].reg.eq(val)
 969             alusim.setval(i, val)
 970
 971         # create some instructions (some random, some regression tests)
 972         instrs = []
 973         if True:
 974             instrs = create_random_ops(dut, 15, True, 3)
 975
 976         if False:
 977             instrs.append( (7, 3, 2, 4, (0, 0)) )
 978             instrs.append( (7, 6, 6, 2, (0, 0)) )
 979             instrs.append( (1, 7, 2, 2, (0, 0)) )
 980
 981
 982         if False:
 983             instrs.append((2, 3, 3, 0, (0, 0)))
 984             instrs.append((5, 3, 3, 1, (0, 0)))
 985             instrs.append((3, 5, 5, 2, (0, 0)))
 986             instrs.append((5, 3, 3, 3, (0, 0)))
 987             instrs.append((3, 5, 5, 0, (0, 0)))
 988
 989         if False:
 990             instrs.append((5, 6, 2, 1))
 991             instrs.append((2, 2, 4, 0))
 992             #instrs.append((2, 2, 3, 1))
 993
 994         if False:
 995             instrs.append((2, 1, 2, 3))
 996
 997         if False:
 998             instrs.append((2, 6, 2, 1))
 999             instrs.append((2, 1, 2, 0))
1000
1001         if False:
1002             instrs.append((1, 2, 7, 2))
1003             instrs.append((7, 1, 5, 0))
1004             instrs.append((4, 4, 1, 1))
1005
1006         if False:
1007             instrs.append((5, 6, 2, 2))
1008             instrs.append((1, 1, 4, 1))
1009             instrs.append((6, 5, 3, 0))
1010
1011         if False:
1012             # Write-after-Write Hazard
1013             instrs.append( (3, 6, 7, 2) )
1014             instrs.append( (4, 4, 7, 1) )
1015
1016         if False:
1017             # self-read/write-after-write followed by Read-after-Write
1018             instrs.append((1, 1, 1, 1))
1019             instrs.append((1, 5, 3, 0))
1020
1021         if False:
1022             # Read-after-Write followed by self-read-after-write
1023             instrs.append((5, 6, 1, 2))
1024             instrs.append((1, 1, 1, 1))
1025
1026         if False:
1027             # self-read-write sandwich
1028             instrs.append((5, 6, 1, 2))
1029             instrs.append((1, 1, 1, 1))
1030             instrs.append((1, 5, 3, 0))
1031
1032         if False:
1033             # very weird failure
1034             instrs.append( (5, 2, 5, 2) )
1035             instrs.append( (2, 6, 3, 0) )
1036             instrs.append( (4, 2, 2, 1) )
1037
1038         if False:
1039             v1 = 4
1040             yield dut.intregs.regs[5].reg.eq(v1)
1041             alusim.setval(5, v1)
1042             yield dut.intregs.regs[3].reg.eq(5)
1043             alusim.setval(3, 5)
1044             instrs.append((5, 3, 3, 4, (0, 0)))
1045             instrs.append((4, 2, 1, 2, (0, 1)))
1046
1047         if False:
1048             v1 = 6
1049             yield dut.intregs.regs[5].reg.eq(v1)
1050             alusim.setval(5, v1)
1051             yield dut.intregs.regs[3].reg.eq(5)
1052             alusim.setval(3, 5)
1053             instrs.append((5, 3, 3, 4, (0, 0)))
1054             instrs.append((4, 2, 1, 2, (1, 0)))
1055
1056         if False:
1057             instrs.append( (4, 3, 5, 1, (0, 0)) )
1058             instrs.append( (5, 2, 3, 1, (0, 0)) )
1059             instrs.append( (7, 1, 5, 2, (0, 0)) )
1060             instrs.append( (5, 6, 6, 4, (0, 0)) )
1061             instrs.append( (7, 5, 2, 2, (1, 0)) )
1062             instrs.append( (1, 7, 5, 0, (0, 1)) )
1063             instrs.append( (1, 6, 1, 2, (1, 0)) )
1064             instrs.append( (1, 6, 7, 3, (0, 0)) )
1065             instrs.append( (6, 7, 7, 0, (0, 0)) )
1066
1067         # issue instruction(s), wait for issue to be free before proceeding
1068         for i, (src1, src2, dest, op, (br_ok, br_fail)) in enumerate(instrs):
1069
1070             print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op))
1071             alusim.op(op, src1, src2, dest)
1072             yield from instr_q(dut, op, src1, src2, dest, br_ok, br_fail)
1073
1074         # wait for all instructions to stop before checking
1075         while True:
1076             iqlen = yield dut.qlen_o
1077             if iqlen == 0:
1078                 break
1079             yield
1080         yield
1081         yield
1082         yield
1083         yield
1084         yield from wait_for_busy_clear(dut)
1085
1086         # check status
1087         yield from alusim.check(dut)
1088         yield from alusim.dump(dut)
1089
1090
def test_scoreboard():
    """Build the scoreboard DUT, write its RTLIL and run the queue test.

    The RTLIL text is written to ``test_scoreboard6600.il`` and the
    simulation trace to ``test_scoreboard6600.vcd``.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    # constructed as in the original; not referenced again in this function
    memsim = MemSim(16, 16)

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative bench exercising branches:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1104
1105
# run the scoreboard test bench when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()