1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
6 from soc.regfile.regfile import RegFileArray, treereduce
7 from soc.scoreboard.fu_fu_matrix import FUFUDepMatrix
8 from soc.scoreboard.fu_reg_matrix import FURegDepMatrix
9 from soc.scoreboard.global_pending import GlobalPending
10 from soc.scoreboard.group_picker import GroupPicker
11 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
13 from soc.scoreboard.instruction_q import Instruction, InstructionQ
14 from soc.scoreboard.memfu import MemFunctionUnits
16 from compalu import ComputationUnitNoDelay
17 from compldst import LDSTCompUnit
18 from testmem import TestMemory
20 from alu_hier import ALU, BranchALU
21 from nmutil.latch import SRLatch
22 from nmutil.nmoperator import eq
24 from random import randint, seed
25 from copy import deepcopy
26 from math import log
class MemSim:
    """Plain-Python model of a small memory, for use by the test-bench.

    NOTE(review): in the reviewed text only the ``class`` line and the
    statement bodies were present; the ``def`` lines were missing.  The
    signatures below are rebuilt from the free variables those statements
    use (``regwid``, ``addrw``, ``addr``, ``data``).  The method names
    ``st``/``ld`` are an inference - confirm them against the callers.
    """

    def __init__(self, regwid, addrw):
        """Create the memory.

        * :regwid: bit width of a stored value (used to mask stores)
        * :addrw:  address width in bits; ``1 << addrw`` sets the depth
        """
        self.regwid = regwid
        self.ddepth = 1  # regwid//8
        depth = (1 << addrw) // self.ddepth
        # each cell starts out holding its own index
        self.mem = list(range(depth))

    def ld(self, addr):
        """Return the value stored at *addr* (same indexing as ``st``)."""
        return self.mem[addr >> self.ddepth]

    def st(self, addr, data):
        """Store *data*, truncated to ``regwid`` bits, at *addr*."""
        self.mem[addr >> self.ddepth] = data & ((1 << self.regwid) - 1)
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """
    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when true, the LD/ST control ports (go_st_i,
                         ld_o, st_o, sto_rel_o, stwd_mem_o) are created
                         and wired as well
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        # a group of groups is as wide as the sum of its members;
        # a group of plain units has one bit per unit.
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)
        self.done_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """Add every unit as a submodule and concatenate its control bits.

        Per-unit outputs are Cat()ed (unit 0 in the lowest position) into
        this group's outputs; this group's inputs are fanned out to the
        Cat() of the per-unit inputs.
        """
        m = Module()
        comb = m.d.comb

        for i, alu in enumerate(self.units):
            setattr(m.submodules, "comp%d" % i, alu)

        def bundle(name):
            # concatenation of the same-named port of every unit
            return Cat(*[getattr(unit, name) for unit in self.units])

        # unit outputs -> group outputs
        for name in ("rd_rel_o", "req_rel_o", "done_o", "busy_o"):
            comb += getattr(self, name).eq(bundle(name))
        # group inputs -> unit inputs
        for name in ("go_die_i", "go_wr_i", "go_rd_i", "issue_i"):
            comb += bundle(name).eq(getattr(self, name))

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        if self.units:
            data_o = treereduce(self.units, "data_o")
            comb += self.data_o.eq(data_o)
            if self.ldstmode:
                # NOTE(review): the statements that belonged to this
                # branch are absent from the reviewed text (the listing
                # numbering skips here); nothing is wired for now.
                pass

        # every unit sees the same two source operands
        for alu in self.units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only ports: outputs first, then the go_st input
        for name in ("ld_o", "st_o", "sto_rel_o", "stwd_mem_o"):
            comb += getattr(self, name).eq(bundle(name))
        comb += bundle("go_st_i").eq(self.go_st_i)

        return m
class CompUnitLDSTs(CompUnitsBase):
    """Group of LD/ST Computation Units that all receive the same operation."""

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST units to create
            * :mem:     memory object handed to every LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: one per LD/ST unit
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        aluopwid = 4  # see compldst.py for "internal" opcode
        units = [LDSTCompUnit(rwid, aluopwid, alu, mem) for alu in self.alus]

        CompUnitsBase.__init__(self, rwid, units, ldstmode=True)

    def elaborate(self, platform):
        """Do the base-class wiring, then broadcast operation and immediate."""
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units, 4 lower bits though
        for alu in self.units:
            comb += alu.oper_i[0:4].eq(self.oper_i)
            comb += alu.imm_i.eq(self.imm_i)
            comb += alu.isalu_i.eq(0)

        return m
class CompUnitALUs(CompUnitsBase):
    """Group of integer ALU Computation Units sharing one operation input."""

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU computation units to create
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, each wrapped in its own computation unit
        alus = [ALU(rwid) for _ in range(n_alus)]

        aluopwid = 3  # extra bit for immediate mode
        units = [ComputationUnitNoDelay(rwid, aluopwid, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        """Do the base-class wiring, then broadcast operation and immediate."""
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units, only lower 3 bits though
        for alu in self.units:
            comb += alu.oper_i[0:3].eq(self.oper_i)
            comb += alu.imm_i.eq(self.imm_i)

        return m
class CompUnitBR(CompUnitsBase):
    """Single branch Computation Unit, wrapped as a one-entry group."""

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width

            Note: the branch ALU (``self.bgt``) and its computation unit
            (``self.br1``) are kept as attributes so that a shadow unit
            can be created for them.
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        aluopwid = 3  # extra bit for immediate mode
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """Do the base-class wiring, then pass on operation and immediate."""
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units
        for alu in self.units:
            comb += alu.oper_i.eq(self.oper_i)
            comb += alu.imm_i.eq(self.imm_i)

        return m
class FunctionUnits(Elaboratable):
    """Dependency tracking for the integer Function Units.

    Instantiates an FU-FU dependency matrix and an FU-Reg dependency
    matrix, cross-connects their pending vectors and exposes the issue /
    go_rd / go_wr / go_die controls plus the register-select outputs.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     number of registers (width of the reg vectors)
            * :n_int_alus: number of Function Units (width of the FU vectors)
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here (it is set as
        # self.wr_pend_o during elaborate), for use in WaWGrid

    def elaborate(self, platform):
        """Create both dependency matrices and connect them up."""
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
        m.submodules.intregdeps = intregdeps

        # global pending vectors come straight from the FU-Reg matrix...
        comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
        comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)

        # ...and are also fed back into it
        comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)

        # FU-Reg pending outputs drive the FU-FU matrix
        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_rd_i.eq(self.go_rd_i)
        comb += intfudeps.go_wr_i.eq(self.go_wr_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)

        comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        comb += intregdeps.dest_i.eq(self.dest_i)
        comb += intregdeps.src_i[0].eq(self.src1_i)
        comb += intregdeps.src_i[1].eq(self.src2_i)

        comb += intregdeps.go_rd_i.eq(self.go_rd_i)
        comb += intregdeps.go_wr_i.eq(self.go_wr_i)
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
        comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
        comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])

        return m
386 class Scoreboard(Elaboratable):
# Top-level scoreboard module: owns the register files, the test memory,
# the three issue groups (ALU, LD/ST, branch) and, in elaborate(), the
# computation units, dependency matrices and pickers that get wired up.
# NOTE(review): this text is a numbered listing with the indentation lost,
# and the embedded numbers skip in many places, i.e. source lines are
# missing.  Comments below flag every name that is used but whose
# definition is not present in the visible text.
387 def __init__(self, rwid, n_regs):
388 """ Inputs:
390 * :rwid: bit width of register file(s) - both FP and INT
391 * :n_regs: depth of register file(s) - number of FP and INT regs
392 """
393 self.rwid = rwid
394 self.n_regs = n_regs
396 # Register Files
397 self.intregs = RegFileArray(rwid, n_regs)
398 self.fpregs = RegFileArray(rwid, n_regs)
400 # Memory (test for now)
401 self.mem = TestMemory(self.rwid, 8) # not too big, takes too long
403 # issue q needs to get at these
# group sizes: 2 ALU slots, 2 LD/ST slots, 1 branch slot
404 self.aluissue = IssueUnitGroup(2)
405 self.lsissue = IssueUnitGroup(2)
406 self.brissue = IssueUnitGroup(1)
407 # and these
# operation + immediate inputs, one pair per function-unit group
408 self.alu_oper_i = Signal(4, reset_less=True)
409 self.alu_imm_i = Signal(rwid, reset_less=True)
410 self.br_oper_i = Signal(4, reset_less=True)
411 self.br_imm_i = Signal(rwid, reset_less=True)
412 self.ls_oper_i = Signal(4, reset_less=True)
413 self.ls_imm_i = Signal(rwid, reset_less=True)
415 # inputs
# register *numbers* (binary); decoded via RegDecode in elaborate()
416 self.int_dest_i = Signal(range(n_regs), reset_less=True) # Dest R# in
417 self.int_src1_i = Signal(range(n_regs), reset_less=True) # oper1 R# in
418 self.int_src2_i = Signal(range(n_regs), reset_less=True) # oper2 R# in
419 self.reg_enable_i = Signal(reset_less=True) # enable reg decode
421 # outputs
422 self.issue_o = Signal(reset_less=True) # instruction was accepted
423 self.busy_o = Signal(reset_less=True) # at least one CU is busy
425 # for branch speculation experiment. branch_direction = 0 if
426 # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
427 # branch_succ and branch_fail are requests to have the current
428 # instruction be dependent on the branch unit "shadow" capability.
429 self.branch_succ_i = Signal(reset_less=True)
430 self.branch_fail_i = Signal(reset_less=True)
431 self.branch_direction_o = Signal(2, reset_less=True)
433 def elaborate(self, platform):
# Builds the module hierarchy and all combinatorial/sync connections.
434 m = Module()
435 comb = m.d.comb
436 sync = m.d.sync
438 m.submodules.intregs = self.intregs
439 m.submodules.fpregs = self.fpregs
440 m.submodules.mem = mem = self.mem
442 # register ports
443 int_dest = self.intregs.write_port("dest")
# NOTE(review): int_src1 / int_src2 are used further down but are never
# assigned in the visible text (listing numbers 444-446 are skipped).
447 fp_dest = self.fpregs.write_port("dest")
451 # Int ALUs and BR ALUs
# NOTE(review): 5 presumably equals the 2 + 2 + 1 issue-group sizes set
# in __init__ - confirm; it is not derived from them here.
452 n_int_alus = 5
453 cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
454 cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
456 # LDST Comp Units
457 n_ldsts = 2
458 cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)
460 # Comp Units
# the three groups are themselves grouped: order is ALUs, LD/STs, branch
461 m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
462 bgt = cub.bgt # get at the branch computation unit
463 br1 = cub.br1
465 # Int FUs
466 m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
468 # Memory FUs
469 m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
471 # Memory Priority Picker 1: one gateway per memory port
472 mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
473 m.submodules.mempick1 = mempick1
475 # Count of number of FUs
476 n_intfus = n_int_alus
477 n_fp_fus = 0 # for now
479 # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
480 intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
481 m.submodules.intpick1 = intpick1
483 # INT/FP Issue Unit
484 regdecode = RegDecode(self.n_regs)
485 m.submodules.regdecode = regdecode
486 issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
487 m.submodules.issueunit = issueunit
490 # write-after-write hazards. NOTE: there is one extra for branches,
491 # so the shadow width is increased by 1
495 # record previous instruction to cast shadow on current instruction
498 # Branch Speculation recorder. tracks the success/fail state as
499 # each instruction is issued, so that when the branch occurs the
500 # allow/cancel can be issued as appropriate.
# NOTE(review): BranchSpeculationRecord is not among the imports visible
# at the top of this text.
501 m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
503 #---------
504 # ok start wiring things together...
505 # "now hear de word of de looord... dem bones dem bones dem dryy bones"
507 #---------
509 #---------
510 # Issue Unit is where it starts. set up some in/outs for this module
511 #---------
512 comb += [ regdecode.dest_i.eq(self.int_dest_i),
513 regdecode.src1_i.eq(self.int_src1_i),
514 regdecode.src2_i.eq(self.int_src2_i),
515 regdecode.enable_i.eq(self.reg_enable_i),
516 self.issue_o.eq(issueunit.issue_o)
517 ]
519 # take these to outside (issue needs them)
520 comb += cua.oper_i.eq(self.alu_oper_i)
521 comb += cua.imm_i.eq(self.alu_imm_i)
522 comb += cub.oper_i.eq(self.br_oper_i)
523 comb += cub.imm_i.eq(self.br_imm_i)
524 comb += cul.oper_i.eq(self.ls_oper_i)
525 comb += cul.imm_i.eq(self.ls_imm_i)
527 # TODO: issueunit.f (FP)
529 # and int function issue / busy arrays, and dest/src1/src2
530 comb += intfus.dest_i.eq(regdecode.dest_o)
531 comb += intfus.src1_i.eq(regdecode.src1_o)
532 comb += intfus.src2_i.eq(regdecode.src2_o)
534 fn_issue_o = issueunit.fn_issue_o
536 comb += intfus.fn_issue_i.eq(fn_issue_o)
537 comb += issueunit.busy_i.eq(cu.busy_o)
# busy_o is the OR-reduction of every unit's busy bit
538 comb += self.busy_o.eq(cu.busy_o.bool())
540 #---------
541 # Memory Function Unit
542 #---------
# reset_b is a one-clock-delayed copy of (go_st | go_wr | go_die)
543 reset_b = Signal(cul.n_units, reset_less=True)
544 sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
546 comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
548 comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
550 # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
551 # in a transitive fashion). This cycle activates based on LDSTCompUnit
552 # issue_i. multi-issue gets a bit more complex but not a lot.
553 prior_ldsts = Signal(cul.n_units, reset_less=True)
554 sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
555 with m.If(self.ls_oper_i[3]): # LD bit of operand
556 comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
557 with m.If(self.ls_oper_i[2]): # ST bit of operand
558 comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
560 # TODO: adr_rel_o needs to go into L1 Cache. for now,
561 # just immediately activate go_adr
564 # connect up address data
568 # connect loadable / storable to go_ld/go_st.
569 # XXX should only be done when the memory ld/st has actually happened!
570 go_st_i = Signal(cul.n_units, reset_less=True)
571 go_ld_i = Signal(cul.n_units, reset_less=True)
# NOTE(review): go_ld_i is read below but no assignment to it is present
# in the visible text (listing numbers 572-573 are skipped).
574 comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
575 cul.sto_rel_o & cul.st_o)
576 comb += memfus.go_ld_i.eq(go_ld_i)
577 comb += memfus.go_st_i.eq(go_st_i)
578 #comb += cul.go_wr_i.eq(go_ld_i)
579 comb += cul.go_st_i.eq(go_st_i)
581 #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
582 #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
583 #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
585 #---------
586 # merge shadow matrices outputs
587 #---------
589 # these are explained in ShadowMatrix docstring, and are to be
590 # connected to the FUReg and FUFU Matrices, to get them to reset
# NOTE(review): anydie is read below but never assigned in the visible text.
591 anydie = Signal(n_intfus, reset_less=True)
593 shreset = Signal(n_intfus, reset_less=True)
596 comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
598 #---------
599 # connect fu-fu matrix
600 #---------
602 # Group Picker... done manually for now.
603 go_rd_o = intpick1.go_rd_o
604 go_wr_o = intpick1.go_wr_o
605 go_rd_i = intfus.go_rd_i
606 go_wr_i = intfus.go_wr_i
607 go_die_i = intfus.go_die_i
608 # NOTE: connect to the shadowed versions so that they can "die" (reset)
609 comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
610 comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
611 comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
613 # Connect Picker
614 #---------
# note: the picker's request-release input is fed from done_o, not req_rel_o
615 comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
616 comb += intpick1.req_rel_i[0:n_intfus].eq(cu.done_o[0:n_intfus])
618 int_wr_o = intfus.writable_o
620 comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
622 #---------
624 #---------
629 #---------
630 # NOTE; this setup is for the instruction order preservation...
632 # connect shadows / go_dies to Computation Units
634 comb += cu.go_die_i[0:n_intfus].eq(anydie)
636 # ok connect first n_int_fu shadows to busy lines, to create an
637 # instruction-order linked-list-like arrangement, using a bit-matrix
638 # (instead of e.g. a ring buffer).
640 # when written, the shadow can be cancelled (and was good)
# NOTE(review): the body of this loop is not present in the visible text.
641 for i in range(n_intfus):
644 # *previous* instruction shadows *current* instruction, and, obviously,
645 # if the previous is completed (!busy) don't cast the shadow!
# NOTE(review): prev_shadow is not defined anywhere in the visible text.
646 comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
# NOTE(review): the body of this loop is not present in the visible text.
647 for i in range(n_intfus):
650 #---------
651 # ... and this is for branch speculation. it uses the extra bit
653 # only needs to set shadow_i, s_fail_i and s_good_i
655 # issue captures shadow_i (if enabled)
# bactive: a branch is (or is just becoming) outstanding and has not yet
# received go_wr
658 bactive = Signal(reset_less=True)
659 comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
661 # instruction being issued (fn_issue_o) has a shadow cast by the branch
# NOTE(review): the statements inside the innermost If below are missing.
662 with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
664 for i in range(n_intfus):
665 with m.If(fn_issue_o & (Const(1<<i))):
668 # finally, we need an indicator to the test infrastructure as to
669 # whether the branch succeeded or failed, plus, link up to the
670 # "recorder" of whether the instruction was under shadow or not
672 with m.If(br1.issue_i):
673 sync += bspec.active_i.eq(1)
# NOTE(review): 0x1f is a 5-bit mask, presumably matching n_int_alus = 5;
# it is hard-coded rather than derived (see the XXX markers).
674 with m.If(self.branch_succ_i):
675 comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
676 with m.If(self.branch_fail_i):
677 comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
679 # branch is active (TODO: a better signal: this is over-using the
680 # go_write signal - actually the branch should not be "writing")
681 with m.If(br1.go_wr_i):
# direction is recorded as data_o + 1, i.e. never 0 once a branch resolved
682 sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
683 sync += bspec.active_i.eq(0)
684 comb += bspec.br_i.eq(1)
685 # branch occurs if data == 1, failed if data == 0
686 comb += bspec.br_ok_i.eq(br1.data_o == 1)
# NOTE(review): the body of this loop is not present in the visible text.
687 for i in range(n_intfus):
688 # *expected* direction of the branch matched against *actual*
690 # ... or it didn't
693 #---------
694 # Connect Register File(s)
695 #---------
696 comb += int_dest.wen.eq(intfus.dest_rsel_o)
697 comb += int_src1.ren.eq(intfus.src1_rsel_o)
698 comb += int_src2.ren.eq(intfus.src2_rsel_o)
700 # connect ALUs to regfile
701 comb += int_dest.data_i.eq(cu.data_o)
702 comb += cu.src1_i.eq(int_src1.data_o)
703 comb += cu.src2_i.eq(int_src2.data_o)
705 # connect ALU Computation Units
706 comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
707 comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
708 comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
710 return m
712 def __iter__(self):
# Yields the port signals: both register files' ports first, then the
# register-number inputs, issue_o and the three branch signals.
713 yield from self.intregs
714 yield from self.fpregs
715 yield self.int_dest_i
716 yield self.int_src1_i
717 yield self.int_src2_i
718 yield self.issue_o
719 yield self.branch_succ_i
720 yield self.branch_fail_i
721 yield self.branch_direction_o
723 def ports(self):
# List form of __iter__.
724 return list(self)
class IssueToScoreboard(Elaboratable):
    """Glue between an InstructionQ and a Scoreboard.

    Looks at the instruction at the head of the queue, routes it to the
    branch, LD/ST or ALU issue group of the scoreboard, and pops it from
    the queue once that group reports it was issued.

    NOTE(review): the reviewed text is a numbered listing whose numbering
    skips in several places inside this class, so some statements may be
    missing from what is reproduced here.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length
            * :n_in:   number of instructions that can be added per cycle
            * :n_out:  number of instructions that can be removed per cycle
            * :rwid:   bit width of register file(s)
            * :opwid:  operand bit width
            * :n_regs: number of registers
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        mqbits = unsigned(int(log(qlen) / log(2))+2)

        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """Instantiate queue and scoreboard and connect issue logic."""
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  an item is only removed (iq.n_sub_i) once the issue
        # group it was offered to asserts fn_issue_o.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            for waiting, group in ((wait_issue_ls, sc.lsissue),
                                   (wait_issue_br, sc.brissue),
                                   (wait_issue_alu, sc.aluissue)):
                with m.If(waiting & (group.fn_issue_o != 0)):
                    with m.If(iq.qlen_o != 0):
                        comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            head = iq.data_o[0]
            imm = head.imm_i
            dest = head.dest_i
            src1 = head.src1_i
            src2 = head.src2_i
            op = head.oper_i
            opi = head.opim_i  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile

            # choose a Function-Unit-Group
            with m.If((op & (0x3 << 2)) != 0):  # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3 << 4)) != 0):  # ld/st
                # see compldst.py
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)
            with m.Else():  # alu
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """Yield every field of every incoming instruction record."""
        for o in self.data_i:
            yield from list(o)

    def ports(self):
        """List form of ``__iter__``."""
        return list(self)
# Operation numbers understood by RegSim.op.  IADD was absent from the
# reviewed text although the addition branch of RegSim.op needs it; the
# sequence below starts at 1, so 0 is restored here.
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """Pure-Python reference model of the integer register file and ALU."""

    def __init__(self, rwidth, nregs):
        """Create *nregs* registers of *rwidth* bits, all zero."""
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """Execute one operation and store the result in register *dest*.

        * :op:     operation number (IADD .. IBNE)
        * :op_imm: when true the second operand is *imm*, not a register
        * :imm:    immediate value
        * :src1:   register number of the first operand
        * :src2:   register number of the second operand
        * :dest:   register number that receives the result

        Returns the stored value (masked to ``rwidth`` bits).  An
        unrecognised *op* returns 0 and leaves the registers untouched.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        if op_imm:
            src2 = imm
        else:
            src2 = self.regs[src2] & maxbits
        if op == IADD:
            val = src1 + src2
        elif op == ISUB:
            val = src1 - src2
        elif op == IMUL:
            val = src1 * src2
        elif op == ISHF:
            val = src1 >> (src2 & maxbits)
        elif op == IBGT:
            val = int(src1 > src2)
        elif op == IBLT:
            val = int(src1 < src2)
        elif op == IBEQ:
            val = int(src1 == src2)
        elif op == IBNE:
            val = int(src1 != src2)
        else:
            return 0  # LD/ST TODO
        val &= maxbits  # wrap negative / oversized results to rwidth bits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """Write *val* to register *dest* (and log it)."""
        print("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation process: print expected vs. actual for each register.

        This is a generator; each ``yield`` hands a register signal of
        ``dut.intregs`` to the simulator and receives its current value.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation process: fail on the first register mismatch.

        On a mismatch the full register dump is printed first and then
        AssertionError is raised (explicitly, so that the check is not
        removed when Python runs with ``-O``).
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """Simulation process: present one instruction on ``dut.data_i``.

    The instruction fields are written to ``dut.data_i[0]``, the combined
    value is read back and printed, and then two clock ticks elapse.

    NOTE(review): the listing numbering skips around the two trailing
    ``yield`` statements, so any signalling that sat between them is not
    visible and is not reproduced.  ``branch_success`` / ``branch_fail``
    are not used by the visible statements.
    """
    instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
               'src1_i': src1, 'src2_i': src2}]

    sendlen = 1
    for idx in range(sendlen):
        # eq() produces one assignment per record field
        yield from eq(dut.data_i[idx], instrs[idx])
        di = yield dut.data_i[idx]
        print("senddata %d %x" % (idx, di))

    yield

    yield
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: issue one instruction directly to the scoreboard.

    * :op:   operation number; if bit 2 or 3 is set it goes to the branch
             issue group, otherwise to the ALU issue group.  Only the low
             two bits are passed on as the unit's operation.
    * :imm:  immediate value
    * :src1, src2, dest: register numbers
    * :branch_success, branch_fail: shadow requests for this instruction

    Returns (via the generator) once the chosen group reports fn_issue_o.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    if (op & (0x3 << 2)) != 0:  # branch
        dut_issue, oper_i, imm_i = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_i, imm_i = dut.aluissue, dut.alu_oper_i, dut.alu_imm_i
    yield dut_issue.insn_i.eq(1)
    yield oper_i.eq(Const(op & 0x3, 2))
    yield imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
def print_reg(dut, rnums):
    """Simulation process: print the current value of the given registers.

    * :rnums: sequence of register numbers to read from ``dut.intregs``

    Output is one line: the comma-separated register numbers, then the
    comma-separated values in hex.
    """
    values = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        values.append("%x" % reg)
    print("reg %s: %s" % (','.join(map(str, rnums)), ','.join(values)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of *n_ops* random instruction tuples.

    * :dut:        object providing ``n_regs`` and ``rwid``
    * :n_ops:      number of instructions to create
    * :shadowing:  when true each tuple gets a trailing ``(0, 0)``
                   (branch-success, branch-fail) shadow marker
    * :max_opnums: highest operation number to generate (inclusive)

    Each tuple is ``(src1, src2, dest, op, opi, imm[, (0, 0)])``.
    Register 0 is never chosen.

    NOTE(review): the ``if`` that pairs with the ``else`` below was
    missing from the reviewed text; ``if shadowing:`` is restored because
    the parameter is otherwise unused and the two branches differ only
    by the shadow tuple.
    """
    insts = []
    for _ in range(n_ops):
        # the order of the randint calls is kept so seeded runs reproduce
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        imm = randint(1, (1 << dut.rwid)-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)
        opi = 0 if randint(0, 2) else 1  # immediate mode only when draw is 0

        if shadowing:
            insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
        else:
            insts.append((src1, src2, dest, op, opi, imm))
    return insts
def wait_for_busy_clear(dut):
    """Simulation process: tick the clock until ``dut.busy_o`` reads false.

    Prints "busy" once for every cycle spent waiting.
    """
    while (yield dut.busy_o):
        print("busy")
        yield
def disable_issue(dut):
    """Simulation process: clear ``insn_i`` on the ALU, branch and LD/ST
    issue groups (in that order)."""
    for group in (dut.aluissue, dut.brissue, dut.lsissue):
        yield group.insn_i.eq(0)
def wait_for_issue(dut, dut_issue):
    """Simulation process: wait until *dut_issue* accepts the instruction.

    Ticks the clock (printing "busy") while ``dut_issue.fn_issue_o`` reads
    zero.  As soon as it is non-zero, all issue requests are withdrawn and
    register decoding is disabled, without a further clock tick.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy")
        #yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    #yield from print_reg(dut, [1,2,3])
1009 def scoreboard_branch_sim(dut, alusim):
# Simulation process for the branch-speculation experiment: seeds the
# registers, issues a short instruction list containing one branch with
# "success"/"fail" follow-on instructions, then replays the same list on
# alusim and compares the register contents.
# NOTE(review): this text is a numbered listing with indentation lost and
# several lines missing (the embedded numbers skip).  In particular the
# statements that unpack an instruction tuple into op/src1/src2/dest and
# that set shadow_on/shadow_off are not visible, and two statements that
# end in a line continuation have lost their following lines.
1011 iseed = 3
1013 for i in range(1):
1015 print ("rseed", iseed)
1016 seed(iseed)
1017 iseed += 1
1019 yield dut.branch_direction_o.eq(0)
1021 # set random values in the registers
1022 for i in range(1, dut.n_regs):
# the first assignment to val is immediately overwritten by the random one
1023 val = 31+i*3
1024 val = randint(0, (1<<alusim.rwidth)-1)
1025 yield dut.intregs.regs[i].reg.eq(val)
1026 alusim.setval(i, val)
# disabled variant: random instructions plus a random branch
1028 if False:
1029 # create some instructions: branches create a tree
1030 insts = create_random_ops(dut, 1, True, 1)
1031 #insts.append((6, 6, 1, 2, (0, 0)))
1032 #insts.append((4, 3, 3, 0, (0, 0)))
1034 src1 = randint(1, dut.n_regs-1)
1035 src2 = randint(1, dut.n_regs-1)
1036 #op = randint(4, 7)
1037 op = 4 # only BGT at the moment
1039 branch_ok = create_random_ops(dut, 1, True, 1)
1040 branch_fail = create_random_ops(dut, 1, True, 1)
1042 insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
# active variant: one fixed instruction followed by one fixed branch
# (op 4); for a branch the "dest" slot carries (branch_ok, branch_fail)
1044 if True:
1045 insts = []
1046 insts.append( (3, 5, 2, 0, (0, 0)) )
1047 branch_ok = []
1048 branch_fail = []
1049 #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
1050 branch_ok.append( None )
1051 branch_fail.append( (1, 1, 2, 0, (0, 1)) )
1052 #branch_fail.append( None )
1053 insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
# keep an untouched copy for the reference-model replay further down
1055 siminsts = deepcopy(insts)
1057 # issue instruction(s)
1058 i = -1
1059 instrs = insts
1060 branch_direction = 0
# NOTE(review): no statement that removes an entry from instrs is visible
# inside this loop; presumably it was on one of the missing lines.
1061 while instrs:
1062 yield
1063 yield
1064 i += 1
1065 branch_direction = yield dut.branch_direction_o # way branch went
# NOTE(review): shadow_on / shadow_off are not assigned in the visible text.
1067 if branch_direction == 1 and shadow_on:
1069 continue # branch was "success" and this is a "failed"... skip
1070 if branch_direction == 2 and shadow_off:
1072 continue # branch was "fail" and this is a "success"... skip
# NOTE(review): the body of the following if is missing.
1073 if branch_direction != 0:
1076 is_branch = op >= 4
1077 if is_branch:
1078 branch_ok, branch_fail = dest
1079 dest = src2
1080 # ok zip up the branch success / fail instructions and
1081 # drop them into the queue, one marked "to have branch success"
1082 # the other to be marked shadow branch "fail".
1083 # one out of each of these will be cancelled
1084 for ok, fl in zip(branch_ok, branch_fail):
1085 if ok:
1086 instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1087 if fl:
1088 instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1089 print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1091 yield from int_instr(dut, op, src1, src2, dest,
# NOTE(review): both statements above are truncated - the argument tuple
# of the print and the remaining int_instr arguments are missing.
1094 # wait for all instructions to stop before checking
1095 yield
1096 yield from wait_for_busy_clear(dut)
# replay the saved copy on the Python reference model
1098 i = -1
1099 while siminsts:
1100 instr = siminsts.pop(0)
# None entries are placeholders for "no instruction on this branch arm"
1101 if instr is None:
1102 continue
1104 i += 1
1105 is_branch = op >= 4
1106 if is_branch:
1107 branch_ok, branch_fail = dest
1108 dest = src2
1109 print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
# NOTE(review): the print above is truncated.  alusim.op is called with
# four arguments here, whereas RegSim.op in this file takes six
# (op, op_imm, imm, src1, src2, dest) - verify.
1111 branch_res = alusim.op(op, src1, src2, dest)
# follow only the arm that the reference model says the branch took
1112 if is_branch:
1113 if branch_res:
1114 siminsts += branch_ok
1115 else:
1116 siminsts += branch_fail
1118 # check status
1119 yield from alusim.check(dut)
1120 yield from alusim.dump(dut)
def scoreboard_sim(dut, alusim):
    """ Simulation process: push a fixed instruction list through the
        scoreboard and compare against the software model.

        This is a generator intended to be handed to run_simulation.

        Steps, in order:

        * seed the random number generator with 0
        * write the value N into register N (N = 1 .. dut.n_regs-1),
          both in the hardware (dut.intregs) and in the model (alusim)
        * build a list of instructions.  Each entry is a tuple
          (src1, src2, dest, op, opi, imm, (br_ok, br_fail))
        * for each entry: print it, apply it to the model with alusim.op,
          then queue it into the hardware with instr_q
        * poll dut.qlen_o until it reads zero, idle a few cycles, call
          wait_for_busy_clear, and finally run alusim.check / alusim.dump

        The "if True" / "if False" switches select which regression
        vectors are queued.  NOTE: several disabled groups hold tuples of
        only 4, 5 or 6 fields; they do not match the 7-field unpack in the
        issue loop and would raise ValueError if enabled as they stand.
    """
    seed(0)

    for _run in range(1):

        # preload the registers, mirroring every write into the model
        for regnum in range(1, dut.n_regs):
            # alternative values tried in the past:
            #   randint(0, (1<<alusim.rwidth)-1)
            #   31+regnum*3
            value = regnum
            yield dut.intregs.regs[regnum].reg.eq(value)
            alusim.setval(regnum, value)

        # instructions to issue (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            instrs += [
                (1, 2, 0, 0x20, 1, 1, (0, 0)),  # LD
                #(1, 2, 0, 0x10, 1, 1, (0, 0)),
            ]

        if True:
            instrs += [
                (1, 2, 2, 1, 1, 20, (0, 0)),
            ]

        if True:
            instrs += [
                (7, 3, 2, 4, 0, 0, (0, 0)),
                (7, 6, 6, 2, 0, 0, (0, 0)),
                (1, 7, 2, 2, 0, 0, (0, 0)),
            ]

        if True:
            instrs += [
                (2, 3, 3, 0, 0, 0, (0, 0)),
                (5, 3, 3, 1, 0, 0, (0, 0)),
                (3, 5, 5, 2, 0, 0, (0, 0)),
                (5, 3, 3, 3, 0, 0, (0, 0)),
                (3, 5, 5, 0, 0, 0, (0, 0)),
            ]

        if False:
            instrs += [
                (3, 3, 4, 0, 0, 13979, (0, 0)),
                (6, 4, 1, 2, 0, 40976, (0, 0)),
                (1, 4, 7, 4, 1, 23652, (0, 0)),
            ]

        # ---- older, disabled vectors (4-field format) ----

        if False:
            instrs += [
                (5, 6, 2, 1),
                (2, 2, 4, 0),
                #(2, 2, 3, 1),
            ]

        if False:
            instrs += [
                (2, 1, 2, 3),
            ]

        if False:
            instrs += [
                (2, 6, 2, 1),
                (2, 1, 2, 0),
            ]

        if False:
            instrs += [
                (1, 2, 7, 2),
                (7, 1, 5, 0),
                (4, 4, 1, 1),
            ]

        if False:
            instrs += [
                (5, 6, 2, 2),
                (1, 1, 4, 1),
                (6, 5, 3, 0),
            ]

        if False:
            # Write-after-Write Hazard
            instrs += [
                (3, 6, 7, 2),
                (4, 4, 7, 1),
            ]

        if False:
            instrs += [
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ]

        if False:
            instrs += [
                (5, 6, 1, 2),
                (1, 1, 1, 1),
            ]

        if False:
            instrs += [
                (5, 6, 1, 2),
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ]

        if False:
            # very weird failure
            instrs += [
                (5, 2, 5, 2),
                (2, 6, 3, 0),
                (4, 2, 2, 1),
            ]

        # ---- disabled vectors with register overrides (5-field format) ----

        if False:
            for regnum, value in ((5, 4), (3, 5)):
                yield dut.intregs.regs[regnum].reg.eq(value)
                alusim.setval(regnum, value)
            instrs += [
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (0, 1)),
            ]

        if False:
            for regnum, value in ((5, 6), (3, 5)):
                yield dut.intregs.regs[regnum].reg.eq(value)
                alusim.setval(regnum, value)
            instrs += [
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (1, 0)),
            ]

        # ---- disabled vectors (6-field format) ----

        if False:
            instrs += [
                (4, 3, 5, 1, 0, (0, 0)),
                (5, 2, 3, 1, 0, (0, 0)),
                (7, 1, 5, 2, 0, (0, 0)),
                (5, 6, 6, 4, 0, (0, 0)),
                (7, 5, 2, 2, 0, (1, 0)),
                (1, 7, 5, 0, 0, (0, 1)),
                (1, 6, 1, 2, 0, (1, 0)),
                (1, 6, 7, 3, 0, (0, 0)),
                (6, 7, 7, 0, 0, (0, 0)),
            ]

        # issue instruction(s): update the model first, then queue the
        # same operation into the hardware
        for idx, fields in enumerate(instrs):
            src1, src2, dest, op, opi, imm, shadow = fields
            br_ok, br_fail = shadow

            print("instr %d: (%d, %d, %d, %d, %d, %d)"
                  % (idx, src1, src2, dest, op, opi, imm))
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # poll the queue length, yielding once per poll, until it is zero
        while (yield dut.qlen_o) != 0:
            yield
        # four further bare yields before handing over to
        # wait_for_busy_clear
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status: compare hardware against the model, then dump
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
def test_scoreboard():
    """ Build the scoreboard, write it out as RTLIL, then simulate it.

        Creates an IssueToScoreboard instance together with the RegSim
        software model, converts the design to RTLIL (saved to
        test_scoreboard6600.il) and runs scoreboard_sim against it,
        recording a VCD trace in test_scoreboard6600.vcd.
    """
    il_filename = "test_scoreboard6600.il"
    vcd_filename = 'test_scoreboard6600.vcd'

    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    # constructed here but not handed to the simulation below
    memsim = MemSim(16, 8)

    # dump the design as RTLIL before simulating
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open(il_filename, "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name=vcd_filename)

    # alternative (disabled): the branch test
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
# run the scoreboard test when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()