Allow the formal engine to perform a same-cycle result in the ALU
[soc.git] / src / soc / experiment / score6600_multi.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
5 from nmigen.back.pysim import Delay
6
7 from soc.regfile.regfile import RegFileArray, ortreereduce
8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
10 from soc.scoreboard.global_pending import GlobalPending
11 from soc.scoreboard.group_picker import GroupPicker
12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
15 from soc.scoreboard.memfu import MemFunctionUnits
16
17 from soc.experiment.compalu import ComputationUnitNoDelay
18 from soc.experiment.compalu_multi import MultiCompUnit, go_record
19 from soc.experiment.compldst_multi import LDSTCompUnit
20 from soc.experiment.compldst_multi import CompLDSTOpSubset
21 from soc.experiment.l0_cache import TstL0CacheBuffer
22
23 # for testing purposes
24 from soc.config.test.test_loadstore import TestMemPspec
25 from soc.experiment.alu_hier import ALUFunctionUnit, BranchALU
26 from soc.fu.alu.alu_input_record import CompALUOpSubset
27
28 from openpower.decoder.power_enums import MicrOp, Function
29 from openpower.decoder.power_decoder import (create_pdecode)
30 from openpower.decoder.power_decoder2 import (PowerDecode2)
31 from openpower.decoder.power_decoder2 import Decode2ToExecute1Type
32
33 from openpower.simulator.program import Program
34
35
36 from nmutil.latch import SRLatch
37 from nmutil.nmoperator import eq
38
39 from random import randint, seed
40 from copy import deepcopy
41 from math import log
42
43 from soc.experiment.sim import RegSim, MemSim
44 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
45
46
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """

    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the extra LD/ST control and address
                         signals (go_ad_i, go_st_i, ld_o, st_o, ...) are
                         created here and wired up in elaborate()
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        if units and isinstance(units[0], CompUnitsBase):
            # group of groups: width is the total of the sub-group widths
            self.n_units = sum(u.n_units for u in self.units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.rd0 = go_record(n_units, "rd0")
        self.rd1 = go_record(n_units, "rd1")
        self.go_rd_i = [self.rd0.go_i, self.rd1.go_i]  # XXX HACK!
        self.wr0 = go_record(n_units, "wr0")
        self.go_wr_i = [self.wr0.go_i]
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = [self.rd0.rel_o, self.rd1.rel_o]  # HACK!
        self.req_rel_o = self.wr0.rel_o
        self.done_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.o_data = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)
        # input operand

    def elaborate(self, platform):
        """ Adds every unit as submodule "comp<N>", concatenates the
            per-unit control signals into this object's wide signals,
            OR-merges the unit data outputs and broadcasts src1/src2.

            Returns the populated Module.
        """
        m = Module()
        comb = m.d.comb

        for i, alu in enumerate(self.units):
            setattr(m.submodules, "comp%d" % i, alu)

        # gather each unit's signals, in unit order, ready for Cat()
        go_rd_l0 = []
        go_rd_l1 = []
        go_wr_l = []
        issue_l = []
        busy_l = []
        req_rel_l = []
        done_l = []
        rd_rel0_l = []
        rd_rel1_l = []
        shadow_l = []
        godie_l = []
        for alu in self.units:
            req_rel_l.append(alu.req_rel_o)
            done_l.append(alu.done_o)
            shadow_l.append(alu.shadown_i)
            godie_l.append(alu.go_die_i)
            print(alu, "rel", alu.req_rel_o, alu.rd_rel_o)
            rd_rel0_l.append(alu.rd_rel_o[0])
            rd_rel1_l.append(alu.rd_rel_o[1])
            go_wr_l.append(alu.go_wr_i)
            go_rd_l0.append(alu.go_rd_i[0])
            go_rd_l1.append(alu.go_rd_i[1])
            issue_l.append(alu.issue_i)
            busy_l.append(alu.busy_o)
        # unit outputs -> this object's outputs
        comb += self.rd0.rel_o.eq(Cat(*rd_rel0_l))
        comb += self.rd1.rel_o.eq(Cat(*rd_rel1_l))
        comb += self.req_rel_o.eq(Cat(*req_rel_l))
        comb += self.done_o.eq(Cat(*done_l))
        comb += self.busy_o.eq(Cat(*busy_l))
        # this object's inputs -> unit inputs
        comb += Cat(*godie_l).eq(self.go_die_i)
        comb += Cat(*shadow_l).eq(self.shadown_i)
        comb += Cat(*go_wr_l).eq(self.wr0.go_i)  # XXX TODO
        comb += Cat(*go_rd_l0).eq(self.rd0.go_i)
        comb += Cat(*go_rd_l1).eq(self.rd1.go_i)
        comb += Cat(*issue_l).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # XXX NOTE: this only works because there is a single "port"
        # protected by a single go_wr.  multi-issue requires a bus
        # to be inserted here.
        if self.units:
            o_data = ortreereduce(self.units, "o_data")
            comb += self.o_data.eq(o_data)
            if self.ldstmode:
                addr_o = ortreereduce(self.units, "addr_o")
                comb += self.addr_o.eq(addr_o)

        for alu in self.units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)
            # temporary: set read mask to 0b111111111
            if hasattr(alu, "rdmaskn"):
                with m.If(alu.busy_o):
                    comb += alu.rdmaskn.eq(-1)

        if not self.ldstmode:
            return m

        # LD/ST-only signals: same gather-and-concatenate treatment
        ldmem_l = []
        stmem_l = []
        go_ad_l = []
        go_st_l = []
        ld_l = []
        st_l = []
        adr_rel_l = []
        sto_rel_l = []
        for alu in self.units:
            ld_l.append(alu.ld_o)
            st_l.append(alu.st_o)
            adr_rel_l.append(alu.adr_rel_o)
            sto_rel_l.append(alu.sto_rel_o)
            ldmem_l.append(alu.load_mem_o)
            stmem_l.append(alu.stwd_mem_o)
            go_ad_l.append(alu.go_ad_i)
            go_st_l.append(alu.go_st_i)
        comb += self.ld_o.eq(Cat(*ld_l))
        comb += self.st_o.eq(Cat(*st_l))
        comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
        comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
        comb += self.load_mem_o.eq(Cat(*ldmem_l))
        comb += self.stwd_mem_o.eq(Cat(*stmem_l))
        comb += Cat(*go_ad_l).eq(self.go_ad_i)
        comb += Cat(*go_st_l).eq(self.go_st_i)

        return m
219
220
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LDSTCompUnits, all handed the same operation record. """

    def __init__(self, rwid, opwid, n_ldsts, l0):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST Computation Units to create
            * :l0:      object whose l0.dports[i] is passed to unit i
        """
        self.opwid = opwid

        # inputs
        self.op = CompLDSTOpSubset("cul_i")

        # LD/ST Units: one per data port
        ldst_units = [LDSTCompUnit(l0.l0.dports[port], rwid, awid=48)
                      for port in range(n_ldsts)]

        super().__init__(rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        """ Base-class wiring, plus fan-out of self.op to every unit. """
        m = super().elaborate(platform)

        # hand the same operation to all units
        m.d.comb += [unit.oper_i.eq(self.op) for unit in self.units]

        return m
251
252
class CompUnitALUs(CompUnitsBase):
    """ Group of ALUFunctionUnits, all handed the same operation record. """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALUFunctionUnits to create (indices 0..n-1)
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")

        # Int ALUs
        units = [ALUFunctionUnit(i) for i in range(n_alus)]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        """ Base-class wiring, plus fan-out of self.op to every ALU. """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the subset of operation to ALUs
        for alu in self.units:
            comb += alu.oper_i.eq(self.op)

        return m
286
287
class CompUnitBR(CompUnitsBase):
    """ Group containing a single branch Computation Unit (self.br1)
        wrapped around a BranchALU (self.bgt).
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: bgt unit is returned so that a shadow unit can be created
            for it
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")  # TODO - CompALUBranchSubset
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = MultiCompUnit(rwid, self.bgt, CompALUOpSubset)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """ Base-class wiring, plus fan-out of self.oper_i to every unit.
            self.op and self.imm_i are not connected here (see TODOs).
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units
        for alu in self.units:
            # comb += alu.oper_i.eq(self.op) # TODO
            comb += alu.oper_i.eq(self.oper_i)
            #comb += alu.imm_i.eq(self.imm_i)

        return m
323
324
class FunctionUnits(Elaboratable):
    """ Wraps one FU-FU and one FU-Reg dependency matrix and exposes
        their issue / go_rd / go_wr / go_die inputs and their
        readable / writable / register-select outputs.
    """

    def __init__(self, n_reg, n_int_alus, n_src, n_dst):
        """ Inputs:

            * :n_reg:      number of registers (width of src/dest vectors)
            * :n_int_alus: number of Function Units (width of per-FU vectors)
            * :n_src:      number of source operands per instruction
            * :n_dst:      number of destination operands per instruction
        """
        self.n_src, self.n_dst = n_src, n_dst
        self.n_reg = n_reg
        self.n_int_alus = nf = n_int_alus

        self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # arrays
        src = []
        rsel = []
        rd = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
            rsel.append(Signal(n_reg, name="src%d_rsel_o" %
                               j, reset_less=True))
            rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
        dst = []
        dsel = []
        wr = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match src1/src2
            dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
            dsel.append(Signal(n_reg, name="dst%d_rsel_o" %
                               j, reset_less=True))
            wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))

        self.dest_i = dst  # Dest in (top)
        self.src_i = src  # oper in (top)

        # for Register File Select Lines (horizontal), per-reg
        self.dst_rsel_o = dsel  # dest reg (bot)
        self.src_rsel_o = rsel  # src reg (bot)

        self.go_rd_i = rd
        self.go_wr_i = wr

        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        """ Instantiates the two dependency matrices and connects them to
            each other and to this object's ports.  Also sets
            self.wr_pend_o (only available after elaboration).
        """
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # the matrices are sized from the stored operand counts so that
        # they always agree with the per-operand wiring loops below
        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus,
                                  self.n_src, self.n_dst)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_reg,
                                    self.n_src, self.n_dst)
        m.submodules.intregdeps = intregdeps

        comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
        comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)

        # FU-Reg matrix: its own vector outputs are fed back as pending
        comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)

        # FU-Reg pending outputs drive the FU-FU matrix
        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.readable_o.eq(intfudeps.readable_o)
        comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        for i in range(self.n_src):
            print(i, self.go_rd_i, intfudeps.go_rd_i)
            comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += intregdeps.src_i[i].eq(self.src_i[i])
            comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
        for i in range(self.n_dst):
            print(i, self.go_wr_i, intfudeps.go_wr_i)
            comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += intregdeps.dest_i[i].eq(self.dest_i[i])
            comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        return m
427
428
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: register files, a test L0 cache buffer,
        ALU / LD-ST / branch Computation Units, dependency matrices,
        group pickers, issue units and shadow matrices, wired together
        in elaborate().
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        pspec = TestMemPspec(ldst_ifacetype='testpi',
                             addr_wid=48,
                             mask_wid=8,
                             reg_wid=64)
        self.l0 = TstL0CacheBuffer(pspec)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.instr = Decode2ToExecute1Type("sc_instr")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Creates all sub-units and connects them.  Returns the Module. """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.l0 = l0 = self.l0

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # NOTE(review): the FP ports are requested but not connected below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): 5 presumably = 2 ALU + 2 LD/ST + 1 BR, matching the
        # IssueUnitGroup sizes set in __init__ - confirm before changing
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, l0)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt  # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        fu_n_src = 2
        fu_n_dst = 1
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
                                                     fu_n_src, fu_n_dst)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        mempick1 = GroupPicker(n_ldsts, 1, 1)
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
        m.submodules.intpick1 = ipick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq_from_execute1(self.instr.do)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.op.eq_from_execute1(self.instr.do)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i[0].eq(regdecode.dest_o)
        comb += intfus.src_i[0].eq(regdecode.src1_o)
        comb += intfus.src_i[1].eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        reset_b = Signal(cul.n_units, reset_less=True)
        # XXX was cul.go_wr_i not done.o
        # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
        sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        sync += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        # (two explicit connections: relies on there being two LD/ST units)
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = ipick1.go_rd_o
        delay_pick_l = []
        go_wr_o = ipick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        for i in range(fu_n_src):
            comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
        for i in range(fu_n_dst):
            comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        int_rd_o = intfus.readable_o
        rrel_o = cu.rd_rel_o
        rqrl_o = cu.req_rel_o
        for i in range(fu_n_src):
            # connect with a delay so that src data arrives at the right time
            pick = Signal(n_intfus, name="pick_%d" % i)
            delay_pick = Signal(n_intfus, name="dp_%d" % i)
            rp = Signal(n_intfus, name="rp_%d" % i)
            # a unit whose go_rd was granted last cycle is masked out
            comb += pick[0:n_intfus].eq(rrel_o[i][0:n_intfus] & ~delay_pick)
            comb += ipick1.rd_rel_i[i][0:n_intfus].eq(pick[0:n_intfus])
            comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
            sync += delay_pick.eq(rp)
            comb += rp.eq(go_rd_o[i])
            delay_pick_l.append(delay_pick)
        int_wr_o = intfus.writable_o
        for i in range(fu_n_dst):
            # XXX FIXME: rqrl_o[i] here
            comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
            comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
            # XXX experiment: use ~cu.busy_o instead.  *should* be good
            # because the comp unit is only free once completed
            comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): 0x1f keeps the low 5 bits, presumably one per FU
        # (n_int_alus = 5 above) - keep the two in step
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.o_data+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.o_data == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
        comb += int_src1.ren.eq(intfus.src_rsel_o[0])
        comb += int_src2.ren.eq(intfus.src_rsel_o[1])

        # connect ALUs to regfile
        comb += int_dest.i_data.eq(cu.o_data)
        comb += cu.src1_i.eq(int_src1.o_data)
        comb += cu.src2_i.eq(int_src2.o_data)

        # connect ALU Computation Units
        # (go_rd uses the one-cycle-delayed pick registered above)
        for i in range(fu_n_src):
            comb += cu.go_rd_i[i][0:n_intfus].eq(delay_pick_l[i][0:n_intfus])
        for i in range(fu_n_dst):
            comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields the register-file signals followed by this object's
            register-number inputs, issue_o and the branch signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns the signals from __iter__ as a list. """
        return list(self)
797
798
class IssueToScoreboard(Elaboratable):
    """ Couples an InstructionQ to a Scoreboard: the head of the queue is
        presented to the scoreboard and popped once the selected issue
        unit group reports that it accepted the instruction.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length
            * :n_in:   number of instruction input slots (i_data entries)
            * :n_out:  passed through to InstructionQ
            * :rwid:   register bit width
            * :opwid:  operation bit width (stored as self.opw)
            * :n_regs: number of registers
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from i_data)
        self.p_o_ready = Signal()  # instructions were added
        self.i_data = Instruction._nq(n_in, "i_data")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Builds the queue and scoreboard and the issue glue logic.
            Also sets self.intregs (only available after elaboration).
        """
        m = Module()
        comb = m.d.comb

        queue = InstructionQ(self.rwid, self.opw, self.qlen,
                             self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = queue
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(queue.qlen_o)

        # link up instruction queue
        comb += queue.p_add_i.eq(self.p_add_i)
        comb += self.p_o_ready.eq(queue.p_o_ready)
        for slot in range(self.n_in):
            comb += eq(queue.i_data[slot], self.i_data[slot])

        # take instruction and process it.  the queue head is inspected
        # in place; it is only popped (n_sub_i = 1) in the block below.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        # (waiting flag, issue group) pairs, in the order they are checked
        waiters = ((wait_issue_ls, sc.lsissue),
                   (wait_issue_br, sc.brissue),
                   (wait_issue_alu, sc.aluissue))

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            for waiting, group in waiters:
                with m.If(waiting & (group.fn_issue_o != 0)):
                    with m.If(queue.qlen_o != 0):
                        comb += queue.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(queue.qlen_o != 0):
            # get the operands and operation
            head = queue.o_data[0]
            imm = head.do.imm_data.data
            dest = head.write_reg.data
            src1 = head.read_reg1.data
            src2 = head.read_reg2.data
            op = head.do.insn_type
            fu = head.do.fn_unit
            opi = head.do.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile
            comb += sc.instr.eq(head)

            # choose a Function-Unit-Group
            with m.If(fu == Function.ALU):  # alu
                comb += sc.aluissue.insn_i.eq(1)  # enable alu issue
                comb += wait_issue_alu.eq(1)
            with m.Elif(fu == Function.LDST):  # ld/st
                comb += sc.lsissue.insn_i.eq(1)  # enable ldst issue
                comb += wait_issue_ls.eq(1)

            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ Yields p_o_ready, every member of each i_data entry, p_add_i. """
        yield self.p_o_ready
        for entry in self.i_data:
            yield from list(entry)
        yield self.p_add_i

    def ports(self):
        """ Returns the signals from __iter__ as a list. """
        return list(self)
914
915
def power_instr_q(dut, pdecode2, ins, code):
    """Simulation process: push the decoder's current output into the queue.

    Copies pdecode2.e into dut.i_data[0], reads back (and prints) its
    insn_type and fn_unit, requests one instruction to be added via
    dut.p_add_i, then polls dut.p_o_ready once per clock until it is set
    and finally clears dut.p_add_i.

    :ins: and :code: are accepted but not used here.
    """
    for slot, decoded in enumerate((pdecode2.e,)):
        yield dut.i_data[slot].eq(decoded)
        insn_type = yield decoded.do.insn_type
        fn_unit = yield decoded.do.fn_unit
        print("senddata ", slot, insn_type, fn_unit, decoded)

    # ask for exactly one instruction to be added, then advance a clock
    yield dut.p_add_i.eq(1)
    yield

    # one clock per unsuccessful poll of the "added" indicator
    while not (yield dut.p_o_ready):
        yield

    yield dut.p_add_i.eq(0)
933
934
def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """Simulation process: drive one hand-built instruction into the queue.

    The fields are written onto ``dut.i_data[0]``; ``dut.p_add_i`` is then
    set to 1 and the process keeps yielding until ``dut.p_o_ready`` reads
    non-zero, after which ``dut.p_add_i`` is set back to 0.
    ``branch_success`` and ``branch_fail`` are accepted but not used.
    """
    n_to_add = 1
    entry = dut.i_data[0]

    # (target signal, value) pairs, applied strictly in this order
    assignments = (
        (entry.do.insn_type, op),
        (entry.do.fn_unit, funit),
        (entry.read_reg1.data, src1),
        (entry.read_reg1.ok, 1),  # XXX TODO
        (entry.read_reg2.data, src2),
        (entry.read_reg2.ok, 1),  # XXX TODO
        (entry.write_reg.data, dest),
        (entry.write_reg.ok, 1),  # XXX TODO
        (entry.do.imm_data.data, imm),
        (entry.do.imm_data.ok, op_imm),
    )
    for target, value in assignments:
        yield target.eq(value)

    yield dut.p_add_i.eq(n_to_add)
    yield
    # keep going until the queue reports ready
    while not (yield dut.p_o_ready):
        yield

    yield dut.p_add_i.eq(0)
969
970
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: present one instruction directly on the dut inputs.

    Any non-zero value in bits 2-3 of ``op`` selects the branch issue unit;
    otherwise the ALU issue unit is used.  The low two bits of ``op`` are
    sent as the operation and ``imm`` as the immediate.  After one further
    yield the process hands over to wait_for_issue() for the chosen unit.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    if (op & 0b1100) != 0:  # branch
        dut_issue, oper_sig, imm_sig = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_sig, imm_sig = (dut.aluissue, dut.alu_oper_i,
                                        dut.alu_imm_i)

    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
995
996
def print_reg(dut, rnums):
    """Simulation process: print the values of the registers in ``rnums``.

    Each value is read from ``dut.intregs.regs[rnum].reg`` and shown in hex.
    """
    hexvals = []
    for rnum in rnums:
        current = yield dut.intregs.regs[rnum].reg
        hexvals.append("%x" % current)
    labels = ','.join(str(rnum) for rnum in rnums)
    print("reg %s: %s" % (labels, ','.join(hexvals)))
1004
1005
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of ``n_ops`` randomly generated instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when ``shadowing``
    is set a trailing ``(0, 0)`` pair is added as a seventh element.
    Register numbers are drawn from ``1 .. dut.n_regs-1``, ``imm`` from
    ``1 .. 2**dut.rwid - 1`` and ``op`` from ``0 .. max_opnums``.
    """
    def one_op():
        # the order of the randint() calls fixes which values come out for
        # a given seed, so it must not be rearranged
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        imm = randint(1, (1 << dut.rwid)-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)
        # opi is 1 only when this draw comes out as 0, otherwise it is 0
        opi = int(randint(0, 2) == 0)

        fields = (src1, src2, dest, op, opi, imm)
        return (fields + ((0, 0),)) if shadowing else fields

    return [one_op() for _ in range(n_ops)]
1021
1022
def wait_for_busy_clear(dut):
    """Simulation process: keep yielding until ``dut.busy_o`` reads zero.

    Prints "busy" once for every pass on which the signal is still set.
    """
    while (yield dut.busy_o):
        print("busy")
        yield
1030
1031
def disable_issue(dut):
    """Simulation process: set ``insn_i`` to 0 on all three issue units.

    The units are cleared in the order aluissue, brissue, lsissue.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1036
1037
def wait_for_issue(dut, dut_issue):
    """Simulation process: wait until ``dut_issue.fn_issue_o`` is non-zero.

    Prints "busy" on every pass where it is still zero.  Once it is set,
    issue is disabled on all units and ``dut.reg_enable_i`` is set to 0.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy")
        # debugging aid: yield from print_reg(dut, [1,2,3])
        yield

    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1049
1050
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: issue a small branch test and compare with alusim.

    Instructions are 5-tuples ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``.  An ``op`` of 4 or more is treated as a branch; for a
    branch the ``dest`` slot instead holds ``(branch_ok, branch_fail)``, two
    lists of follow-on instructions in which ``None`` entries are skipped.

    The process seeds the random generator, loads random register values
    into both the dut and ``alusim``, issues the instructions through
    int_instr(), waits for the dut to stop being busy, replays the same
    instructions on ``alusim`` and finally runs ``alusim.check`` and
    ``alusim.dump`` against the dut.

    NOTE(review): ``alusim.op`` is called here with four arguments, whereas
    scoreboard_sim() calls it with six (op, opi, imm, src1, src2, dest).
    RegSim is not visible from here, so that call is left untouched -
    confirm which form is current.
    NOTE(review): the disabled ``if False`` setup uses create_random_ops(),
    whose tuples have a different layout from the 5-tuples unpacked below.
    """
    iseed = 3

    for _run in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers (mirrored into alusim)
        for rnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # independent copy for the alusim replay: the issue loop below
        # consumes (and extends) the original list
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts  # same list object: pops and appends hit both names
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr() takes an immediate right after the opcode; these
            # tuples carry none, so 0 is passed.  Without it the call is one
            # positional argument short and raises TypeError.
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on alusim, following whichever branch arm it reports
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1163
1164
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: assemble a short program and queue it for issue.

    ``m`` is accepted but not used.  Each assembled instruction word is
    assigned to ``instruction``, followed by a bare yield, and is then
    handed to power_instr_q().  Once ``dut.qlen_o`` reads zero the process
    yields four more times, waits for ``dut.busy_o`` to clear, and finally
    runs ``alusim.check`` and ``alusim.dump`` against the dut.
    """
    seed(0)

    for _run in range(1):

        # every register 1..n_regs-1 starts out holding its own index
        # (XXX actually, not random at all); alusim is given the same values
        for regnum in range(1, dut.n_regs):
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # assembly listing: flip the toggles to pick the test program
        lst = []
        if False:
            lst.extend(["addi 2, 0, 0x4321",
                        "addi 3, 0, 0x1234",
                        "add 1, 3, 2",
                        "add 4, 3, 5"])
        if True:
            lst.append("lbzu 6, 7(2)")

        with Program(lst, bigendian=False) as program:
            words = program.generate_instructions()
            listing = program.assembly.splitlines()

            # issue instruction(s), wait for issue to be free before
            # proceeding
            for ins, code in zip(words, listing):
                yield instruction.eq(ins)  # raw binary instr.
                yield  # Delay(1e-6)

                low32 = ins & 0xffffffff
                print("binary 0x{:X}".format(low32))
                print("assembly", code)

                #alusim.op(op, opi, imm, src1, src2, dest)
                yield from power_instr_q(dut, pdecode2, ins, code)

        # wait for all instructions to stop before checking
        while (yield dut.qlen_o) != 0:
            yield
        for _cycle in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1221
1222
def scoreboard_sim(dut, alusim):
    """Simulation process: queue hand-picked instructions and check results.

    Registers 1..n_regs-1 of the dut (and of ``alusim``) are first loaded
    with their own index.  The instruction list is then assembled from the
    ``if True`` / ``if False`` toggles below; each enabled entry is an
    8-tuple ``(src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))``.
    Every instruction is applied to ``alusim`` and pushed into the dut via
    instr_q().  Once ``dut.qlen_o`` reads zero the process yields four more
    times, waits for ``dut.busy_o`` to clear, and runs ``alusim.check`` and
    ``alusim.dump`` against the dut.

    NOTE(review): most of the disabled blocks append tuples of 4 to 7
    elements (and create_random_ops() returns 6- or 7-element tuples), which
    would not fit the 8-way unpack in the issue loop - they look like
    leftovers from earlier tuple layouts and need converting before being
    re-enabled.
    """

    seed(0)

    for i in range(1):

        # set initial register values: the random variants are commented
        # out, so each register simply holds its own index
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x20, 1, 1, (0, 0)))  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, 0, 0, (0, 0)))
            instrs.append((7, 6, 6, 2, 0, 0, (0, 0)))
            instrs.append((1, 7, 2, 2, 0, 0, (0, 0)))

        # currently enabled: two ALU adds ...
        if True:
            instrs.append((2, 3, 3, MicrOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
            instrs.append((5, 3, 3, MicrOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
        # ... followed by one multiply with opi=1 and imm=7
        if True:
            instrs.append((3, 5, 5, MicrOp.OP_MUL_L64, Function.ALU,
                           1, 7, (0, 0)))
        if False:
            instrs.append((2, 3, 3, MicrOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        print("instructions", instrs)
        for i, instr in enumerate(instrs):
            print("issue instruction", i, instr)
            src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (i, src1, src2, dest, op, fn_unit, opi, imm))
            # apply the instruction to alusim first, then queue it on the dut
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # four further yields after the queue reads empty, before the
        # busy check
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1383
1384
def test_scoreboard():
    """Build the scoreboard test bench, dump its RTLIL and simulate it.

    The RTLIL goes to ``test_scoreboard6600.il`` and the simulation trace to
    ``test_scoreboard6600.vcd``.  Only the scoreboard_sim() process is run;
    the power_sim() and scoreboard_branch_sim() variants are left disabled.
    """
    regwidth = 64
    dut = IssueToScoreboard(2, 1, 1, regwidth, 8, 8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # constructed, but not used further here

    m = Module()
    # keep this exact variable name: nMigen derives the Signal's name from it
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    pdecode2 = PowerDecode2(pdecode)
    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    m.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    m.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    rtlil_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as outfile:
        outfile.write(rtlil_text)

    #run_simulation(m, power_sim(m, dut, pdecode2, instruction, alusim),
    #               vcd_name='test_powerboard6600.vcd')

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                vcd_name='test_scoreboard6600.vcd')
1417
1418
# run the scoreboard test when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()