continuing debugging of LD/ST CompUnit FSM and unit test
[soc.git] / src / soc / experiment / score6600_multi.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
5 from nmigen.back.pysim import Delay
6
7 from soc.regfile.regfile import RegFileArray, treereduce
8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
10 from soc.scoreboard.global_pending import GlobalPending
11 from soc.scoreboard.group_picker import GroupPicker
12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
15 from soc.scoreboard.memfu import MemFunctionUnits
16
17 from soc.experiment.compalu import ComputationUnitNoDelay
18 from soc.experiment.compalu_multi import MultiCompUnit, go_record
19 from soc.experiment.compldst import LDSTCompUnit
20 from soc.experiment.testmem import TestMemory
21
22 from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
23
24 from soc.decoder.power_enums import InternalOp, Function
25 from soc.decoder.power_decoder import (create_pdecode)
26 from soc.decoder.power_decoder2 import (PowerDecode2)
27 from soc.simulator.program import Program
28
29
30 from nmutil.latch import SRLatch
31 from nmutil.nmoperator import eq
32
33 from random import randint, seed
34 from copy import deepcopy
35 from math import log
36
37 from soc.experiment.sim import RegSim, MemSim
38 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
39
40
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Groups a sequence of Computation Units and presents their
        single-bit control signals (issue, go_rd, go_wr, busy, ...) as
        concatenated, multi-bit signals.

        The class is usable recursively: the "units" may themselves be
        CompUnitsBase instances, in which case the concatenation covers
        every unit of every sub-group and n_units is the grand total.
        This gives a flat top-level view of all the signals in and out
        of the ALUs whilst still allowing ALUs to be grouped.

        At the lower level the intent is that a group of (identical)
        ALUs is handed the same operation and, beyond that, shares the
        *same pipeline*, becoming a "Concurrent Computation Unit" as
        defined by Mitch Alsup (see section 11.4.9.3)
    """

    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True the additional LD/ST control, status
                         and address signals are created and connected
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        # a group-of-groups is as wide as all of its children together
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(grp.n_units for grp in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.rd0 = go_record(n_units, "rd0")
        self.rd1 = go_record(n_units, "rd1")
        self.go_rd_i = [self.rd0.go, self.rd1.go]  # XXX HACK!
        self.wr0 = go_record(n_units, "wr0")
        self.go_wr_i = [self.wr0.go]
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = [self.rd0.rel, self.rd1.rel]  # HACK!
        self.req_rel_o = self.wr0.rel
        self.done_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        """ Adds every unit as a submodule ("comp0", "comp1", ...) and
            connects the concatenated group signals to the per-unit ones.
        """
        m = Module()
        comb = m.d.comb
        units = self.units

        for idx, unit in enumerate(units):
            setattr(m.submodules, "comp%d" % idx, unit)

        # debug trace of each unit's release signals
        for unit in units:
            print (unit, "rel", unit.req_rel_o, unit.rd_rel_o)

        def joined(attr, idx=None):
            # concatenation, in unit order, of one attribute of every
            # unit (optionally one element of a list-valued attribute)
            parts = [getattr(unit, attr) for unit in units]
            if idx is not None:
                parts = [part[idx] for part in parts]
            return Cat(*parts)

        # unit outputs -> group outputs
        comb += self.rd0.rel.eq(joined("rd_rel_o", 0))
        comb += self.rd1.rel.eq(joined("rd_rel_o", 1))
        comb += self.req_rel_o.eq(joined("req_rel_o"))
        comb += self.done_o.eq(joined("done_o"))
        comb += self.busy_o.eq(joined("busy_o"))
        # group inputs -> unit inputs
        comb += joined("go_die_i").eq(self.go_die_i)
        comb += joined("shadown_i").eq(self.shadown_i)
        comb += joined("go_wr_i").eq(self.wr0.go)  # XXX TODO
        comb += joined("go_rd_i", 0).eq(self.rd0.go)
        comb += joined("go_rd_i", 1).eq(self.rd1.go)
        comb += joined("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # XXX NOTE: this only works because there is a single "port"
        # protected by a single go_wr.  multi-issue requires a bus
        # to be inserted here.
        if units:
            merged_data = treereduce(units, "data_o")
            comb += self.data_o.eq(merged_data)
            if self.ldstmode:
                merged_addr = treereduce(units, "addr_o")
                comb += self.addr_o.eq(merged_addr)

        # every unit sees the same pair of source operands
        for unit in units:
            comb += unit.src1_i.eq(self.src1_i)
            comb += unit.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only status outputs ...
        comb += self.ld_o.eq(joined("ld_o"))
        comb += self.st_o.eq(joined("st_o"))
        comb += self.adr_rel_o.eq(joined("adr_rel_o"))
        comb += self.sto_rel_o.eq(joined("sto_rel_o"))
        comb += self.load_mem_o.eq(joined("load_mem_o"))
        comb += self.stwd_mem_o.eq(joined("stwd_mem_o"))
        # ... and LD/ST-only control inputs
        comb += joined("go_ad_i").eq(self.go_ad_i)
        comb += joined("go_st_i").eq(self.go_st_i)

        return m
209
210
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LD/ST Computation Units, one ALU per unit, all handed
        the same operation and the same memory.
    """

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST Computation Units to create
            * :mem:     memory object passed to every LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")

        # Int ALUs: one per LD/ST unit
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        # XXX disable the 2nd memory temporarily: only the first unit
        # is created with debugtest=False
        units = []
        for idx, alu in enumerate(self.alus):
            units.append(LDSTCompUnit(rwid, alu, mem,
                                      debugtest=(idx != 0)))

        CompUnitsBase.__init__(self, rwid, units, ldstmode=True)

    def elaborate(self, platform):
        """ Base-class wiring, plus the shared operation for every unit.
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units, and mark each of them
        # as not being in ALU mode
        for ldst in self.units:
            comb += ldst.oper_i.eq(self.op)
            comb += ldst.isalu_i.eq(0)

        return m
250
251
class CompUnitALUs(CompUnitsBase):
    """ Group of integer ALU Computation Units sharing one operation.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU Computation Units to create
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")
        # NOTE: oper_i and imm_i are created but not connected to the
        # units at present (see the commented-out lines in elaborate)
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs are all created first, then wrapped one-per-unit
        alus = [ALU(rwid) for _ in range(n_alus)]
        units = [MultiCompUnit(rwid, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        """ Base-class wiring, plus the shared operation for every unit.
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the subset of operation to ALUs
        for cu in self.units:
            comb += cu.oper_i.eq(self.op)
            #comb += cu.oper_i[0:3].eq(self.oper_i)
            #comb += cu.imm_i.eq(self.imm_i)

        return m
290
291
class CompUnitBR(CompUnitsBase):
    """ Group containing the single Branch Computation Unit.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width

            Note: the branch ALU (self.bgt) and its Computation Unit
            (self.br1) are kept as attributes so that a shadow unit can
            be created for the branch
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")  # TODO - CompALUBranchSubset
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = MultiCompUnit(rwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """ Base-class wiring, plus the shared operation for every unit.
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units.  for now this is the
        # raw oper_i signal rather than the self.op record
        for cu in self.units:
            #comb += cu.oper_i.eq(self.op) # TODO
            comb += cu.oper_i.eq(self.oper_i)
            #comb += cu.imm_i.eq(self.imm_i)

        return m
327
328
class FunctionUnits(Elaboratable):
    """ Integer Function Units: a FU-FU and a FU-Reg Dependency Matrix,
        wired together and presented as a single module.
    """

    def __init__(self, n_reg, n_int_alus, n_src, n_dst):
        """ Inputs:

            * :n_reg:      number of registers (width of the per-register
                           src / dest / select signals)
            * :n_int_alus: number of Function Units (width of the per-FU
                           go_rd / go_wr / go_die / issue signals)
            * :n_src:      number of source operands per Function Unit
            * :n_dst:      number of destination operands per FU
        """
        self.n_src, self.n_dst = n_src, n_dst
        self.n_reg = n_reg
        self.n_int_alus = nf = n_int_alus

        self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # arrays
        src = []
        rsel = []
        rd = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
            rsel.append(Signal(n_reg, name="src%d_rsel_o" % j,
                               reset_less=True))
            rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
        dst = []
        dsel = []
        wr = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match src1/src2
            dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
            dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j,
                               reset_less=True))
            wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
        # NOTE(review): pend / wpnd are built here but nothing in this
        # class stores or connects them - kept as in the original
        wpnd = []
        pend = []
        for i in range(nf):
            j = i + 1  # name numbering to match src1/src2
            pend.append(Signal(nf, name="rd_src%d_pend_o" % j,
                               reset_less=True))
            wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j,
                               reset_less=True))

        self.dest_i = Array(dst)  # Dest in (top)
        self.src_i = Array(src)  # oper in (top)

        # for Register File Select Lines (horizontal), per-reg
        self.dst_rsel_o = Array(dsel)  # dest reg (bot)
        self.src_rsel_o = Array(rsel)  # src reg (bot)

        self.go_rd_i = Array(rd)
        self.go_wr_i = Array(wr)

        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (the attribute is only created during elaborate)

    def elaborate(self, platform):
        """ Creates the two dependency matrices and connects them to
            each other and to this module's inputs/outputs.
        """
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # the matrices are built with the very same src/dest counts that
        # the connection loops below iterate over (previously a literal
        # "2, 1", which only matched when n_src=2 and n_dst=1)
        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus,
                                  self.n_src, self.n_dst)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_reg,
                                    self.n_src, self.n_dst)
        m.submodules.intregdeps = intregdeps

        comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
        comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)

        # FU-Reg vector outputs are fed straight back in as "pending"
        comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)

        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.readable_o.eq(intfudeps.readable_o)
        comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        for i in range(self.n_src):
            print (i, self.go_rd_i, intfudeps.go_rd_i)
            comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += intregdeps.src_i[i].eq(self.src_i[i])
            comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
        for i in range(self.n_dst):
            print (i, self.go_wr_i, intfudeps.go_wr_i)
            comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += intregdeps.dest_i[i].eq(self.dest_i[i])
            comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        return m
427
428
class Scoreboard(Elaboratable):
    """ Scoreboard: register files, test memory, ALU / LDST / Branch
        Computation Units, dependency matrices, priority pickers,
        issue unit and shadow matrices, all wired together.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_op = CompALUOpSubset("alu")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_succ and branch_fail
        # are requests to have the current instruction be dependent on
        # the branch unit "shadow" capability.
        # branch_direction_o is set (in elaborate) to the branch unit's
        # data_o + 1 when the branch completes.
        # NOTE(review): the original comment said 0 = branch not met
        # yet, 1 = "success", 2 = "fail"; elaborate treats data_o == 1
        # as "branch occurs" (giving 2) - confirm the intended encoding
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds every sub-unit and connects them together.

            Unit counts are taken from the Computation Unit groups
            themselves (cu.n_units, cul.n_units) so that signal widths,
            masks and address hook-ups always follow the groups.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.mem = self.mem

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything yet
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        br1 = cub.br1  # get at the branch computation unit

        # Count of number of FUs, taken from the groups just created
        # (5 in total and 2 LD/ST with the issue groups set up above)
        n_int_alus = cu.n_units
        n_ldsts = cul.n_units
        n_intfus = n_int_alus
        # one bit per Function Unit
        fu_mask = (1 << n_intfus) - 1

        # Int FUs
        fu_n_src = 2
        fu_n_dst = 1
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
                                                     fu_n_src, fu_n_dst)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        mempick1 = GroupPicker(n_ldsts, 1, 1)
        m.submodules.mempick1 = mempick1

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
        m.submodules.intpick1 = ipick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue,
                                    self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  the branch gets its own separate
        # single-column matrix (bshadow).
        # NOTE(review): an earlier comment spoke of widening the shadow
        # by one for branches; the widths passed here are n_intfus and 1
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq(self.alu_op)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.op.eq(self.alu_op)  # TODO: separate ls_op?

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i[0].eq(regdecode.dest_o)
        comb += intfus.src_i[0].eq(regdecode.src1_o)
        comb += intfus.src_i[1].eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        reset_b = Signal(cul.n_units, reset_less=True)
        # XXX was cul.go_wr_i not done.o
        sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on
        # LDSTCompUnit issue_i.  multi-issue gets a bit more complex but
        # not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data: one address per LD/ST unit
        for i, ldst in enumerate(cul.units):
            comb += memfus.addrs_i[i].eq(ldst.addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = ipick1.go_rd_o
        go_wr_o = ipick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        for i in range(fu_n_src):
            comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
        for i in range(fu_n_dst):
            comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        int_rd_o = intfus.readable_o
        rrel_o = cu.rd_rel_o
        rqrl_o = cu.req_rel_o
        for i in range(fu_n_src):
            comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
            comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
        int_wr_o = intfus.writable_o
        for i in range(fu_n_dst):
            # XXX FIXME: rqrl_o[i] here
            comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
            comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
            # XXX experiment: use ~cu.busy_o instead.  *should* be good
            # because the comp unit is only free once completed
            comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the separate
        # single-column bshadow matrix, and only needs to set shadow_i,
        # s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & fu_mask)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & fu_mask)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
        comb += int_src1.ren.eq(intfus.src_rsel_o[0])
        comb += int_src2.ren.eq(intfus.src_rsel_o[1])

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        for i in range(fu_n_src):
            comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
        for i in range(fu_n_dst):
            comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields the register file ports followed by the top-level
            register-number, issue and branch signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything produced by iterating this object, as a list.
        """
        return list(self)
783
784
class IssueToScoreboard(Elaboratable):
    """ Instruction Queue feeding a Scoreboard: the instruction at the
        head of the queue is presented to the matching issue group and
        popped once that group reports that it has issued.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length
            * :n_in:   number of instruction input slots (data_i)
            * :n_out:  passed through to the InstructionQ
            * :rwid:   bit width of register file(s)
            * :opwid:  operand bit width (stored as self.opw)
            * :n_regs: depth of register file(s)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # width of the "how many" / "queue length" counters
        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction._nq(n_in, "data_i")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Creates the InstructionQ and Scoreboard and links them up.
        """
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for slot in range(self.n_in):
            comb += eq(iq.data_i[slot], self.data_i[slot])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when n_sub_i is set (below).

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            for waiting, group in ((wait_issue_ls, sc.lsissue),
                                   (wait_issue_br, sc.brissue),
                                   (wait_issue_alu, sc.aluissue)):
                with m.If(waiting & (group.fn_issue_o != 0)):
                    with m.If(iq.qlen_o != 0):
                        comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation from the head of the queue
            head = iq.data_o[0]
            imm = head.imm_data.data
            opi = head.imm_data.ok  # immediate set
            dest = head.write_reg.data
            src1 = head.read_reg1.data
            src2 = head.read_reg2.data
            op = head.insn_type
            fu = head.fn_unit

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile

            # choose a Function-Unit-Group
            with m.If(fu == Function.ALU):  # alu
                comb += sc.alu_op.eq_from_execute1(head)
                comb += sc.aluissue.insn_i.eq(1)  # enable alu issue
                comb += wait_issue_alu.eq(1)
            with m.Elif(fu == Function.LDST):  # ld/st
                # XXX separate ls_op?
                comb += sc.alu_op.eq_from_execute1(head)
                comb += sc.lsissue.insn_i.eq(1)  # enable ldst issue
                comb += wait_issue_ls.eq(1)

            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ Yields p_ready_o, every field of every data_i slot, then
            p_add_i.
        """
        yield self.p_ready_o
        for insn in self.data_i:
            yield from list(insn)
        yield self.p_add_i

    def ports(self):
        """ Returns everything produced by iterating this object, as a list.
        """
        return list(self)
901
902
def power_instr_q(dut, pdecode2, ins, code):
    """ Simulation process: submits the decoder's current output
        (pdecode2.e) to the dut's instruction queue.

        Drives data_i[0] and p_add_i=1, waits (a clock at a time) until
        p_ready_o reads back as set, then drops p_add_i back to 0.
        The "ins" and "code" arguments are not used here.
    """
    pending = [pdecode2.e]
    n_to_add = 1

    for slot, decoded in enumerate(pending):
        yield dut.data_i[slot].eq(decoded)
        # read back, purely for the debug printout
        insn_type = yield decoded.insn_type
        fn_unit = yield decoded.fn_unit
        print("senddata ", slot, insn_type, fn_unit, decoded)

    yield dut.p_add_i.eq(n_to_add)
    yield
    # poll p_ready_o, advancing one clock between checks
    while True:
        accepted = yield dut.p_ready_o
        if accepted:
            break
        yield

    yield dut.p_add_i.eq(0)
920
921
def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ Simulation process: submits one hand-built instruction to the
        dut's instruction queue.

        Every field of data_i[0] is driven individually (the register
        "ok" flags are simply forced to 1), then p_add_i=1 is asserted
        until p_ready_o reads back as set, and finally p_add_i is
        dropped back to 0.  branch_success / branch_fail are not used.
    """
    slot = 0
    n_to_add = 1
    entry = dut.data_i[slot]

    yield entry.insn_type.eq(op)
    yield entry.fn_unit.eq(funit)
    # register numbers, in the order: read 1, read 2, write
    for port, regnum in ((entry.read_reg1, src1),
                         (entry.read_reg2, src2),
                         (entry.write_reg, dest)):
        yield port.data.eq(regnum)
        yield port.ok.eq(1)  # XXX TODO
    yield entry.imm_data.data.eq(imm)
    yield entry.imm_data.ok.eq(op_imm)

    # read the whole entry back, purely for the debug printout
    di = yield entry
    print("senddata %d %x" % (slot, di))

    yield dut.p_add_i.eq(n_to_add)
    yield
    # poll p_ready_o, advancing one clock between checks
    while True:
        accepted = yield dut.p_ready_o
        if accepted:
            break
        yield

    yield dut.p_add_i.eq(0)
956
957
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: issue one instruction directly to the DUT.

    Clears all issue enables, drives the destination and source register
    numbers, then enables either the branch issue unit (when bit 2 or 3
    of ``op`` is set) or the ALU issue unit.  The low two bits of ``op``
    become the operation and ``imm`` the immediate of the chosen unit.
    The regfile is enabled, the branch shadow inputs are driven from
    ``branch_fail`` / ``branch_success``, and after one clock the process
    waits for the chosen unit to report issue.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # bits 2-3 of the opcode select the branch unit; otherwise the ALU
    if (op & 0b1100) != 0:
        prefix, dut_issue = "br", dut.brissue
    else:
        prefix, dut_issue = "alu", dut.aluissue
    yield dut_issue.insn_i.eq(1)
    yield getattr(dut, prefix + "_oper_i").eq(Const(op & 0x3, 2))
    yield getattr(dut, prefix + "_imm_i").eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
982
983
def print_reg(dut, rnums):
    """Simulation process: print the given integer registers in hex.

    Reads ``dut.intregs.regs[n].reg`` for each ``n`` in ``rnums`` and
    prints one line of the form ``reg <numbers>: <hex values>``, both
    lists comma-separated.
    """
    hex_values = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        hex_values.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print("reg %s: %s" % (labels, ','.join(hex_values)))
991
992
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of ``n_ops`` randomly generated instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when ``shadowing``
    is true a trailing ``(0, 0)`` pair is appended as a seventh element.
    Register numbers are drawn from ``1 .. dut.n_regs-1``, the immediate
    from ``1 .. 2**dut.rwid - 1`` and the opcode from ``0 .. max_opnums``.
    ``opi`` is 1 only when a draw from ``0 .. 2`` comes up 0, otherwise 0.
    """
    def random_op():
        # draw order matters: it fixes the sequence for a given seed
        src1 = randint(1, dut.n_regs - 1)
        src2 = randint(1, dut.n_regs - 1)
        imm = randint(1, (1 << dut.rwid) - 1)
        dest = randint(1, dut.n_regs - 1)
        op = randint(0, max_opnums)
        opi = int(randint(0, 2) == 0)
        fields = (src1, src2, dest, op, opi, imm)
        return (fields + ((0, 0),)) if shadowing else fields

    return [random_op() for _ in range(n_ops)]
1008
1009
def wait_for_busy_clear(dut):
    """Simulation process: step the clock until ``dut.busy_o`` reads zero.

    Prints "busy" once for every cycle spent waiting.
    """
    still_busy = yield dut.busy_o
    while still_busy:
        print("busy")
        yield
        still_busy = yield dut.busy_o
1017
1018
def disable_issue(dut):
    """Simulation process: clear ``insn_i`` on the ALU, branch and LD/ST
    issue units of ``dut``, in that order."""
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1023
1024
def wait_for_issue(dut, dut_issue):
    """Simulation process: wait until ``dut_issue.fn_issue_o`` is set.

    Prints "busy" and steps the clock for every cycle in which the issue
    unit has not yet issued.  Once it has, all issue enables are cleared
    and the regfile enable is dropped.
    """
    issued = yield dut_issue.fn_issue_o
    while not issued:
        print("busy")
        # yield from print_reg(dut, [1,2,3])
        yield
        issued = yield dut_issue.fn_issue_o
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    # yield from print_reg(dut, [1,2,3])
1036
1037
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: issue a short branch-shadowed instruction list.

    Seeds the RNG, loads random values into the DUT registers (mirrored
    into ``alusim``), issues a hard-coded instruction list containing one
    branch via ``int_instr``, then replays the same list through
    ``alusim`` and asks it to check and dump the DUT state.

    Instruction tuples here are ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``; for a branch (``op >= 4``) the ``dest`` slot instead
    holds a ``(branch_ok, branch_fail)`` pair of instruction lists.

    NOTE(review): the ``int_instr`` call below passes 7 positional
    arguments, while ``int_instr`` as defined in this file takes 8
    (``imm`` comes after ``op``) -- confirm and supply an immediate.
    NOTE(review): ``alusim.op`` is called with 4 arguments here but with
    6 in ``scoreboard_sim`` -- confirm against the RegSim signature.
    """

    iseed = 3

    for i in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = 31+i*3  # immediately overwritten by the random value
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops(shadowing=True) yields 7-field
            # tuples, not the 5-field form unpacked further down -- confirm
            # before re-enabling this path
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            # fixed regression case: one plain op followed by one branch
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # independent copy for the reference model: the issue loop below
        # consumes (and appends to) insts
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts  # same list object as insts, not a copy
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without any shadow
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): argument count does not match int_instr's
            # signature in this file (no imm is passed)
            yield from int_instr(dut, op, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the instruction list through the reference model
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                # placeholder entry from a branch_ok / branch_fail list
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                # follow only the path the model says the branch took
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1150
1151
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: assemble a small POWER program and issue it.

    Initialises each DUT register ``i`` (from 1 upwards) to the value
    ``i``, mirroring it into ``alusim``.  Each assembled instruction is
    driven onto ``instruction`` and then queued through
    ``power_instr_q``.  Once ``dut.qlen_o`` reads zero and the DUT is no
    longer busy, ``alusim`` is asked to check and dump the DUT state.

    ``m`` is accepted but not used here.

    NOTE(review): the ``alusim.op`` call in the issue loop is commented
    out, so the only ``alusim`` updates visible here are the initial
    ``setval`` calls -- confirm what ``check`` is expected to compare.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i  # XXX actually, not random at all
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions
        lst = []
        if False:
            lst += ["addi 2, 0, 0x4321",
                    "addi 3, 0, 0x1234",
                    "add 1, 3, 2",
                    "add 4, 3, 5"
                    ]
        if True:
            lst += [ "lbz 6, 7(2)",
                     ]

        with Program(lst) as program:
            gen = program.generate_instructions()

            # issue instruction(s), wait for issue to be free before proceeding
            for ins, code in zip(gen, program.assembly.splitlines()):
                yield instruction.eq(ins)  # raw binary instr.
                yield  #Delay(1e-6)

                print("binary 0x{:X}".format(ins & 0xffffffff))
                print("assembly", code)

                #alusim.op(op, opi, imm, src1, src2, dest)
                yield from power_instr_q(dut, pdecode2, ins, code)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few extra cycles after the queue drains, before polling busy
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1207
1208
def scoreboard_sim(dut, alusim):
    """Simulation process: queue hand-written instructions and check them.

    Initialises each DUT register ``i`` (from 1 upwards) to the value
    ``i``, mirroring it into ``alusim``.  Every enabled instruction tuple
    is applied to ``alusim`` and queued on the DUT through ``instr_q``.
    Once ``dut.qlen_o`` reads zero and the DUT is no longer busy,
    ``alusim`` is asked to check and dump the DUT state.

    The issue loop unpacks 8-field tuples:
    ``(src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))``.

    NOTE(review): most of the disabled (``if False``) regression cases
    below use shorter tuple layouts, as does ``create_random_ops``; they
    would not unpack in the issue loop as written -- confirm and convert
    before re-enabling any of them.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x20, 1, 1, (0, 0)))  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, 0, 0, (0, 0)))
            instrs.append((7, 6, 6, 2, 0, 0, (0, 0)))
            instrs.append((1, 7, 2, 2, 0, 0, (0, 0)))

        # the only enabled case: two ALU adds in the current 8-field layout
        if True:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
            instrs.append((5, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
        if False:
            instrs.append((3, 5, 5, InternalOp.OP_MUL_L64, Function.ALU,
                           1, 7, (0, 0)))
        if False:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            print (i, instr)
            src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (i, src1, src2, dest, op, fn_unit, opi, imm))
            # update the reference model before queueing on the DUT
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few extra cycles after the queue drains, before polling busy
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1368
1369
def test_scoreboard():
    """Build the scoreboard plus POWER decoder and run ``power_sim`` on it.

    Instantiates an ``IssueToScoreboard`` and a ``PowerDecode2`` inside one
    module, feeds the decoder's raw opcode input from a 32-bit
    ``instruction`` signal, writes the RTLIL of the design to
    ``test_scoreboard6600.il`` and then simulates ``power_sim``, dumping
    waveforms to ``test_powerboard6600.vcd``.
    """
    regwidth = 64
    dut = IssueToScoreboard(2, 1, 1, regwidth, 8, 8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # created but not passed to any simulation yet

    m = Module()
    comb = m.d.comb
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    pdecode2 = PowerDecode2(pdecode)
    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    il_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, sim_process, vcd_name='test_powerboard6600.vcd')

    # alternative testbenches, currently disabled:
    #run_simulation(dut, scoreboard_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                     vcd_name='test_scoreboard6600.vcd')
1402
1403
# run the scoreboard testbench when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()