95f12dc37c2105b69a9d8b144d27664b62f0df37
[soc.git] / src / soc / experiment / score6600_multi.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
5 from nmigen.back.pysim import Delay
6
7 from soc.regfile.regfile import RegFileArray, ortreereduce
8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
10 from soc.scoreboard.global_pending import GlobalPending
11 from soc.scoreboard.group_picker import GroupPicker
12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
15 from soc.scoreboard.memfu import MemFunctionUnits
16
17 from soc.experiment.compalu import ComputationUnitNoDelay
18 from soc.experiment.compalu_multi import MultiCompUnit, go_record
19 from soc.experiment.compldst_multi import LDSTCompUnit
20 from soc.experiment.compldst_multi import CompLDSTOpSubset
21 from soc.experiment.l0_cache import TstL0CacheBuffer
22
23 from soc.experiment.alu_hier import ALU, BranchALU
24 from soc.fu.alu.alu_input_record import CompALUOpSubset
25
26 from soc.decoder.power_enums import InternalOp, Function
27 from soc.decoder.power_decoder import (create_pdecode)
28 from soc.decoder.power_decoder2 import (PowerDecode2)
29 from soc.decoder.power_decoder2 import Decode2ToExecute1Type
30
31 from soc.simulator.program import Program
32
33
34 from nmutil.latch import SRLatch
35 from nmutil.nmoperator import eq
36
37 from random import randint, seed
38 from copy import deepcopy
39 from math import log
40
41 from soc.experiment.sim import RegSim, MemSim
42 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
43
44
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """

    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the extra LD/ST signals (go_ad_i,
                         go_st_i, ld_o, st_o, adr_rel_o, sto_rel_o,
                         load_mem_o, stwd_mem_o, addr_o) are created here
                         and wired up in elaborate()
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid

        # a group of groups is as wide as the sum of its members; a plain
        # group of ALUs has one bit per ALU.
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.rd0 = go_record(n_units, "rd0")
        self.rd1 = go_record(n_units, "rd1")
        self.go_rd_i = [self.rd0.go, self.rd1.go]  # XXX HACK!
        self.wr0 = go_record(n_units, "wr0")
        self.go_wr_i = [self.wr0.go]
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = [self.rd0.rel, self.rd1.rel]  # HACK!
        self.req_rel_o = self.wr0.rel
        self.done_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)
        # input operand

    def elaborate(self, platform):
        """ Adds every unit as submodule "comp<N>" and concatenates the
            per-unit control signals (in unit order) onto this group's
            own, wider, signals.  Returns the populated Module.
        """
        m = Module()
        comb = m.d.comb
        units = self.units

        for i, alu in enumerate(units):
            setattr(m.submodules, "comp%d" % i, alu)

        for alu in units:
            print(alu, "rel", alu.req_rel_o, alu.rd_rel_o)

        def gather(attr, idx=None):
            """ Cat() of one attribute across all units, in unit order.
                idx selects an element when the attribute is a sequence
                (go_rd_i / rd_rel_o).
            """
            if idx is None:
                return Cat(*[getattr(u, attr) for u in units])
            return Cat(*[getattr(u, attr)[idx] for u in units])

        # unit outputs -> group outputs
        comb += self.rd0.rel.eq(gather("rd_rel_o", 0))
        comb += self.rd1.rel.eq(gather("rd_rel_o", 1))
        comb += self.req_rel_o.eq(gather("req_rel_o"))
        comb += self.done_o.eq(gather("done_o"))
        comb += self.busy_o.eq(gather("busy_o"))
        # group inputs -> unit inputs
        comb += gather("go_die_i").eq(self.go_die_i)
        comb += gather("shadown_i").eq(self.shadown_i)
        comb += gather("go_wr_i").eq(self.wr0.go)  # XXX TODO
        comb += gather("go_rd_i", 0).eq(self.rd0.go)
        comb += gather("go_rd_i", 1).eq(self.rd1.go)
        comb += gather("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # XXX NOTE: this only works because there is a single "port"
        # protected by a single go_wr.  multi-issue requires a bus
        # to be inserted here.
        if units:
            data_o = ortreereduce(units, "data_o")
            comb += self.data_o.eq(data_o)
            if self.ldstmode:
                addr_o = ortreereduce(units, "addr_o")
                comb += self.addr_o.eq(addr_o)

        # every unit sees the same two source operands
        for alu in units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals
        comb += self.ld_o.eq(gather("ld_o"))
        comb += self.st_o.eq(gather("st_o"))
        comb += self.adr_rel_o.eq(gather("adr_rel_o"))
        comb += self.sto_rel_o.eq(gather("sto_rel_o"))
        comb += self.load_mem_o.eq(gather("load_mem_o"))
        comb += self.stwd_mem_o.eq(gather("stwd_mem_o"))
        comb += gather("go_ad_i").eq(self.go_ad_i)
        comb += gather("go_st_i").eq(self.go_st_i)

        return m
213
214
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LDSTCompUnits that are all handed the same operation. """

    def __init__(self, rwid, opwid, n_ldsts, l0):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LDSTCompUnits to create
            * :l0:      object whose l0.dports[i].pi is handed to unit i
        """
        self.opwid = opwid

        # inputs
        self.op = CompLDSTOpSubset("cul_i")

        # LD/ST Units: one per port, each built as its port is looked up
        ports = (l0.l0.dports[idx].pi for idx in range(n_ldsts))
        ldst_units = [LDSTCompUnit(port, rwid, awid=48) for port in ports]

        super().__init__(rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        """ Base-class wiring, plus the shared operation fan-out. """
        m = super().elaborate(platform)

        # hand the same operation to all units
        m.d.comb += [unit.oper_i.eq(self.op) for unit in self.units]

        return m
245
246
class CompUnitALUs(CompUnitsBase):
    """ Group of integer ALUs, each wrapped in a MultiCompUnit, that are
        all handed the same operation record.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU / MultiCompUnit pairs to create
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")

        # Int ALUs first, then one Computation Unit around each of them
        alus = [ALU(rwid) for _ in range(n_alus)]
        units = [MultiCompUnit(rwid, alu, CompALUOpSubset) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        """ Base-class wiring, plus the shared operation fan-out. """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the subset of operation to ALUs
        for alu in self.units:
            comb += alu.oper_i.eq(self.op)

        return m
281
282
class CompUnitBR(CompUnitsBase):
    """ Single-unit group holding the Branch ALU and its Computation Unit. """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU is kept as self.bgt and its Computation
            Unit as self.br1, so that a shadow unit can be created for it
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")  # TODO - CompALUBranchSubset
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = MultiCompUnit(rwid, self.bgt, CompALUOpSubset)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """ Base-class wiring, plus oper_i fan-out to the branch unit. """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units
        for alu in self.units:
            # TODO: switch to the record form, alu.oper_i.eq(self.op)
            comb += alu.oper_i.eq(self.oper_i)
            # TODO: self.imm_i is declared but not yet connected to the
            # unit (alu.imm_i.eq(self.imm_i))

        return m
318
319
class FunctionUnits(Elaboratable):
    """ Wraps an FU-FU dependency matrix and an FU-Register dependency
        matrix, exposing per-register select lines, global pending
        vectors and per-FU readable/writable flags.
    """

    def __init__(self, n_reg, n_int_alus, n_src, n_dst):
        """ Inputs:

            * :n_reg:      width of the per-register vectors
            * :n_int_alus: number of Function Units (width of per-FU vectors)
            * :n_src:      number of source-operand ports
            * :n_dst:      number of destination ports
        """
        self.n_src, self.n_dst = n_src, n_dst
        self.n_reg = n_reg
        self.n_int_alus = nf = n_int_alus

        self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # arrays (names are numbered from 1, to match src1/src2)
        src = []
        rsel = []
        rd = []
        for j in range(1, n_src + 1):
            src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
            rsel.append(Signal(n_reg, name="src%d_rsel_o" % j,
                               reset_less=True))
            rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
        dst = []
        dsel = []
        wr = []
        for j in range(1, n_dst + 1):
            dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
            dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j,
                               reset_less=True))
            wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))

        self.dest_i = Array(dst)  # Dest in (top)
        self.src_i = Array(src)  # oper in (top)

        # for Register File Select Lines (horizontal), per-reg
        self.dst_rsel_o = Array(dsel)  # dest reg (bot)
        self.src_rsel_o = Array(rsel)  # src reg (bot)

        self.go_rd_i = Array(rd)
        self.go_wr_i = Array(wr)

        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid.  It only becomes available (as self.wr_pend_o) once
        # elaborate() has run.

    def elaborate(self, platform):
        """ Instantiates both dependency matrices and connects them to
            each other and to this module's ports.
        """
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # NOTE(review): both matrices are built with a literal "2, 1",
        # presumably n_src / n_dst, whereas the loops below run over
        # self.n_src / self.n_dst - confirm the two are meant to match.

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus, 2, 1)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_reg, 2, 1)
        m.submodules.intregdeps = intregdeps

        comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
        comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)

        # the FU-Reg matrix's global pending vectors are fed straight back in
        comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)

        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.readable_o.eq(intfudeps.readable_o)
        comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        for i in range(self.n_src):
            print(i, self.go_rd_i, intfudeps.go_rd_i)
            comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += intregdeps.src_i[i].eq(self.src_i[i])
            comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
        for i in range(self.n_dst):
            print(i, self.go_wr_i, intfudeps.go_wr_i)
            comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += intregdeps.dest_i[i].eq(self.dest_i[i])
            comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        return m
418
419
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: wires together the register files, the
        ALU / LDST / Branch Computation Unit groups, the integer and memory
        Function Unit dependency matrices, the group pickers, the issue
        units and the shadow matrices.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.l0 = TstL0CacheBuffer()

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.instr = Decode2ToExecute1Type("sc_instr")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds and returns the Module containing the full scoreboard. """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.l0 = l0 = self.l0

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): 5 presumably has to equal the total number of units
        # in "cu" below (aluissue 2 + lsissue 2 + brissue 1) - confirm.
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, l0)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt  # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        fu_n_src = 2
        fu_n_dst = 1
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
                                                     fu_n_src, fu_n_dst)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        # (mempick1 is only instantiated here; nothing below connects to it)
        mempick1 = GroupPicker(n_ldsts, 1, 1)
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
        m.submodules.intpick1 = ipick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as written, the branch shadow is a *separate*
        # ShadowMatrix ("bshadow") rather than an extra bit on "shadows";
        # the remark above about the increased width looks out of date.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq_from_execute1(self.instr)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.op.eq_from_execute1(self.instr)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i[0].eq(regdecode.dest_o)
        comb += intfus.src_i[0].eq(regdecode.src1_o)
        comb += intfus.src_i[1].eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        reset_b = Signal(cul.n_units, reset_less=True)
        # XXX was cul.go_wr_i not done.o
        # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
        sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        sync += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        # (indices 0 and 1 are hard-coded: exactly two LD/ST units assumed)
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = ipick1.go_rd_o
        go_wr_o = ipick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        for i in range(fu_n_src):
            comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
        for i in range(fu_n_dst):
            comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        int_rd_o = intfus.readable_o
        rrel_o = cu.rd_rel_o
        rqrl_o = cu.req_rel_o
        for i in range(fu_n_src):
            comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
            comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
        int_wr_o = intfus.writable_o
        for i in range(fu_n_dst):
            # XXX FIXME: rqrl_o[i] here
            comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
            comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
            # XXX experiment: use ~cu.busy_o instead.  *should* be good
            # because the comp unit is only free once completed
            comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i
        # NOTE(review): the code below drives the separate one-bit-wide
        # "bshadow" matrix; the "extra bit" wording appears to be historic.

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
        comb += int_src1.ren.eq(intfus.src_rsel_o[0])
        comb += int_src2.ren.eq(intfus.src_rsel_o[1])

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        for i in range(fu_n_src):
            comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
        for i in range(fu_n_dst):
            comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # yields whatever iterating both register files produces, followed
        # by this module's own register-number, issue and branch signals
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything __iter__ yields, as a list. """
        return list(self)
774
775
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is presented to the scoreboard, and is popped
        once the selected issue group reports a non-zero fn_issue_o.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ as its length; also sizes
                       the p_add_i / qlen_o counters
            * :n_in:   number of data_i entries (also passed to InstructionQ)
            * :n_out:  passed to InstructionQ
            * :rwid:   passed to both InstructionQ and Scoreboard
            * :opwid:  passed to InstructionQ (stored as self.opw)
            * :n_regs: passed to Scoreboard
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # counter width: floor(log2(qlen)) + 2 bits
        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction._nq(n_in, "data_i")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Builds the queue and scoreboard submodules and the issue logic
            between them.  Also sets self.intregs as a side effect.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        # (only available after elaborate() has been called)
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i is set, which
        # below happens once the chosen issue group has a non-zero
        # fn_issue_o (presumably meaning the unit accepted - confirm).

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            instr = iq.data_o[0]
            imm = instr.imm_data.data
            dest = instr.write_reg.data
            src1 = instr.read_reg1.data
            src2 = instr.read_reg2.data
            op = instr.insn_type
            fu = instr.fn_unit
            opi = instr.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile
            comb += sc.instr.eq(instr)

            # choose a Function-Unit-Group
            # (one If/Elif chain: ALU first, then LD/ST, then branch)
            with m.If(fu == Function.ALU):  # alu
                comb += sc.aluissue.insn_i.eq(1)  # enable alu issue
                comb += wait_issue_alu.eq(1)
            with m.Elif(fu == Function.LDST):  # ld/st
                comb += sc.lsissue.insn_i.eq(1)  # enable ldst issue
                comb += wait_issue_ls.eq(1)

            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        # p_ready_o, then every signal of every data_i entry, then p_add_i
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ Returns everything __iter__ yields, as a list. """
        return list(self)
891
892
def power_instr_q(dut, pdecode2, ins, code):
    """ Simulation process: push the decoder's current output (pdecode2.e)
        into entry 0 of dut.data_i, request one instruction to be added,
        and wait until dut.p_ready_o reads as true.

        ins and code are accepted but not used here.
    """
    decoded = pdecode2.e
    slot = 0

    yield dut.data_i[slot].eq(decoded)
    itype = yield decoded.insn_type
    funit = yield decoded.fn_unit
    print("senddata ", slot, itype, funit, decoded)

    # ask for one instruction to be added, then step the clock
    yield dut.p_add_i.eq(1)
    yield

    # poll p_ready_o, stepping one clock between polls
    while True:
        ready = yield dut.p_ready_o
        if ready:
            break
        yield

    yield dut.p_add_i.eq(0)
910
911
def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ Simulation process: fill entry 0 of dut.data_i field by field,
        request one instruction to be added, and wait until
        dut.p_ready_o reads as true.

        branch_success and branch_fail are accepted but not used here.
    """
    slot = 0
    entry = dut.data_i[slot]

    # (signal, value) pairs, applied in exactly this order
    field_values = (
        (entry.insn_type, op),
        (entry.fn_unit, funit),
        (entry.read_reg1.data, src1),
        (entry.read_reg1.ok, 1),  # XXX TODO
        (entry.read_reg2.data, src2),
        (entry.read_reg2.ok, 1),  # XXX TODO
        (entry.write_reg.data, dest),
        (entry.write_reg.ok, 1),  # XXX TODO
        (entry.imm_data.data, imm),
        (entry.imm_data.ok, op_imm),
    )
    for sig, value in field_values:
        yield sig.eq(value)

    # read the whole entry back, for the log
    packed = yield dut.data_i[slot]
    print("senddata %d %x" % (slot, packed))

    # ask for one instruction to be added, then step the clock
    yield dut.p_add_i.eq(1)
    yield

    # poll p_ready_o, stepping one clock between polls
    while True:
        ready = yield dut.p_ready_o
        if ready:
            break
        yield

    yield dut.p_add_i.eq(0)
946
947
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: drive one instruction straight onto *dut*.

    All issue enables are first cleared via ``disable_issue``; the register
    numbers *dest*, *src1* and *src2* are then placed on ``int_dest_i``,
    ``int_src1_i`` and ``int_src2_i``.  If either of bits 2..3 of *op* is
    set the branch issue port is used, otherwise the ALU issue port; in both
    cases the low two bits of *op* go to the matching ``*_oper_i`` as a
    2-bit ``Const`` and *imm* goes to the matching ``*_imm_i``.  After the
    regfile enable and the branch shadow inputs have been set, the process
    steps once and hands over to ``wait_for_issue`` on the chosen port.
    """
    yield from disable_issue(dut)

    # register numbers
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    low_op = Const(op & 0x3, 2)
    if op & (0x3 << 2):  # branch
        dut_issue = dut.brissue
        yield dut_issue.insn_i.eq(1)
        yield dut.br_oper_i.eq(low_op)
        yield dut.br_imm_i.eq(imm)
    else:  # alu
        dut_issue = dut.aluissue
        yield dut_issue.insn_i.eq(1)
        yield dut.alu_oper_i.eq(low_op)
        yield dut.alu_imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
972
973
def print_reg(dut, rnums):
    """Simulation process: print selected integer registers of *dut* in hex.

    Each register number in *rnums* is read from
    ``dut.intregs.regs[n].reg``; a single line of the form
    ``reg <numbers>: <hex values>`` (both comma-separated) is printed.
    """
    hexvals = []
    for regnum in rnums:
        value = yield dut.intregs.regs[regnum].reg
        hexvals.append("%x" % value)
    labels = ','.join(str(regnum) for regnum in rnums)
    print("reg %s: %s" % (labels, ','.join(hexvals)))
981
982
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of *n_ops* randomly-chosen instruction tuples.

    Register numbers are drawn from ``1 .. dut.n_regs-1``, the immediate
    from ``1 .. (1 << dut.rwid)-1`` and the opcode from ``0 .. max_opnums``.
    Each entry is ``(src1, src2, dest, op, opi, imm)``; when *shadowing* is
    true a trailing ``(0, 0)`` pair is added as a seventh element.
    """
    ops = []
    for _ in range(n_ops):
        # the order of these draws fixes the result for a given seed
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        imm = randint(1, (1 << dut.rwid)-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)
        # opi is 1 only when the 0..2 draw comes up zero, otherwise 0
        opi = int(randint(0, 2) == 0)

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
998
999
def wait_for_busy_clear(dut):
    """Simulation process: keep stepping until ``dut.busy_o`` reads false.

    ``busy`` is printed once for every step spent waiting.
    """
    while (yield dut.busy_o):
        print("busy")
        yield
1007
1008
def disable_issue(dut):
    """Simulation process: clear ``insn_i`` on the ALU, branch and LD/ST
    issue units of *dut*, in that order.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1013
1014
def wait_for_issue(dut, dut_issue):
    """Simulation process: wait until *dut_issue* reports the issue happened.

    Steps (printing ``busy`` each time) while ``dut_issue.fn_issue_o`` reads
    false.  As soon as it reads true, every issue enable is cleared through
    ``disable_issue`` and ``dut.reg_enable_i`` is set to 0.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy")
        # debugging aid: yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    # debugging aid: yield from print_reg(dut, [1,2,3])
1026
1027
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: issue a branch together with shadowed successors.

    Registers ``1 .. dut.n_regs-1`` are loaded with random values (seeded
    from a fixed starting seed) in both *dut* and *alusim*.  A short list of
    instruction tuples is then issued through ``int_instr`` and, once
    ``dut.busy_o`` clears, the same list is replayed on *alusim* before
    ``alusim.check`` / ``alusim.dump`` are run against *dut*.

    Instruction tuples have the layout
    ``(src1, src2, dest, op, (shadow_on, shadow_off))``.  Entries with
    ``op >= 4`` are treated as branches: their ``dest`` slot holds a
    ``(branch_ok, branch_fail)`` pair of instruction lists (items may be
    ``None``) and ``src2`` is reused as the destination register number.

    NOTE(review): the disabled ``if False:`` section builds its entries with
    ``create_random_ops(..., True, ...)``, which returns seven-field tuples;
    those would not unpack in the issue loop below if it were re-enabled.
    """
    iseed = 3

    for _ in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            # insts.append((6, 6, 1, 2, (0, 0)))
            # insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            # op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            # branch_ok.append((5, 7, 5, 1, (1, 0)))
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            # branch_fail.append(None)
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # untouched copy for the software replay further down: the issue
        # loop below consumes (and extends) insts itself
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        while insts:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # a direction has already been reported: issue unshadowed
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        insts.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        insts.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate right after op; these tuples do
            # not carry one, so 0 is passed to keep the remaining arguments
            # lined up with their parameters (the call previously supplied
            # one argument too few and could not run).
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the same instructions on the software model
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): scoreboard_sim in this file calls alusim.op with
            # six arguments (op, opi, imm, src1, src2, dest); this
            # four-argument call may need the same treatment -- confirm
            # against the model's op() signature.
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                # follow whichever side the model says the branch took
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1140
1141
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: assemble a short program and feed it to *dut*.

    Registers ``1 .. dut.n_regs-1`` are preset to their own index in both
    *dut* and *alusim*.  The assembly listing selected below is handed to
    ``Program``; every generated instruction word is assigned to
    *instruction*, followed by a bare ``yield``, and then queued with
    ``power_instr_q``.  After ``dut.qlen_o`` reads zero and ``dut.busy_o``
    clears, ``alusim.check`` and ``alusim.dump`` are run against *dut*.

    *m* is accepted but not used here.
    """
    seed(0)

    for _ in range(1):

        # preset every register to its own number
        # (XXX actually, not random at all)
        for regnum in range(1, dut.n_regs):
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # choose the program: flip the conditions to select a listing
        lst = []
        if False:
            lst.extend(["addi 2, 0, 0x4321",
                        "addi 3, 0, 0x1234",
                        "add 1, 3, 2",
                        "add 4, 3, 5",
                        ])
        if True:
            lst.extend(["lbzu 6, 7(2)",
                        ])

        with Program(lst) as program:
            words = program.generate_instructions()
            listing = program.assembly.splitlines()

            # issue instruction(s), wait for issue to be free before proceeding
            for ins, code in zip(words, listing):
                yield instruction.eq(ins)  # raw binary instr.
                yield

                print("binary 0x{:X}".format(ins & 0xffffffff))
                print("assembly", code)

                # (model update not wired in yet:
                #  alusim.op(op, opi, imm, src1, src2, dest))
                yield from power_instr_q(dut, pdecode2, ins, code)

        # wait for all instructions to stop before checking: first let the
        # queue length reach zero, then step four more times
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1198
1199
def scoreboard_sim(dut, alusim):
    """Simulation process: feed hand-written instruction tuples through *dut*.

    Registers ``1 .. dut.n_regs-1`` are preset to their own index in both
    *dut* and *alusim*.  The instruction list is then assembled from the
    ``if True:`` / ``if False:`` sections below (flip a condition to select
    a different regression case).  Every instruction is applied to *alusim*
    and queued on *dut* with ``instr_q``; after ``dut.qlen_o`` reads zero
    and ``dut.busy_o`` clears, ``alusim.check`` and ``alusim.dump`` are run
    against *dut*.

    The issue loop unpacks each entry as
    ``(src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))``.  Many
    of the disabled sections still hold shorter tuples and will not unpack
    if switched on without first being brought up to that layout.
    """
    seed(0)

    for _ in range(1):

        # preset every register to its own number
        for regnum in range(1, dut.n_regs):
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs += [(1, 2, 0, 0x20, 1, 1, (0, 0))]  # LD
            # alternative: (1, 2, 0, 0x10, 1, 1, (0, 0))

        if False:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if False:
            instrs += [(7, 3, 2, 4, 0, 0, (0, 0)),
                       (7, 6, 6, 2, 0, 0, (0, 0)),
                       (1, 7, 2, 2, 0, 0, (0, 0))]

        if True:
            instrs += [(2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                        0, 0, (0, 0)),
                       (5, 3, 3, InternalOp.OP_ADD, Function.ALU,
                        0, 0, (0, 0))]
        if False:
            instrs += [(3, 5, 5, InternalOp.OP_MUL_L64, Function.ALU,
                        1, 7, (0, 0))]
        if False:
            instrs += [(2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                        0, 0, (0, 0))]

        if False:
            instrs += [(2, 3, 3, 0, 0, 0, (0, 0)),
                       (5, 3, 3, 1, 0, 0, (0, 0)),
                       (3, 5, 5, 2, 0, 0, (0, 0)),
                       (5, 3, 3, 3, 0, 0, (0, 0)),
                       (3, 5, 5, 0, 0, 0, (0, 0))]

        if False:
            instrs += [(3, 3, 4, 0, 0, 13979, (0, 0)),
                       (6, 4, 1, 2, 0, 40976, (0, 0)),
                       (1, 4, 7, 4, 1, 23652, (0, 0))]

        if False:
            instrs += [(5, 6, 2, 1),
                       (2, 2, 4, 0)]
            # alternative third entry: (2, 2, 3, 1)

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs += [(2, 6, 2, 1),
                       (2, 1, 2, 0)]

        if False:
            instrs += [(1, 2, 7, 2),
                       (7, 1, 5, 0),
                       (4, 4, 1, 1)]

        if False:
            instrs += [(5, 6, 2, 2),
                       (1, 1, 4, 1),
                       (6, 5, 3, 0)]

        if False:
            # Write-after-Write Hazard
            instrs += [(3, 6, 7, 2),
                       (4, 4, 7, 1)]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [(1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1)]

        if False:
            # self-read-write sandwich
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # very weird failure
            instrs += [(5, 2, 5, 2),
                       (2, 6, 3, 0),
                       (4, 2, 2, 1)]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (0, 1))]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (1, 0))]

        if False:
            instrs += [(4, 3, 5, 1, 0, (0, 0)),
                       (5, 2, 3, 1, 0, (0, 0)),
                       (7, 1, 5, 2, 0, (0, 0)),
                       (5, 6, 6, 4, 0, (0, 0)),
                       (7, 5, 2, 2, 0, (1, 0)),
                       (1, 7, 5, 0, 0, (0, 1)),
                       (1, 6, 1, 2, 0, (1, 0)),
                       (1, 6, 7, 3, 0, (0, 0)),
                       (6, 7, 7, 0, 0, (0, 0))]

        # issue instruction(s), wait for issue to be free before proceeding
        for seq, entry in enumerate(instrs):
            print(seq, entry)
            src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = entry

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (seq, src1, src2, dest, op, fn_unit, opi, imm))
            # the software model is updated first, then the hardware queue
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking: first let the
        # queue length reach zero, then step four more times
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1359
1360
def test_scoreboard():
    """Build the decoder + issue/scoreboard test bench and simulate it.

    A ``PowerDecode2`` and an ``IssueToScoreboard`` are placed in one
    module with a 32-bit ``instruction`` signal wired to the decoder's raw
    opcode input.  The design is written out as RTLIL to
    ``test_scoreboard6600.il`` and then simulated with ``power_sim``,
    producing ``test_powerboard6600.vcd``.
    """
    regwidth = 64
    dut = IssueToScoreboard(2, 1, 1, regwidth, 8, 8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # created but not passed to anything below

    m = Module()
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    # simulator = ISA(pdecode, initial_regs)

    pdecode2 = PowerDecode2(pdecode)
    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    m.d.comb += [
        pdecode2.dec.raw_opcode_in.eq(instruction),
        pdecode2.dec.bigendian.eq(0),  # little / big?
    ]

    rtlil_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, sim_process, vcd_name='test_powerboard6600.vcd')

    # alternative processes, currently disabled:
    # run_simulation(dut, scoreboard_sim(dut, alusim),
    #                vcd_name='test_scoreboard6600.vcd')
    #
    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                vcd_name='test_scoreboard6600.vcd')
1393
1394
# run the test bench when this file is executed as a script
if "__main__" == __name__:
    test_scoreboard()