whitespace/shuffle
[soc.git] / src / soc / experiment / score6600_multi.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
5 from nmigen.back.pysim import Delay
6
7 from soc.regfile.regfile import RegFileArray, treereduce
8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
10 from soc.scoreboard.global_pending import GlobalPending
11 from soc.scoreboard.group_picker import GroupPicker
12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
15 from soc.scoreboard.memfu import MemFunctionUnits
16
17 from soc.experiment.compalu import ComputationUnitNoDelay
18 from soc.experiment.compalu_multi import MultiCompUnit, go_record
19 from soc.experiment.compldst_multi import LDSTCompUnit
20 from soc.experiment.compldst_multi import CompLDSTOpSubset
21 from soc.experiment.l0_cache import TstL0CacheBuffer
22
23 from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
24
25 from soc.decoder.power_enums import InternalOp, Function
26 from soc.decoder.power_decoder import (create_pdecode)
27 from soc.decoder.power_decoder2 import (PowerDecode2)
28 from soc.decoder.power_decoder2 import Decode2ToExecute1Type
29
30 from soc.simulator.program import Program
31
32
33 from nmutil.latch import SRLatch
34 from nmutil.nmoperator import eq
35
36 from random import randint, seed
37 from copy import deepcopy
38 from math import log
39
40 from soc.experiment.sim import RegSim, MemSim
41 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
42
43
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that the group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """

    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True the additional LD/ST signals
                         (go_ad_i, go_st_i, ld_o, st_o, adr_rel_o, ...)
                         are created here and wired up in elaborate()
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        # a group of groups is as wide as the sum of its member groups;
        # a group of plain units is as wide as the number of units
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in self.units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.rd0 = go_record(n_units, "rd0")
        self.rd1 = go_record(n_units, "rd1")
        self.go_rd_i = [self.rd0.go, self.rd1.go]  # XXX HACK!
        self.wr0 = go_record(n_units, "wr0")
        self.go_wr_i = [self.wr0.go]
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = [self.rd0.rel, self.rd1.rel]  # HACK!
        self.req_rel_o = self.wr0.rel
        self.done_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)
        # input operand

    def elaborate(self, platform):
        """ Adds every unit as submodule "comp<N>" and connects the
            group-wide signals to the per-unit signals by concatenation.

            Returns the nmigen Module.
        """
        m = Module()
        comb = m.d.comb
        units = self.units

        for i, alu in enumerate(units):
            setattr(m.submodules, "comp%d" % i, alu)

        # debug trace of each unit's release signals
        for alu in units:
            print(alu, "rel", alu.req_rel_o, alu.rd_rel_o)

        def gather(attr):
            """collect the named attribute from every unit, in unit order"""
            return [getattr(alu, attr) for alu in units]

        # per-unit outputs are concatenated into the group-wide outputs
        comb += self.rd0.rel.eq(Cat(*[alu.rd_rel_o[0] for alu in units]))
        comb += self.rd1.rel.eq(Cat(*[alu.rd_rel_o[1] for alu in units]))
        comb += self.req_rel_o.eq(Cat(*gather("req_rel_o")))
        comb += self.done_o.eq(Cat(*gather("done_o")))
        comb += self.busy_o.eq(Cat(*gather("busy_o")))
        # group-wide inputs are split back out across the units
        comb += Cat(*gather("go_die_i")).eq(self.go_die_i)
        comb += Cat(*gather("shadown_i")).eq(self.shadown_i)
        comb += Cat(*gather("go_wr_i")).eq(self.wr0.go)  # XXX TODO
        comb += Cat(*[alu.go_rd_i[0] for alu in units]).eq(self.rd0.go)
        comb += Cat(*[alu.go_rd_i[1] for alu in units]).eq(self.rd1.go)
        comb += Cat(*gather("issue_i")).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # XXX NOTE: this only works because there is a single "port"
        # protected by a single go_wr.  multi-issue requires a bus
        # to be inserted here.
        if units:
            data_o = treereduce(units, "data_o")
            comb += self.data_o.eq(data_o)
            if self.ldstmode:
                addr_o = treereduce(units, "addr_o")
                comb += self.addr_o.eq(addr_o)

        # every unit sees the same pair of source operands
        for alu in units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals: same concatenation scheme as above
        comb += self.ld_o.eq(Cat(*gather("ld_o")))
        comb += self.st_o.eq(Cat(*gather("st_o")))
        comb += self.adr_rel_o.eq(Cat(*gather("adr_rel_o")))
        comb += self.sto_rel_o.eq(Cat(*gather("sto_rel_o")))
        comb += self.load_mem_o.eq(Cat(*gather("load_mem_o")))
        comb += self.stwd_mem_o.eq(Cat(*gather("stwd_mem_o")))
        comb += Cat(*gather("go_ad_i")).eq(self.go_ad_i)
        comb += Cat(*gather("go_st_i")).eq(self.go_st_i)

        return m
212
213
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LD/ST Computation Units that all receive the same
        operation (self.op).
    """

    def __init__(self, rwid, opwid, n_ldsts, l0):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST Computation Units to create
            * :l0:      object whose l0.dports[i].pi is handed to the
                        i-th LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.op = CompLDSTOpSubset("cul_i")

        # LD/ST Units: one per port of l0
        ldst_units = [LDSTCompUnit(l0.l0.dports[idx].pi, rwid, awid=48)
                      for idx in range(n_ldsts)]

        CompUnitsBase.__init__(self, rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        """ Base-class wiring plus fan-out of self.op to every unit. """
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units
        m.d.comb += [unit.oper_i.eq(self.op) for unit in self.units]

        return m
244
245
class CompUnitALUs(CompUnitsBase):
    """ Group of integer ALU Computation Units that all receive the same
        operation (self.op).
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALUs (each wrapped in a MultiCompUnit)
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")

        # Int ALUs, created first, then each wrapped in a Computation Unit
        alus = [ALU(rwid) for _ in range(n_alus)]
        units = [MultiCompUnit(rwid, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        """ Base-class wiring plus fan-out of self.op to every ALU. """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the subset of operation to ALUs
        for alu in self.units:
            comb += alu.oper_i.eq(self.op)

        return m
280
281
class CompUnitBR(CompUnitsBase):
    """ Group containing a single Branch Computation Unit. """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU (self.bgt) and its Computation Unit
            (self.br1) are kept as attributes so that a shadow unit can
            be created for them
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")  # TODO - CompALUBranchSubset
        self.oper_i = Signal(opwid, reset_less=True)
        # NOTE: imm_i is created but not yet connected in elaborate()
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = MultiCompUnit(rwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """ Base-class wiring plus fan-out of self.oper_i to the unit. """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units
        for alu in self.units:
            #comb += alu.oper_i.eq(self.op) # TODO
            comb += alu.oper_i.eq(self.oper_i)
            #comb += alu.imm_i.eq(self.imm_i)

        return m
317
318
class FunctionUnits(Elaboratable):
    """ Integer Function Unit dependency tracking: wraps an FU-FU
        dependency matrix and an FU-Register dependency matrix and
        exposes their issue / go_rd / go_wr / go_die inputs and
        readable / writable / register-select outputs.
    """

    def __init__(self, n_reg, n_int_alus, n_src, n_dst):
        """ Inputs:

            * :n_reg:      number of registers (width of the src/dest
                           and register-select signals)
            * :n_int_alus: number of Function Units
            * :n_src:      number of source operands per Function Unit
            * :n_dst:      number of destinations per Function Unit
        """
        self.n_src, self.n_dst = n_src, n_dst
        self.n_reg = n_reg
        self.n_int_alus = nf = n_int_alus

        self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # arrays
        src = []
        rsel = []
        rd = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
            rsel.append(Signal(n_reg, name="src%d_rsel_o" % j,
                               reset_less=True))
            rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
        dst = []
        dsel = []
        wr = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match src1/src2
            dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
            dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j,
                               reset_less=True))
            wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
        # NOTE: these pending signals are created but not (yet) stored
        # on the object or connected anywhere in this class
        wpnd = []
        pend = []
        for i in range(nf):
            j = i + 1  # name numbering to match src1/src2
            pend.append(Signal(nf, name="rd_src%d_pend_o" % j,
                               reset_less=True))
            wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j,
                               reset_less=True))

        self.dest_i = Array(dst)  # Dest in (top)
        self.src_i = Array(src)  # oper in (top)

        # for Register File Select Lines (horizontal), per-reg
        self.dst_rsel_o = Array(dsel)  # dest reg (bot)
        self.src_rsel_o = Array(rsel)  # src reg (bot)

        self.go_rd_i = Array(rd)
        self.go_wr_i = Array(wr)

        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (the attribute is created in elaborate())

    def elaborate(self, platform):
        """ Instantiates both dependency matrices and wires them to this
            object's ports.  Also sets self.wr_pend_o as a side effect.

            Returns the nmigen Module.
        """
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # the matrices are sized from the same n_src / n_dst that the
        # wiring loops below iterate over, so the two cannot disagree
        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus,
                                  self.n_src, self.n_dst)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_reg,
                                    self.n_src, self.n_dst)
        m.submodules.intregdeps = intregdeps

        comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
        comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)

        comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)

        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.readable_o.eq(intfudeps.readable_o)
        comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        for i in range(self.n_src):
            print (i, self.go_rd_i, intfudeps.go_rd_i)
            comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += intregdeps.src_i[i].eq(self.src_i[i])
            comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
        for i in range(self.n_dst):
            print (i, self.go_wr_i, intfudeps.go_wr_i)
            comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += intregdeps.dest_i[i].eq(self.dest_i[i])
            comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        return m
417
418
class Scoreboard(Elaboratable):
    """ Top-level scoreboard experiment.

        Creates the integer and FP register files, a test L0 cache
        buffer, ALU / LD-ST / branch Computation Units, the Function
        Unit dependency matrices, memory function units, priority
        pickers, shadow matrices and a branch speculation recorder,
        and wires them all together in elaborate().
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.l0 = TstL0CacheBuffer()

        # issue q needs to get at these
        # (2 ALU slots, 2 LD/ST slots, 1 branch slot)
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.instr = Decode2ToExecute1Type("sc_instr")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds and returns the nmigen Module: instantiates all the
            sub-units and connects issue, dependency tracking, memory,
            shadowing / branch speculation and the register file.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.l0 = l0 = self.l0

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # NOTE: the FP ports are requested but not connected below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): n_int_alus is a hard-coded total; presumably it must
        # equal the ALU + LD/ST + branch unit count (2+2+1 as configured
        # in __init__) - confirm if the issue group sizes are ever changed
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        # NOTE(review): n_ldsts is hard-coded here, whereas the number of
        # LD/ST Comp Units created comes from self.lsissue.n_insns
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, l0)

        # Comp Units
        # (a group of groups: ALUs first, then LD/STs, then the branch unit)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt  # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        fu_n_src = 2
        fu_n_dst = 1
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
                                                     fu_n_src, fu_n_dst)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        # NOTE: created and added as a submodule, but none of its ports
        # are connected in this method
        mempick1 = GroupPicker(n_ldsts, 1, 1)
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
        m.submodules.intpick1 = ipick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): the code below uses a *separate* 1-wide matrix
        # (bshadow) for the branch rather than widening "shadows" by 1
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq_from_execute1(self.instr)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.op.eq_from_execute1(self.instr)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i[0].eq(regdecode.dest_o)
        comb += intfus.src_i[0].eq(regdecode.src1_o)
        comb += intfus.src_i[1].eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        reset_b = Signal(cul.n_units, reset_less=True)
        # XXX was cul.go_wr_i not done.o
        # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
        sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        # (sync: go_ad_i follows adr_rel_o one clock later)
        sync += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        # NOTE: indexes 0 and 1 are hard-coded (two LD/ST units assumed)
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = ipick1.go_rd_o
        go_wr_o = ipick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        for i in range(fu_n_src):
            comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
        for i in range(fu_n_dst):
            comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        int_rd_o = intfus.readable_o
        rrel_o = cu.rd_rel_o
        rqrl_o = cu.req_rel_o
        for i in range(fu_n_src):
            comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
            comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
        int_wr_o = intfus.writable_o
        for i in range(fu_n_dst):
            # XXX FIXME: rqrl_o[i] here
            comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
            comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
            # XXX experiment: use ~cu.busy_o instead.  *should* be good
            # because the comp unit is only free once completed
            comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        # NOTE(review): 0x1f below is a 5-bit mask, matching the hard-coded
        # n_int_alus = 5 above; the two must be changed together
        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1, so that 0 is left to mean "branch not yet met"
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
        comb += int_src1.ren.eq(intfus.src_rsel_o[0])
        comb += int_src2.ren.eq(intfus.src_rsel_o[1])

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        for i in range(fu_n_src):
            comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
        for i in range(fu_n_dst):
            comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields the register files' contents followed by the main
            input / output signals of this module.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything yielded by __iter__ as a list. """
        return list(self)
773
774
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard.

        The instruction at the head of the queue is inspected in place
        and routed to the ALU, LD/ST or branch issue group of the
        Scoreboard; it is popped from the queue once the selected group
        reports (via fn_issue_o) that it accepted the instruction.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length (positive integer)
            * :n_in:   number of instruction input slots (data_i)
            * :n_out:  passed through to InstructionQ
            * :rwid:   bit width of register file(s)
            * :opwid:  operand bit width, passed through to InstructionQ
            * :n_regs: depth of register file(s)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # floor(log2(qlen)) + 2 bits.  bit_length() is exact integer
        # arithmetic, unlike int(log(qlen) / log(2)) which depends on
        # floating-point rounding
        mqbits = unsigned((qlen.bit_length() - 1) + 2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction._nq(n_in, "data_i")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Instantiates the queue and the scoreboard and links them.
            Also sets self.intregs (for testing) as a side effect.

            Returns the nmigen Module.
        """
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when the pop length (n_sub_i)
        # is set, below.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            instr = iq.data_o[0]
            imm = instr.imm_data.data
            dest = instr.write_reg.data
            src1 = instr.read_reg1.data
            src2 = instr.read_reg2.data
            op = instr.insn_type
            fu = instr.fn_unit
            opi = instr.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile
            comb += sc.instr.eq(instr)

            # choose a Function-Unit-Group
            with m.If(fu == Function.ALU):  # alu
                comb += sc.aluissue.insn_i.eq(1)  # enable alu issue
                comb += wait_issue_alu.eq(1)
            with m.Elif(fu == Function.LDST):  # ld/st
                comb += sc.lsissue.insn_i.eq(1)  # enable ldst issue
                comb += wait_issue_ls.eq(1)

            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ Yields p_ready_o, every field of every data_i slot, then
            p_add_i.
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ Returns everything yielded by __iter__ as a list. """
        return list(self)
890
891
def power_instr_q(dut, pdecode2, ins, code):
    """ Simulation process: push the decoder's current output
        (pdecode2.e) into dut's instruction queue.

        The instruction is copied to dut.data_i[0], p_add_i is set to 1,
        and the process then waits (one clock at a time) until p_ready_o
        is seen set before clearing p_add_i again.

        ins and code are accepted but not used here.
    """
    pending = [pdecode2.e]
    n_to_add = 1

    for slot, decoded in enumerate(pending):
        yield dut.data_i[slot].eq(decoded)
        # read both fields back purely for the debug printout
        itype = yield decoded.insn_type
        funit = yield decoded.fn_unit
        print("senddata ", slot, itype, funit, decoded)

    yield dut.p_add_i.eq(n_to_add)
    yield
    # keep clocking until the queue reports the instruction was added
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
909
910
def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ Simulation process: push one hand-built instruction into dut's
        instruction queue.

        Each field of dut.data_i[0] is set individually (the "ok" flags
        of the three registers are forced to 1), then p_add_i is set to
        1 and the process waits (one clock at a time) until p_ready_o is
        seen set before clearing p_add_i again.

        branch_success and branch_fail are accepted but not used here.
    """
    queue = [dict(insn_type=op, fn_unit=funit, write_reg=dest,
                  imm_data=(imm, op_imm),
                  read_reg1=src1, read_reg2=src2)]
    n_to_add = 1

    for slot, fields in enumerate(queue):
        imm_value, imm_ok = fields['imm_data']
        port = dut.data_i[slot]
        yield port.insn_type.eq(fields['insn_type'])
        yield port.fn_unit.eq(fields['fn_unit'])
        yield port.read_reg1.data.eq(fields['read_reg1'])
        yield port.read_reg1.ok.eq(1)  # XXX TODO
        yield port.read_reg2.data.eq(fields['read_reg2'])
        yield port.read_reg2.ok.eq(1)  # XXX TODO
        yield port.write_reg.data.eq(fields['write_reg'])
        yield port.write_reg.ok.eq(1)  # XXX TODO
        yield port.imm_data.data.eq(imm_value)
        yield port.imm_data.ok.eq(imm_ok)
        # read the whole slot back purely for the debug printout
        packed = yield dut.data_i[slot]
        print("senddata %d %x" % (slot, packed))

    yield dut.p_add_i.eq(n_to_add)
    yield
    # keep clocking until the queue reports the instruction was added
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
945
946
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ Simulation process: drive one instruction directly onto the dut.

        Clears all issue requests, sets the destination/source register
        inputs, then raises the issue request of either the branch group
        (when bit 2 or 3 of op is set) or the ALU group, together with the
        2-bit operation and the immediate.  Finally enables the regfile,
        sets the branch shadow inputs and waits for the chosen group to
        report issue.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    is_branch = (op & (0x3 << 2)) != 0
    if is_branch:
        issue_unit, oper_sig, imm_sig = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        issue_unit, oper_sig, imm_sig = (dut.aluissue, dut.alu_oper_i,
                                         dut.alu_imm_i)

    yield issue_unit.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, issue_unit)
971
972
def print_reg(dut, rnums):
    """ Simulation process: read the listed integer registers and print them.

        For every register number in rnums the value of
        dut.intregs.regs[rnum].reg is requested from the simulator; the
        numbers and their hex values are then printed on one line.
    """
    hexvals = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        hexvals.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print("reg %s: %s" % (labels, ','.join(hexvals)))
980
981
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Build a list of n_ops random instruction tuples.

        Each tuple is (src1, src2, dest, op, opi, imm); when shadowing is
        true a trailing (0, 0) shadow pair is added as a seventh element.
        Register numbers are drawn from 1..dut.n_regs-1, the immediate
        from 1..2**dut.rwid-1 and op from 0..max_opnums.
    """
    ops = []
    for _ in range(n_ops):
        # NB: the order of the draws below is significant for seeded runs
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        imm = randint(1, (1 << dut.rwid)-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)
        # immediate flag is set only when the draw is zero (1 in 3)
        opi = int(randint(0, 2) == 0)

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
997
998
def wait_for_busy_clear(dut):
    """ Simulation process: keep stepping until dut.busy_o reads as zero.

        Prints "busy" for every step on which busy_o was still set.
    """
    while (yield dut.busy_o):
        print("busy",)
        yield
1006
1007
def disable_issue(dut):
    """ Simulation process: clear the issue request (insn_i) of the ALU,
        branch and load/store issue groups, in that order.
    """
    for group in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, group).insn_i.eq(0)
1012
1013
def wait_for_issue(dut, dut_issue):
    """ Simulation process: step until dut_issue.fn_issue_o reads non-zero.

        Prints "busy" for every step spent waiting.  Once issue is seen,
        all issue requests are cleared and reg_enable_i is set to 0.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy",)
        # yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    # yield from print_reg(dut, [1,2,3])
1025
1026
def scoreboard_branch_sim(dut, alusim):
    """ Simulation process: issue a small branch-shadowed instruction tree,
        replay it on the software model and compare the results.

        Instruction tuples here have five fields:
        (src1, src2, dest, op, (shadow_on, shadow_off)).  For a branch
        (op >= 4) the ``dest`` field instead carries a pair of lists
        (branch_ok, branch_fail) holding the instructions to issue on
        either side of the branch; entries may be None.

        dut:    device under test (intregs, branch_direction_o, ... are used)
        alusim: software model; setval/op/check/dump are called on it
    """

    iseed = 3

    for _run in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers (mirrored into the model)
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops(..., True, ...) returns
            # 7-field tuples, which do not match the 5-field layout
            # unpacked below - this disabled path needs updating before
            # it can be re-enabled.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # independent copy for the software replay: the issue loop below
        # consumes (and extends) the original list
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate as its third argument; these
            # tuples carry none, so pass 0 explicitly (the previous call
            # omitted it and was one positional argument short).
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): argument layout (op, opi, imm, src1, src2, dest)
            # follows the alusim.op call in scoreboard_sim; no immediate is
            # available here so opi and imm are both 0 - confirm against
            # RegSim.op.
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1139
1140
def power_sim(m, dut, pdecode2, instruction, alusim):
    """ Simulation process: assemble a short POWER program, feed each
        instruction through the decoder into the dut's instruction queue,
        wait for everything to complete and check against the model.

        m:           unused here (kept for the caller's signature)
        dut:         device under test (intregs, qlen_o, ... are used)
        pdecode2:    decoder passed on to power_instr_q
        instruction: signal that receives each raw binary instruction
        alusim:      software model; setval/check/dump are called on it
    """

    seed(0)

    for _run in range(1):

        # preload registers 1..n_regs-1 with their own index
        # (XXX actually, not random at all), mirrored into the model
        for regnum in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions
        asm = []
        if False:
            asm.extend(["addi 2, 0, 0x4321",
                        "addi 3, 0, 0x1234",
                        "add 1, 3, 2",
                        "add 4, 3, 5"])
        if True:
            asm.extend(["lbzu 6, 7(2)"])

        with Program(asm) as program:
            binaries = program.generate_instructions()
            listing = program.assembly.splitlines()

            # issue instruction(s), wait for issue to be free before proceeding
            for ins, code in zip(binaries, listing):
                yield instruction.eq(ins)  # raw binary instr.
                yield  # Delay(1e-6)

                print("binary 0x{:X}".format(ins & 0xffffffff))
                print("assembly", code)

                #alusim.op(op, opi, imm, src1, src2, dest)
                yield from power_instr_q(dut, pdecode2, ins, code)

        # wait for all instructions to stop before checking:
        # first until the queue length reads zero ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... then four more steps, then until busy clears
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1197
1198
def scoreboard_sim(dut, alusim):
    """ Simulation process: queue hand-written instructions via instr_q,
        apply each one to the software model, wait for completion and
        compare the dut against the model.

        The issue loop expects 8-field tuples:
        (src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail)).
        NOTE(review): most of the disabled ("if False") regression sets
        below use shorter tuple layouts and would not unpack in the issue
        loop as it stands.

        dut:    device under test (intregs, qlen_o, ... are used)
        alusim: software model; setval/op/check/dump are called on it
    """

    seed(0)

    for _run in range(1):

        # preload registers 1..n_regs-1 with their own index,
        # mirrored into the model
        for regnum in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x20, 1, 1, (0, 0)))  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs += [(7, 3, 2, 4, 0, 0, (0, 0)),
                       (7, 6, 6, 2, 0, 0, (0, 0)),
                       (1, 7, 2, 2, 0, 0, (0, 0))]

        if True:
            instrs += [
                (2, 3, 3, InternalOp.OP_ADD, Function.ALU, 0, 0, (0, 0)),
                (5, 3, 3, InternalOp.OP_ADD, Function.ALU, 0, 0, (0, 0)),
            ]
        if False:
            instrs.append((3, 5, 5, InternalOp.OP_MUL_L64, Function.ALU,
                           1, 7, (0, 0)))
        if False:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))

        if False:
            instrs += [(2, 3, 3, 0, 0, 0, (0, 0)),
                       (5, 3, 3, 1, 0, 0, (0, 0)),
                       (3, 5, 5, 2, 0, 0, (0, 0)),
                       (5, 3, 3, 3, 0, 0, (0, 0)),
                       (3, 5, 5, 0, 0, 0, (0, 0))]

        if False:
            instrs += [(3, 3, 4, 0, 0, 13979, (0, 0)),
                       (6, 4, 1, 2, 0, 40976, (0, 0)),
                       (1, 4, 7, 4, 1, 23652, (0, 0))]

        if False:
            instrs += [(5, 6, 2, 1),
                       (2, 2, 4, 0)]
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs += [(2, 6, 2, 1),
                       (2, 1, 2, 0)]

        if False:
            instrs += [(1, 2, 7, 2),
                       (7, 1, 5, 0),
                       (4, 4, 1, 1)]

        if False:
            instrs += [(5, 6, 2, 2),
                       (1, 1, 4, 1),
                       (6, 5, 3, 0)]

        if False:
            # Write-after-Write Hazard
            instrs += [(3, 6, 7, 2),
                       (4, 4, 7, 1)]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [(1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1)]

        if False:
            # self-read-write sandwich
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # very weird failure
            instrs += [(5, 2, 5, 2),
                       (2, 6, 3, 0),
                       (4, 2, 2, 1)]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (0, 1))]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (1, 0))]

        if False:
            instrs += [(4, 3, 5, 1, 0, (0, 0)),
                       (5, 2, 3, 1, 0, (0, 0)),
                       (7, 1, 5, 2, 0, (0, 0)),
                       (5, 6, 6, 4, 0, (0, 0)),
                       (7, 5, 2, 2, 0, (1, 0)),
                       (1, 7, 5, 0, 0, (0, 1)),
                       (1, 6, 1, 2, 0, (1, 0)),
                       (1, 6, 7, 3, 0, (0, 0)),
                       (6, 7, 7, 0, 0, (0, 0))]

        # issue instruction(s), wait for issue to be free before proceeding
        for seq, instr in enumerate(instrs):
            print(seq, instr)
            src1, src2, dest, op, fn_unit, opi, imm, shadow = instr
            br_ok, br_fail = shadow

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (seq, src1, src2, dest, op, fn_unit, opi, imm))
            # update the software model first, then queue on the dut
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first until the queue length reads zero ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... then four more steps, then until busy clears
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1358
1359
def test_scoreboard():
    """ Build the decoder + scoreboard test module, write its RTLIL to
        test_scoreboard6600.il and run the power_sim simulation, dumping
        waveforms to test_powerboard6600.vcd.
    """
    regwidth = 64
    dut = IssueToScoreboard(2, 1, 1, regwidth, 8, 8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # constructed but not used further here

    instruction = Signal(32)
    m = Module()

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    pdecode2 = PowerDecode2(pdecode)
    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    # feed the raw instruction word into the decoder
    m.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    m.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    il_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, sim_process, vcd_name='test_powerboard6600.vcd')

    #run_simulation(dut, scoreboard_sim(dut, alusim),
    # vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    # vcd_name='test_scoreboard6600.vcd')
1392
1393
# run the scoreboard test when this file is executed as a script
if "__main__" == __name__:
    test_scoreboard()