Allow the formal engine to perform a same-cycle result in the ALU
[soc.git] / src / soc / experiment / score6600.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
5 from nmigen.back.pysim import Delay
6
7 from soc.regfile.regfile import RegFileArray, treereduce
8 from soc.scoreboard.fu_fu_matrix import FUFUDepMatrix
9 from soc.scoreboard.fu_reg_matrix import FURegDepMatrix
10 from soc.scoreboard.global_pending import GlobalPending
11 from soc.scoreboard.group_picker import GroupPicker
12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
15 from soc.scoreboard.memfu import MemFunctionUnits
16
17 from soc.experiment.compalu import ComputationUnitNoDelay
18 from soc.experiment.compldst_multi import LDSTCompUnit
19 from soc.experiment.testmem import TestMemory
20
21 from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
22
23 from openpower.decoder.power_enums import MicrOp, Function
24 from openpower.decoder.power_decoder import (create_pdecode)
25 from openpower.decoder.power_decoder2 import (PowerDecode2)
26 from openpower.simulator.program import Program
27
28
29 from nmutil.latch import SRLatch
30 from nmutil.nmoperator import eq
31
32 from random import randint, seed
33 from copy import deepcopy
34 from math import log
35
36 from soc.experiment.sim import RegSim, MemSim
37 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
38
39
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Presents a group of Computation Units behind one set of signals:
        the per-unit issue, go_rd, go_wr, busy (etc.) lines are
        concatenated into buses, one slice per unit.

        The class works recursively.  It was intended to look after a
        handful of ALUs that handle the same operations, however the
        members of a group may equally be other CompUnitsBase instances:
        the concatenation trick works on *groups* of Computation Units
        just as well as on individual ones.  A top-level instance can
        therefore expose every signal of every ALU as one sequence whilst
        the ALUs remain conveniently grouped underneath.

        At the lower level, the intent is that a group of (identical)
        ALUs is passed the same operation, and, beyond that, that such a
        group shares the *same pipeline*, becoming a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """

    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: if True, the additional LD/ST signals
                         (go_ad_i, go_st_i, ld_o, st_o, ...) are created
                         and connected to the units as well
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        # a group of groups is as wide as all of its members together
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(group.n_units for group in self.units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)
        self.done_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.o_data = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # every member becomes a submodule named comp0, comp1, ...
        for idx, unit in enumerate(self.units):
            setattr(m.submodules, "comp%d" % idx, unit)

        def bundle(attr):
            # concatenation of the same-named signal of every unit,
            # in unit order
            return Cat(*[getattr(unit, attr) for unit in self.units])

        # outputs: gathered from the units into the group-level buses
        for name in ("rd_rel_o", "req_rel_o", "done_o", "busy_o"):
            comb += getattr(self, name).eq(bundle(name))

        # inputs: group-level buses split back out across the units
        for name in ("go_die_i", "shadown_i", "go_wr_i", "go_rd_i",
                     "issue_i"):
            comb += bundle(name).eq(getattr(self, name))

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        if self.units:
            comb += self.o_data.eq(treereduce(self.units, "o_data"))
            if self.ldstmode:
                comb += self.addr_o.eq(treereduce(self.units, "addr_o"))

        # every unit sees the same pair of source operands
        for unit in self.units:
            comb += unit.src1_i.eq(self.src1_i)
            comb += unit.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals, handled exactly like the ones above
        for name in ("ld_o", "st_o", "adr_rel_o", "sto_rel_o",
                     "load_mem_o", "stwd_mem_o"):
            comb += getattr(self, name).eq(bundle(name))
        for name in ("go_ad_i", "go_st_i"):
            comb += bundle(name).eq(getattr(self, name))

        return m
195
196
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LD/ST Computation Units that are all handed the same
        operation and immediate.
    """

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST Computation Units to create
            * :mem:     memory object passed to every LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: one per LD/ST unit
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        aluopwid = 4  # see compldst.py for "internal" opcode
        ldst_units = [LDSTCompUnit(rwid, aluopwid, alu, mem)
                      for alu in self.alus]

        super().__init__(rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the same operation to all units, 4 lower bits though
        for unit in self.units:
            comb += [unit.oper_i[0:4].eq(self.oper_i),
                     unit.imm_i.eq(self.imm_i),
                     unit.isalu_i.eq(0)]

        return m
234
235
class CompUnitALUs(CompUnitsBase):
    """ Group of integer ALU Computation Units that are all handed the
        same operation subset.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU Computation Units to create
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, then one Computation Unit wrapped around each
        int_alus = [ALU(rwid) for _ in range(n_alus)]
        comp_units = [ComputationUnitNoDelay(rwid, alu) for alu in int_alus]

        super().__init__(rwid, comp_units)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the subset of operation to ALUs.  only self.op is passed
        # on: oper_i and imm_i are declared in the constructor but are
        # not connected to the units here.
        comb += [unit.oper_i.eq(self.op) for unit in self.units]

        return m
274
275
class CompUnitBR(CompUnitsBase):
    """ Group containing the single Branch Computation Unit. """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU and its Computation Unit are kept as
            self.bgt and self.br1 so that a shadow unit can be created
            for the branch
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, self.bgt)
        super().__init__(rwid, [self.br1])

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the same operation to all units (imm_i is declared in
        # the constructor but is not connected here)
        comb += [unit.oper_i.eq(self.oper_i) for unit in self.units]

        return m
309
310
class FunctionUnits(Elaboratable):
    """ Integer Function Unit dependency tracking: pairs an FU-FU
        Dependency Matrix with an FU-Register Dependency Matrix and
        exposes their combined inputs and outputs.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register-select signals
            * :n_int_alus: number of Function Units being tracked
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here (it is
        # attached as self.wr_pend_o during elaborate), for use in WaWGrid

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        m.submodules.intfudeps = intfudeps = FUFUDepMatrix(n_intfus,
                                                           n_intfus)
        # Integer FU-Reg Dep Matrix (two source operands)
        m.submodules.intregdeps = intregdeps = FURegDepMatrix(n_intfus,
                                                              self.n_regs,
                                                              2)

        # global pending vectors come straight from the FU-Reg matrix,
        # which is also fed its own vectors back as pending inputs
        comb += [self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o),
                 self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o),
                 intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o),
                 intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o),
                 ]

        # FU-Reg pending outputs drive the FU-FU matrix
        comb += [intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
                 intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o),
                 ]
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        # FU-FU matrix control inputs and readable/writable outputs
        comb += [intfudeps.issue_i.eq(self.fn_issue_i),
                 intfudeps.go_rd_i.eq(self.go_rd_i),
                 intfudeps.go_wr_i.eq(self.go_wr_i),
                 intfudeps.go_die_i.eq(self.go_die_i),
                 self.readable_o.eq(intfudeps.readable_o),
                 self.writable_o.eq(intfudeps.writable_o),
                 ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [intregdeps.dest_i.eq(self.dest_i),
                 intregdeps.src_i[0].eq(self.src1_i),
                 intregdeps.src_i[1].eq(self.src2_i),
                 ]

        # the FU-Reg matrix receives the same control inputs
        comb += [intregdeps.go_rd_i.eq(self.go_rd_i),
                 intregdeps.go_wr_i.eq(self.go_wr_i),
                 intregdeps.go_die_i.eq(self.go_die_i),
                 intregdeps.issue_i.eq(self.fn_issue_i),
                 ]

        # register-select outputs
        comb += [self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
                 self.src1_rsel_o.eq(intregdeps.src_rsel_o[0]),
                 self.src2_rsel_o.eq(intregdeps.src_rsel_o[1]),
                 ]

        return m
384
385
class Scoreboard(Elaboratable):
    """ Scoreboard: wires together the register files, the Computation
        Units (ALU, LD/ST and branch groups), the dependency matrices,
        the group pickers, the issue units and the shadow matrices.

        The issue groups are created with fixed sizes in __init__
        (2 ALU, 2 LD/ST, 1 branch) and the matching totals are
        hard-coded in elaborate().
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_op = CompALUOpSubset("alu")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        # NOTE(review): elaborate() stores br1.o_data+1 in
        # branch_direction_o and comments that o_data == 1 means the
        # branch occurred, i.e. 2 when it occurs and 1 when it does not -
        # confirm that this matches the "success"/"fail" wording above.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.mem = mem = self.mem

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): 5 is presumably 2 ALU + 2 LD/ST + 1 BR, matching
        # the issue groups created in __init__ - confirm before resizing.
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        # NOTE(review): n_ldsts is hard-coded while the LD/ST group
        # itself is sized from self.lsissue.n_insns - keep them in step.
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)

        # Comp Units: one group of groups, in the order ALUs, LD/STs, BR
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt  # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        # (instantiated as a submodule; none of its ports are connected
        # in this method)
        mempick1 = GroupPicker(n_ldsts)
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        intpick1 = GroupPicker(n_intfus)
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): as written, the branch shadow appears to be the
        # separate bshadow matrix (second argument 1) rather than an extra
        # column on "shadows" - confirm against ShadowMatrix.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq(self.alu_op)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # single-bit "anything busy" indicator for the outside world
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        # reset_b is registered (sync), so the Mem FU address reset is
        # seen one clock after go_st / go_wr / go_die
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        # (indices 0 and 1 are hard-coded: exactly two LD/ST units assumed)
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])  # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        # note: the picker's request-release input is driven from the
        # Computation Units' done_o (not req_rel_o)
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.done_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from its issue until its go_wr
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): 0x1f is a 5-bit mask, which equals n_intfus bits
        # only while n_int_alus stays at 5.
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.o_data+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.o_data == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.i_data.eq(cu.o_data)
        comb += cu.src1_i.eq(int_src1.o_data)
        comb += cu.src2_i.eq(int_src2.o_data)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # everything yielded by both register files, then the
        # register-number inputs, the issue output and the
        # branch-speculation signals
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        # the signals produced by __iter__, as a list
        return list(self)
725
726
class IssueToScoreboard(Elaboratable):
    """ Instruction Queue in front of a Scoreboard: the instruction at
        the head of the queue is presented to the scoreboard and is
        removed from the queue once the relevant issue group accepts it.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length
            * :n_in:   number of instruction inputs (entries of i_data)
            * :n_out:  passed to InstructionQ alongside n_in
            * :rwid:   register bit width, passed to queue and scoreboard
            * :opwid:  operation width, passed to the queue
            * :n_regs: number of registers, passed to the scoreboard
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # width of the "how many instructions" counters
        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from i_data)
        self.p_o_ready = Signal()  # instructions were added
        self.i_data = Instruction._nq(n_in, "i_data")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += [self.busy_o.eq(sc.busy_o),
                 self.qlen_o.eq(iq.qlen_o),
                 ]

        # link up instruction queue
        comb += [iq.p_add_i.eq(self.p_add_i),
                 self.p_o_ready.eq(iq.p_o_ready),
                 ]
        for slot in range(self.n_in):
            comb += eq(iq.i_data[slot], self.i_data[slot])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when the issue group being
        # waited on reports a non-zero fn_issue_o (n_sub_i is set to 1
        # below).

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            for waiting, group in ((wait_issue_ls, sc.lsissue),
                                   (wait_issue_br, sc.brissue),
                                   (wait_issue_alu, sc.aluissue)):
                with m.If(waiting & (group.fn_issue_o != 0)):
                    with m.If(iq.qlen_o != 0):
                        comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            instr = iq.o_data[0]
            imm = instr.imm_data.data
            dest = instr.write_reg.data
            src1 = instr.read_reg1.data
            src2 = instr.read_reg2.data
            op = instr.insn_type
            fu = instr.fn_unit
            opi = instr.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += [sc.int_dest_i.eq(dest),
                     sc.int_src1_i.eq(src1),
                     sc.int_src2_i.eq(src2),
                     sc.reg_enable_i.eq(1),  # enable the regfile
                     ]

            # choose a Function-Unit-Group
            with m.If(fu == Function.ALU):  # alu
                comb += [sc.alu_op.eq_from_execute1(instr),
                         sc.aluissue.insn_i.eq(1),
                         wait_issue_alu.eq(1),
                         ]
            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                comb += [sc.br_oper_i.eq(Cat(op[0:2], opi)),
                         sc.br_imm_i.eq(imm),
                         sc.brissue.insn_i.eq(1),
                         wait_issue_br.eq(1),
                         ]
            with m.Elif((op & (0x3 << 4)) != 0):  # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                # (op bits 4 and 5 end up as ls_oper_i bits 2 and 3)
                comb += [sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6])),
                         sc.ls_imm_i.eq(imm),
                         sc.lsissue.insn_i.eq(1),
                         wait_issue_ls.eq(1),
                         ]

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        yield self.p_o_ready
        for insn in self.i_data:
            yield from list(insn)
        yield self.p_add_i

    def ports(self):
        return list(self)
849
850
def power_instr_q(dut, pdecode2, ins, code):
    """Simulation process: queue the decoder's current instruction.

    Assigns ``pdecode2.e`` to ``dut.i_data[0]``, reads back its
    ``insn_type`` and ``fn_unit`` for a debug print, then raises
    ``p_add_i`` to 1 and waits, one clock at a time, until
    ``p_o_ready`` is seen before dropping ``p_add_i`` back to 0.

    ``ins`` and ``code`` are accepted but not used.
    """
    decoded = pdecode2.e

    yield dut.i_data[0].eq(decoded)
    insn_type = yield decoded.insn_type
    fn_unit = yield decoded.fn_unit
    print("senddata ", 0, insn_type, fn_unit, decoded)

    # request that one instruction be added
    yield dut.p_add_i.eq(1)
    while True:
        yield  # advance one clock
        if (yield dut.p_o_ready):
            break

    yield dut.p_add_i.eq(0)
868
869
def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """Simulation process: queue one instruction given as plain values.

    Fills in the fields of ``dut.i_data[0]`` (instruction type, function
    unit, the two read registers, the write register and the immediate,
    with every register "ok" flag set to 1), prints the resulting entry,
    then raises ``p_add_i`` to 1 and waits, one clock at a time, until
    ``p_o_ready`` is seen before dropping ``p_add_i`` back to 0.

    ``branch_success`` and ``branch_fail`` are accepted but not used.
    """
    slot = 0  # a single instruction is queued per call
    port = dut.i_data[slot]

    field_values = (
        (port.insn_type, op),
        (port.fn_unit, funit),
        (port.read_reg1.data, src1),
        (port.read_reg1.ok, 1),  # XXX TODO
        (port.read_reg2.data, src2),
        (port.read_reg2.ok, 1),  # XXX TODO
        (port.write_reg.data, dest),
        (port.write_reg.ok, 1),  # XXX TODO
        (port.imm_data.data, imm),
        (port.imm_data.ok, op_imm),
    )
    for field, value in field_values:
        yield field.eq(value)

    # read the whole entry back for the debug print
    queued = yield port
    print("senddata %d %x" % (slot, queued))

    # request that one instruction be added
    yield dut.p_add_i.eq(1)
    while True:
        yield  # advance one clock
        if (yield dut.p_o_ready):
            break

    yield dut.p_add_i.eq(0)
904
905
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: present one instruction directly to ``dut``.

    Clears all issue requests, sets the destination/source register
    numbers, raises the issue request of either the branch group (when
    bit 2 or 3 of ``op`` is set) or the ALU group, sets the branch
    shadow requests, and then waits for the chosen group to issue.

    NOTE(review): the ALU path drives ``dut.alu_oper_i`` and
    ``dut.alu_imm_i``; the Scoreboard class in this file only defines
    ``alu_op`` - this path looks stale, confirm before relying on it.
    """
    yield from disable_issue(dut)

    for port, regnum in ((dut.int_dest_i, dest),
                         (dut.int_src1_i, src1),
                         (dut.int_src2_i, src2)):
        yield port.eq(regnum)

    if op & (0x3 << 2):  # branch
        dut_issue = dut.brissue
        yield dut_issue.insn_i.eq(1)
        yield dut.br_oper_i.eq(Const(op & 0x3, 2))
        yield dut.br_imm_i.eq(imm)
    else:
        dut_issue = dut.aluissue
        yield dut_issue.insn_i.eq(1)
        yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
        yield dut.alu_imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
930
931
def print_reg(dut, rnums):
    """Print the current contents (in hex) of the given integer registers.

    Simulation generator: each register signal is yielded to the simulator,
    which sends its value back.  Output is a single line of the form
    ``reg 1,2,3: a,b,c``.
    """
    hexvals = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        hexvals.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print("reg %s: %s" % (labels, ','.join(hexvals)))
939
940
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of ``n_ops`` randomly generated instruction tuples.

    Register numbers are drawn from ``1 .. dut.n_regs-1``, the immediate
    from ``1 .. 2**dut.rwid - 1`` and the operation from
    ``0 .. max_opnums``.

    Each entry is ``(src1, src2, dest, op, opi, imm)``; when ``shadowing``
    is true a trailing ``(0, 0)`` pair is appended to every entry.
    """
    top_reg = dut.n_regs - 1
    top_imm = (1 << dut.rwid) - 1

    ops = []
    for _ in range(n_ops):
        # the order of these draws is significant for seeded runs
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, top_imm)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        # opi is 1 only when the draw is zero, i.e. one time in three
        opi = 1 if randint(0, 2) == 0 else 0

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
956
957
def wait_for_busy_clear(dut):
    """Clock the simulation until ``dut.busy_o`` reads as false.

    Simulation generator: ``busy_o`` is sampled once per clock; every time
    it is still set, "busy" is printed and one more clock is taken.
    """
    while (yield dut.busy_o):
        print("busy",)
        yield
965
966
def disable_issue(dut):
    """Clear the ``insn_i`` issue request of the ALU, branch and LD/ST units.

    Simulation generator: yields one assignment per issue unit, in the
    order ``aluissue``, ``brissue``, ``lsissue``.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
971
972
def wait_for_issue(dut, dut_issue):
    """Clock the simulation until ``dut_issue`` reports the instruction issued.

    Simulation generator: ``dut_issue.fn_issue_o`` is sampled once per
    clock, printing "busy" for every clock on which it is still clear.
    As soon as it is set, all issue requests are dropped again (via
    ``disable_issue``) and ``reg_enable_i`` is cleared.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy",)
        # debugging aid: yield from print_reg(dut, [1,2,3])
        yield

    # issued: withdraw the request so the instruction is not issued twice
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
984
985
def scoreboard_branch_sim(dut, alusim):
    """Exercise branch shadowing: issue a branch plus shadowed instructions.

    Simulation generator.  Instructions are 5-tuples
    ``(src1, src2, dest, op, (shadow_on, shadow_off))``.  An ``op`` of 4 or
    above is a branch, in which case the ``dest`` slot instead carries a
    ``(branch_ok, branch_fail)`` pair of instruction lists: the entries of
    both lists are queued behind the branch, marked ``(1, 0)`` and
    ``(0, 1)`` respectively.  Once ``dut.branch_direction_o`` is non-zero,
    queued instructions belonging to the path not taken are skipped.

    The same program is afterwards replayed through the ``alusim``
    reference model, following only the branch arm the model selects, and
    the two register files are compared with ``alusim.check``.

    None of these test vectors carries an immediate, so zero is passed
    wherever an immediate (or immediate-enable) is required.
    """
    iseed = 3

    for _ in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): disabled; create_random_ops(..., True, ...)
            # returns 7-field tuples, which do not match the 5-field
            # unpacking used below - adapt before re-enabling.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append((5, 7, 5, 1, (1, 0)))
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append(None)
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # the reference model replays its own copy: insts is consumed below
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        while insts:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: nothing left to be shadowed by
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        insts.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        insts.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes (dut, op, imm, src1, src2, dest, succ, fail):
            # the immediate used to be omitted here, which left the call one
            # positional argument short.
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue  # placeholder for "no instruction on this arm"
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # same six-argument form as the call in scoreboard_sim:
            # (op, op_imm, imm, src1, src2, dest), with no immediate here.
            # NOTE(review): confirm against RegSim.op in soc.experiment.sim.
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                # follow only the arm the reference model says was taken
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1098
1099
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Assemble a short POWER program and feed it through the decoder.

    Simulation generator.  Every register from 1 upwards is preloaded with
    its own index (in both ``dut`` and ``alusim``); each assembled
    instruction is then placed on the ``instruction`` signal and handed to
    ``power_instr_q``.  Once the instruction queue has drained and the
    scoreboard is idle, the registers are checked and dumped via
    ``alusim``.

    ``m`` is accepted but not used.

    NOTE(review): the reference model is never stepped here (the
    ``alusim.op`` call is commented out), so ``alusim.check`` compares
    against the preloaded values only.
    """
    seed(0)

    for _ in range(1):

        # register contents: deliberately not random, regN holds N
        for regnum in range(1, dut.n_regs):
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions
        asm_lines = [
            "addi 3, 0, 0x1234",
            "addi 2, 0, 0x4321",
            "add 1, 3, 2",
        ]
        with Program(asm_lines) as program:
            binaries = program.generate_instructions()
            listing = program.assembly.splitlines()

            # issue instruction(s), wait for issue to be free before proceeding
            for ins, code in zip(binaries, listing):
                yield instruction.eq(ins)  # raw binary instr.
                yield Delay(1e-6)

                print("binary 0x{:X}".format(ins & 0xffffffff))
                print("assembly", code)

                #alusim.op(op, opi, imm, src1, src2, dest)
                yield from power_instr_q(dut, pdecode2, ins, code)

        # wait for the instruction queue to empty ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... allow a few more clocks, then wait for the units to go idle
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1147
1148
def scoreboard_sim(dut, alusim):
    """Queue a fixed list of instructions and compare against ``alusim``.

    Simulation generator.  Every register from 1 upwards is preloaded with
    its own index (in both ``dut`` and ``alusim``).  Each instruction is an
    8-field tuple::

        (src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))

    which is first applied to the reference model and then pushed into the
    device via ``instr_q``.  Once the queue has drained and the scoreboard
    is idle, the registers are checked and dumped via ``alusim``.

    The ``if False`` sections are disabled regression vectors.  Most of
    them carry fewer than eight fields and would not unpack in the issue
    loop below as they stand.
    """
    seed(0)

    for _ in range(1):

        # register contents: regN holds N
        for regnum in range(1, dut.n_regs):
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs += [(1, 2, 0, 0x20, 1, 1, (0, 0))]  # LD
            #instrs += [(1, 2, 0, 0x10, 1, 1, (0, 0))]

        if False:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if False:
            instrs += [
                (7, 3, 2, 4, 0, 0, (0, 0)),
                (7, 6, 6, 2, 0, 0, (0, 0)),
                (1, 7, 2, 2, 0, 0, (0, 0)),
            ]

        if True:
            # the active test: two dependent ADDs
            instrs += [
                (2, 3, 3, MicrOp.OP_ADD, Function.ALU, 0, 0, (0, 0)),
                (5, 3, 3, MicrOp.OP_ADD, Function.ALU, 0, 0, (0, 0)),
            ]
        if False:
            instrs += [
                (3, 5, 5, MicrOp.OP_MUL_L64, Function.ALU, 1, 7, (0, 0)),
            ]
        if False:
            instrs += [
                (2, 3, 3, MicrOp.OP_ADD, Function.ALU, 0, 0, (0, 0)),
            ]

        if False:
            instrs += [
                (2, 3, 3, 0, 0, 0, (0, 0)),
                (5, 3, 3, 1, 0, 0, (0, 0)),
                (3, 5, 5, 2, 0, 0, (0, 0)),
                (5, 3, 3, 3, 0, 0, (0, 0)),
                (3, 5, 5, 0, 0, 0, (0, 0)),
            ]

        if False:
            instrs += [
                (3, 3, 4, 0, 0, 13979, (0, 0)),
                (6, 4, 1, 2, 0, 40976, (0, 0)),
                (1, 4, 7, 4, 1, 23652, (0, 0)),
            ]

        if False:
            instrs += [
                (5, 6, 2, 1),
                (2, 2, 4, 0),
                #(2, 2, 3, 1),
            ]

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs += [
                (2, 6, 2, 1),
                (2, 1, 2, 0),
            ]

        if False:
            instrs += [
                (1, 2, 7, 2),
                (7, 1, 5, 0),
                (4, 4, 1, 1),
            ]

        if False:
            instrs += [
                (5, 6, 2, 2),
                (1, 1, 4, 1),
                (6, 5, 3, 0),
            ]

        if False:
            # Write-after-Write Hazard
            instrs += [
                (3, 6, 7, 2),
                (4, 4, 7, 1),
            ]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [
                (5, 6, 1, 2),
                (1, 1, 1, 1),
            ]

        if False:
            # self-read-write sandwich
            instrs += [
                (5, 6, 1, 2),
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ]

        if False:
            # very weird failure
            instrs += [
                (5, 2, 5, 2),
                (2, 6, 3, 0),
                (4, 2, 2, 1),
            ]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (0, 1)),
            ]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (1, 0)),
            ]

        if False:
            instrs += [
                (4, 3, 5, 1, 0, (0, 0)),
                (5, 2, 3, 1, 0, (0, 0)),
                (7, 1, 5, 2, 0, (0, 0)),
                (5, 6, 6, 4, 0, (0, 0)),
                (7, 5, 2, 2, 0, (1, 0)),
                (1, 7, 5, 0, 0, (0, 1)),
                (1, 6, 1, 2, 0, (1, 0)),
                (1, 6, 7, 3, 0, (0, 0)),
                (6, 7, 7, 0, 0, (0, 0)),
            ]

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, instr in enumerate(instrs):
            print(idx, instr)
            (src1, src2, dest, op, fn_unit,
             opi, imm, (br_ok, br_fail)) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (idx, src1, src2, dest, op, fn_unit, opi, imm))
            # reference model first, then the device under test
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for the instruction queue to empty ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... allow a few more clocks, then wait for the units to go idle
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1308
1309
def test_scoreboard():
    """Build the scoreboard plus POWER decoder and run the ``power_sim`` bench.

    The design is first converted to RTLIL and written to
    ``test_scoreboard6600.il``; the simulation then runs with its waveform
    saved to ``test_powerboard6600.vcd``.
    """
    regwidth = 64
    dut = IssueToScoreboard(2, 1, 1, regwidth, 8, 8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # created but not used by the active test

    m = Module()
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    pdecode2 = PowerDecode2(pdecode)
    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    # the decoder sees the raw instruction word driven by the test bench
    m.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    m.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    il_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(m, power_sim(m, dut, pdecode2, instruction, alusim),
                   vcd_name='test_powerboard6600.vcd')

    # alternative benches, currently disabled:
    # run_simulation(dut, scoreboard_sim(dut, alusim),
    #                vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                vcd_name='test_scoreboard6600.vcd')
1342
1343
# Script entry point: run the scoreboard test bench when executed directly.
if __name__ == "__main__":
    test_scoreboard()