Merge branch 'master' of git.libre-soc.org:soc
[soc.git] / src / soc / experiment / score6600_multi.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
5 from nmigen.back.pysim import Delay
6
7 from soc.regfile.regfile import RegFileArray, ortreereduce
8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
10 from soc.scoreboard.global_pending import GlobalPending
11 from soc.scoreboard.group_picker import GroupPicker
12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
15 from soc.scoreboard.memfu import MemFunctionUnits
16
17 from soc.experiment.compalu import ComputationUnitNoDelay
18 from soc.experiment.compalu_multi import MultiCompUnit, go_record
19 from soc.experiment.compldst_multi import LDSTCompUnit
20 from soc.experiment.compldst_multi import CompLDSTOpSubset
21 from soc.experiment.l0_cache import TstL0CacheBuffer
22
23 from soc.experiment.alu_hier import ALU, BranchALU
24 from soc.fu.alu.alu_input_record import CompALUOpSubset
25
26 from soc.decoder.power_enums import MicrOp, Function
27 from soc.decoder.power_decoder import (create_pdecode)
28 from soc.decoder.power_decoder2 import (PowerDecode2)
29 from soc.decoder.power_decoder2 import Decode2ToExecute1Type
30
31 from soc.simulator.program import Program
32
33
34 from nmutil.latch import SRLatch
35 from nmutil.nmoperator import eq
36
37 from random import randint, seed
38 from copy import deepcopy
39 from math import log
40
41 from soc.experiment.sim import RegSim, MemSim
42 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
43
44
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

    Groups a sequence of Computation Units (ALUs) that handle the same
    operations, concatenating their single-bit issue, go_rd, go_wr,
    busy etc. signals into multi-bit signals, one bit per unit.

    The same concatenation trick works recursively: the "units" may
    themselves be CompUnitsBase instances, in which case this class
    presents one flat, sequential concatenation of all the signals of
    all the underlying units, whilst still letting them be grouped.

    At the lower level, the intent is that a group of (identical) ALUs
    is passed the same operation and ultimately shares the *same
    pipeline*, becoming a "Concurrent Computation Unit" as defined by
    Mitch Alsup (see section 11.4.9.3)
    """

    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the extra LD/ST signals (go_ad_i,
                         go_st_i, ld_o, st_o, adr_rel_o, sto_rel_o,
                         load_mem_o, stwd_mem_o, addr_o) are created
                         and wired up in elaborate()
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        # a group of groups is as wide as the sum of its sub-groups,
        # otherwise there is one bit per unit.
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.rd0 = go_record(n_units, "rd0")
        self.rd1 = go_record(n_units, "rd1")
        self.go_rd_i = [self.rd0.go, self.rd1.go]  # XXX HACK!
        self.wr0 = go_record(n_units, "wr0")
        self.go_wr_i = [self.wr0.go]
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = [self.rd0.rel, self.rd1.rel]  # HACK!
        self.req_rel_o = self.wr0.rel
        self.done_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)
        # input operand

    def elaborate(self, platform):
        """ Adds every unit as a submodule ("comp0", "comp1", ...) and
            wires the per-unit signals to the concatenated group signals.
            Returns the Module so that derived classes can add to it.
        """
        m = Module()
        comb = m.d.comb
        units = self.units

        for i, alu in enumerate(units):
            setattr(m.submodules, "comp%d" % i, alu)

        def gather(attr):
            # one entry per unit, in unit order
            return [getattr(alu, attr) for alu in units]

        # debug trace of the release signals of each unit
        for alu in units:
            print(alu, "rel", alu.req_rel_o, alu.rd_rel_o)

        # outputs: per-unit signals concatenated into the group signal
        comb += self.rd0.rel.eq(Cat(*[alu.rd_rel_o[0] for alu in units]))
        comb += self.rd1.rel.eq(Cat(*[alu.rd_rel_o[1] for alu in units]))
        comb += self.req_rel_o.eq(Cat(*gather("req_rel_o")))
        comb += self.done_o.eq(Cat(*gather("done_o")))
        comb += self.busy_o.eq(Cat(*gather("busy_o")))
        # inputs: the group signal is split back out across the units
        comb += Cat(*gather("go_die_i")).eq(self.go_die_i)
        comb += Cat(*gather("shadown_i")).eq(self.shadown_i)
        comb += Cat(*gather("go_wr_i")).eq(self.wr0.go)  # XXX TODO
        comb += Cat(*[alu.go_rd_i[0] for alu in units]).eq(self.rd0.go)
        comb += Cat(*[alu.go_rd_i[1] for alu in units]).eq(self.rd1.go)
        comb += Cat(*gather("issue_i")).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # XXX NOTE: this only works because there is a single "port"
        # protected by a single go_wr.  multi-issue requires a bus
        # to be inserted here.
        if units:
            data_o = ortreereduce(units, "data_o")
            comb += self.data_o.eq(data_o)
            if self.ldstmode:
                addr_o = ortreereduce(units, "addr_o")
                comb += self.addr_o.eq(addr_o)

        # every unit sees the same source operands
        for alu in units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals, same concatenate / split pattern as above
        comb += self.ld_o.eq(Cat(*gather("ld_o")))
        comb += self.st_o.eq(Cat(*gather("st_o")))
        comb += self.adr_rel_o.eq(Cat(*gather("adr_rel_o")))
        comb += self.sto_rel_o.eq(Cat(*gather("sto_rel_o")))
        comb += self.load_mem_o.eq(Cat(*gather("load_mem_o")))
        comb += self.stwd_mem_o.eq(Cat(*gather("stwd_mem_o")))
        comb += Cat(*gather("go_ad_i")).eq(self.go_ad_i)
        comb += Cat(*gather("go_st_i")).eq(self.go_st_i)

        return m
213
214
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LD/ST Computation Units, all handed the same operation.
    """

    def __init__(self, rwid, opwid, n_ldsts, l0):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST units to create
            * :l0:      object whose l0.dports[i].pi is handed to
                        LD/ST unit number i
        """
        self.opwid = opwid

        # inputs
        self.op = CompLDSTOpSubset("cul_i")

        # LD/ST Units: one per port, each created with awid=48
        ldst_units = [LDSTCompUnit(l0.l0.dports[port].pi, rwid, awid=48)
                      for port in range(n_ldsts)]

        CompUnitsBase.__init__(self, rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        """ Base-class wiring plus fan-out of the operation to all units.
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units
        comb += [unit.oper_i.eq(self.op) for unit in self.units]

        return m
245
246
class CompUnitALUs(CompUnitsBase):
    """ Group of integer ALU Computation Units, all handed the same
        operation.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALUs (and therefore units) to create
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")

        # Int ALUs are all created first, then each is wrapped in a
        # MultiCompUnit (same construction order as before)
        alus = [ALU(rwid) for _ in range(n_alus)]
        units = [MultiCompUnit(rwid, alu, CompALUOpSubset) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        """ Base-class wiring plus fan-out of the operation to all ALUs.
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the subset of operation to ALUs
        for alu in self.units:
            comb += alu.oper_i.eq(self.op)

        return m
281
282
class CompUnitBR(CompUnitsBase):
    """ Group containing the single Branch Computation Unit.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU (self.bgt) and its Computation Unit
            (self.br1) are kept as attributes so that a shadow unit can
            be created for it
        """
        self.opwid = opwid

        # inputs
        # NOTE: self.op and self.imm_i are created but not connected to
        # anything in elaborate() below: only self.oper_i is wired up.
        self.op = CompALUOpSubset("cua_i")  # TODO - CompALUBranchSubset
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = MultiCompUnit(rwid, self.bgt, CompALUOpSubset)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """ Base-class wiring plus fan-out of oper_i to the branch unit.
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units
        for alu in self.units:
            # comb += alu.oper_i.eq(self.op) # TODO
            comb += alu.oper_i.eq(self.oper_i)
            #comb += alu.imm_i.eq(self.imm_i)

        return m
318
319
class FunctionUnits(Elaboratable):
    """ Integer Function Units: an FU-FU and an FU-Reg Dependency Matrix
        wired together, presenting per-FU readable/writable outputs and
        per-register select and pending outputs.
    """

    def __init__(self, n_reg, n_int_alus, n_src, n_dst):
        """ Inputs:

            * :n_reg:      number of registers (width of the per-reg vectors)
            * :n_int_alus: number of Function Units (width of per-FU vectors)
            * :n_src:      number of source operands per FU
            * :n_dst:      number of destination operands per FU
        """
        self.n_src, self.n_dst = n_src, n_dst
        self.n_reg = n_reg
        self.n_int_alus = nf = n_int_alus

        self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # arrays
        src = []
        rsel = []
        rd = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
            rsel.append(Signal(n_reg, name="src%d_rsel_o" %
                               j, reset_less=True))
            rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
        dst = []
        dsel = []
        wr = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match src1/src2
            dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
            dsel.append(Signal(n_reg, name="dst%d_rsel_o" %
                               j, reset_less=True))
            wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))

        self.dest_i = Array(dst)  # Dest in (top)
        self.src_i = Array(src)  # oper in (top)

        # for Register File Select Lines (horizontal), per-reg
        self.dst_rsel_o = Array(dsel)  # dest reg (bot)
        self.src_rsel_o = Array(rsel)  # src reg (bot)

        self.go_rd_i = Array(rd)
        self.go_wr_i = Array(wr)

        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid.  It is only assigned (as self.wr_pend_o) in elaborate().

    def elaborate(self, platform):
        """ Creates the two dependency matrices and connects them to each
            other and to this module's inputs and outputs.
        """
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # NOTE(review): both matrices are created with a hard-coded 2 and 1
        # whilst the loops below use self.n_src / self.n_dst - presumably
        # these are the src/dest counts and must agree; confirm.

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus, 2, 1)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_reg, 2, 1)
        m.submodules.intregdeps = intregdeps

        comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
        comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)

        # the FU-Reg matrix read/write vectors are fed back into itself
        comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)

        # FU-Reg pending outputs drive the FU-FU matrix
        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.readable_o.eq(intfudeps.readable_o)
        comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        for i in range(self.n_src):
            print(i, self.go_rd_i, intfudeps.go_rd_i)
            comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += intregdeps.src_i[i].eq(self.src_i[i])
            comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
        for i in range(self.n_dst):
            print(i, self.go_wr_i, intfudeps.go_wr_i)
            comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += intregdeps.dest_i[i].eq(self.dest_i[i])
            comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        return m
422
423
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: register files, ALU / LDST / Branch
        Computation Units, integer and memory Function Units (dependency
        matrices), priority pickers, the issue unit, and the shadow
        matrices used for instruction ordering and branch speculation,
        all wired together in elaborate().
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.l0 = TstL0CacheBuffer()

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.instr = Decode2ToExecute1Type("sc_instr")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Creates all sub-units and wires them together.

            NOTE(review): the statement nesting of the m.If blocks in this
            method follows the apparent logic of the code; confirm against
            the repository copy of the file.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.l0 = l0 = self.l0

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected anywhere below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): 5 presumably has to equal the total number of
        # Computation Units (2 ALU + 2 LDST + 1 BR issue slots created in
        # __init__) - confirm if any of those sizes is changed.
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        # NOTE(review): n_ldsts is used for memfus and mempick1, whereas the
        # LD/ST units themselves are sized from self.lsissue.n_insns;
        # presumably the two must be equal.
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, l0)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt  # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        fu_n_src = 2
        fu_n_dst = 1
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
                                                     fu_n_src, fu_n_dst)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        # (mempick1 is added as a submodule but not connected below)
        mempick1 = GroupPicker(n_ldsts, 1, 1)
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
        m.submodules.intpick1 = ipick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): the code below uses a separate one-wide "bshadow"
        # matrix for branches rather than widening "shadows" by one; the
        # comment above looks stale - confirm.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq_from_execute1(self.instr)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.op.eq_from_execute1(self.instr)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i[0].eq(regdecode.dest_o)
        comb += intfus.src_i[0].eq(regdecode.src1_o)
        comb += intfus.src_i[1].eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        reset_b = Signal(cul.n_units, reset_less=True)
        # XXX was cul.go_wr_i not done.o
        # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
        sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        sync += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        # (hard-coded for exactly two LD/ST units)
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = ipick1.go_rd_o
        go_wr_o = ipick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        for i in range(fu_n_src):
            comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
        for i in range(fu_n_dst):
            comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        int_rd_o = intfus.readable_o
        rrel_o = cu.rd_rel_o
        rqrl_o = cu.req_rel_o
        for i in range(fu_n_src):
            comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
            comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
        int_wr_o = intfus.writable_o
        for i in range(fu_n_dst):
            # XXX FIXME: rqrl_o[i] here
            comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
            comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
            # XXX experiment: use ~cu.busy_o instead.  *should* be good
            # because the comp unit is only free once completed
            comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        # NOTE(review): 0x1f below has 5 bits set - presumably a mask of
        # n_intfus (5) bits; it would need changing if n_int_alus changes.
        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)  # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
        comb += int_src1.ren.eq(intfus.src_rsel_o[0])
        comb += int_src2.ren.eq(intfus.src_rsel_o[1])

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        for i in range(fu_n_src):
            comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
        for i in range(fu_n_dst):
            comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields the contents of both register files, then the register
            number inputs, issue_o and the branch speculation signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything yielded by __iter__ as a list.
        """
        return list(self)
778
779
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is presented to the scoreboard, and is popped
        from the queue once the selected issue group has accepted it.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ as its queue length
            * :n_in:   number of instruction inputs (entries in data_i)
            * :n_out:  passed to InstructionQ
            * :rwid:   bit width of register file(s)
            * :opwid:  passed to InstructionQ (stored as self.opw)
            * :n_regs: number of registers, passed to Scoreboard
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # width of the queue-length signals: int(log2(qlen)) + 2 bits
        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction._nq(n_in, "data_i")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Creates the InstructionQ ("iq") and Scoreboard ("sc") and the
            issue handshake between them.  Also sets self.intregs, which
            therefore only exists after elaboration.

            NOTE(review): the statement nesting of the m.If / m.Elif blocks
            follows the apparent logic of the code; confirm against the
            repository copy of the file.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when the pop length (iq.n_sub_i)
        # is set, which is done below once an issue unit has accepted
        # (presumably the intended ending of this note - confirm).

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            instr = iq.data_o[0]
            imm = instr.imm_data.data
            dest = instr.write_reg.data
            src1 = instr.read_reg1.data
            src2 = instr.read_reg2.data
            op = instr.insn_type
            fu = instr.fn_unit
            opi = instr.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile
            comb += sc.instr.eq(instr)

            # choose a Function-Unit-Group
            with m.If(fu == Function.ALU):  # alu
                comb += sc.aluissue.insn_i.eq(1)  # enable alu issue
                comb += wait_issue_alu.eq(1)
            with m.Elif(fu == Function.LDST):  # ld/st
                comb += sc.lsissue.insn_i.eq(1)  # enable ldst issue
                comb += wait_issue_ls.eq(1)

            # anything that is neither ALU nor LDST is treated as a branch
            # when either of bits 2-3 of the instruction type is set
            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ Yields p_ready_o, the fields of every data_i entry, then
            p_add_i.
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ Returns everything yielded by __iter__ as a list.
        """
        return list(self)
895
896
def power_instr_q(dut, pdecode2, ins, code):
    """ Simulation process: queue the decoded instruction (pdecode2.e).

        The instruction is copied onto dut.data_i[0], p_add_i is set to 1,
        and the process then waits (one clock at a time) until p_ready_o
        is seen set, after which p_add_i is cleared again.

        :ins: and :code: are accepted but not used.
    """
    pending = (pdecode2.e,)
    n_to_add = len(pending)

    for slot, decoded in enumerate(pending):
        yield dut.data_i[slot].eq(decoded)
        # read back type and function unit purely for the debug print
        itype = yield decoded.insn_type
        funit = yield decoded.fn_unit
        print("senddata ", slot, itype, funit, decoded)

    yield dut.p_add_i.eq(n_to_add)

    # advance a clock, sample p_ready_o, repeat until accepted
    while True:
        yield
        accepted = yield dut.p_ready_o
        if accepted:
            break

    yield dut.p_add_i.eq(0)
914
915
def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """ Simulation process: queue one instruction given as raw fields.

        The fields are written one by one onto dut.data_i[0] (the "ok"
        flag of both read registers and of the write register is always
        set to 1), p_add_i is set to 1, and the process then waits (one
        clock at a time) until p_ready_o is seen set, after which
        p_add_i is cleared again.

        :branch_success: and :branch_fail: are accepted but not used.
    """
    slot = 0
    n_to_add = 1
    port = dut.data_i[slot]

    # (field, value) pairs, in the order they are driven
    assignments = (
        (port.insn_type, op),
        (port.fn_unit, funit),
        (port.read_reg1.data, src1),
        (port.read_reg1.ok, 1),  # XXX TODO
        (port.read_reg2.data, src2),
        (port.read_reg2.ok, 1),  # XXX TODO
        (port.write_reg.data, dest),
        (port.write_reg.ok, 1),  # XXX TODO
        (port.imm_data.data, imm),
        (port.imm_data.ok, op_imm),
    )
    for field, value in assignments:
        yield field.eq(value)

    # read the whole entry back, purely for the debug print
    packed = yield port
    print("senddata %d %x" % (slot, packed))

    yield dut.p_add_i.eq(n_to_add)

    # advance a clock, sample p_ready_o, repeat until accepted
    while True:
        yield
        accepted = yield dut.p_ready_o
        if accepted:
            break

    yield dut.p_add_i.eq(0)
950
951
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ Simulation process: issue one instruction directly to the dut.

    Any pending issue request is dropped first, then the register numbers
    are driven. If either of bits 2-3 of op is set the instruction goes
    to the branch issue unit, otherwise to the ALU issue unit; in both
    cases the low two bits of op become the 2-bit operation and imm the
    immediate. Finally the regfile is enabled, the shadow inputs are
    driven and the process waits for the chosen unit to issue.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    if op & (0x3 << 2):  # branch
        dut_issue = dut.brissue
        oper_sig = dut.br_oper_i
        imm_sig = dut.br_imm_i
    else:
        dut_issue = dut.aluissue
        oper_sig = dut.alu_oper_i
        imm_sig = dut.alu_imm_i

    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
976
977
def print_reg(dut, rnums):
    """ Simulation process: read and print the listed integer registers.

    Each register number in rnums is read from dut.intregs.regs and the
    values are printed in hex on a single "reg <nums>: <values>" line.
    """
    hexvals = []
    for regnum in rnums:
        value = yield dut.intregs.regs[regnum].reg
        hexvals.append("%x" % value)
    labels = ','.join(map(str, rnums))
    print("reg %s: %s" % (labels, ','.join(hexvals)))
985
986
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Build a list of n_ops random instruction tuples.

    Register numbers are drawn from 1..dut.n_regs-1, the immediate from
    1..(2**dut.rwid)-1 and the operation from 0..max_opnums. opi is 1
    only when a draw from 0..2 comes up zero (roughly one time in three),
    0 otherwise.

    Each tuple is (src1, src2, dest, op, opi, imm); with shadowing a
    trailing (0, 0) shadow pair is added.
    """
    top_reg = dut.n_regs - 1
    imm_max = (1 << dut.rwid) - 1

    ops = []
    for _ in range(n_ops):
        # NOTE: the order of the draws below determines the sequence
        # produced for a given seed
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, imm_max)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        opi = int(randint(0, 2) == 0)

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
1002
1003
def wait_for_busy_clear(dut):
    """ Simulation process: step the clock until dut.busy_o reads zero.

    Prints "busy" once for every cycle spent waiting.
    """
    busy = yield dut.busy_o
    while busy:
        print("busy")
        yield
        busy = yield dut.busy_o
1011
1012
def disable_issue(dut):
    """ Simulation process: clear insn_i on the ALU, branch and LD/ST
    issue units, in that order.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1017
1018
def wait_for_issue(dut, dut_issue):
    """ Simulation process: wait until dut_issue.fn_issue_o reads non-zero.

    Prints "busy" for every cycle spent waiting. Once the unit has issued,
    all issue requests are dropped (disable_issue) and reg_enable_i is
    cleared.
    """
    issued = yield dut_issue.fn_issue_o
    while not issued:
        print("busy")
        # debugging aid: yield from print_reg(dut, [1,2,3])
        yield
        issued = yield dut_issue.fn_issue_o

    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1030
1031
def scoreboard_branch_sim(dut, alusim):
    """ Simulation process: issue a small branch test and check results.

    Instruction tuples used here are
    (src1, src2, dest, op, (shadow_on, shadow_off)). An op >= 4 is
    treated as a branch, in which case "dest" instead carries the pair
    (branch_ok, branch_fail) of instruction lists to follow on each
    outcome (entries may be None, meaning "nothing").

    The same instruction list is first issued to the dut and then, from a
    deep copy, replayed on alusim; finally alusim.check / alusim.dump are
    run against the dut.
    """
    iseed = 3

    for _round in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops(..., shadowing=True) returns
            # 7-field tuples, which do not match the 5-field unpack used
            # below -- update before re-enabling.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # the dut pass below consumes insts; keep a copy for the alusim pass
        siminsts = deepcopy(insts)

        # issue instruction(s)
        num = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            num += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", num, src1, src2, dest, op,
                      shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", num, src1, src2, dest, op,
                      shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: no shadowing needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (num, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes (dut, op, imm, src1, src2, dest, succ, fail);
            # the tuples here carry no immediate, so pass 0 for it.
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        num = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            num += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (num, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): scoreboard_sim calls
            # alusim.op(op, opi, imm, src1, src2, dest); this 4-argument
            # form may be stale -- confirm against RegSim.op.
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                # follow only the path the simulated branch actually took
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1144
1145
def power_sim(m, dut, pdecode2, instruction, alusim):
    """ Simulation process: assemble a short POWER program and feed it,
    one decoded instruction at a time, into the dut's instruction queue.

    Register N (1..dut.n_regs-1) is preloaded with the value N in both
    the dut and alusim. After the last instruction has been queued the
    process waits for qlen_o to read zero and for busy_o to clear, then
    runs alusim.check / alusim.dump against the dut.

    m is accepted but not used here.
    """
    seed(0)

    for _round in range(1):

        # set (not actually random) values in the registers
        for regnum in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = regnum  # XXX actually, not random at all
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions
        lst = []
        if False:
            lst.extend(["addi 2, 0, 0x4321",
                        "addi 3, 0, 0x1234",
                        "add 1, 3, 2",
                        "add 4, 3, 5",
                        ])
        if True:
            lst.append("lbzu 6, 7(2)")

        with Program(lst) as program:
            gen = program.generate_instructions()
            asm_lines = program.assembly.splitlines()

            # issue instruction(s), wait for issue to be free before proceeding
            for ins, code in zip(gen, asm_lines):
                yield instruction.eq(ins)  # raw binary instr.
                yield  # Delay(1e-6)

                print("binary 0x{:X}".format(ins & 0xffffffff))
                print("assembly", code)

                #alusim.op(op, opi, imm, src1, src2, dest)
                yield from power_instr_q(dut, pdecode2, ins, code)

        # wait for all instructions to stop before checking
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1202
1203
def scoreboard_sim(dut, alusim):
    """ Simulation process: queue hand-written instructions and check them.

    Register N (1..dut.n_regs-1) is preloaded with the value N in both
    the dut and alusim. Every instruction is applied to alusim and queued
    on the dut via instr_q; once qlen_o reads zero and busy_o has cleared,
    alusim.check / alusim.dump are run against the dut.

    Live instruction tuples have eight fields:
    (src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail)).
    Most of the disabled ("if False") regression sets below, and the
    output of create_random_ops, use shorter layouts that will not unpack
    in the issue loop; they need converting before being re-enabled.
    """
    seed(0)

    for _round in range(1):

        # set (not actually random) values in the registers
        for regnum in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = regnum
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs += [(1, 2, 0, 0x20, 1, 1, (0, 0))]  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if False:
            instrs += [(7, 3, 2, 4, 0, 0, (0, 0)),
                       (7, 6, 6, 2, 0, 0, (0, 0)),
                       (1, 7, 2, 2, 0, 0, (0, 0))]

        if True:
            instrs += [(2, 3, 3, MicrOp.OP_ADD, Function.ALU,
                        0, 0, (0, 0)),
                       (5, 3, 3, MicrOp.OP_ADD, Function.ALU,
                        0, 0, (0, 0))]
        if False:
            instrs += [(3, 5, 5, MicrOp.OP_MUL_L64, Function.ALU,
                        1, 7, (0, 0))]
        if False:
            instrs += [(2, 3, 3, MicrOp.OP_ADD, Function.ALU,
                        0, 0, (0, 0))]

        if False:
            instrs += [(2, 3, 3, 0, 0, 0, (0, 0)),
                       (5, 3, 3, 1, 0, 0, (0, 0)),
                       (3, 5, 5, 2, 0, 0, (0, 0)),
                       (5, 3, 3, 3, 0, 0, (0, 0)),
                       (3, 5, 5, 0, 0, 0, (0, 0))]

        if False:
            instrs += [(3, 3, 4, 0, 0, 13979, (0, 0)),
                       (6, 4, 1, 2, 0, 40976, (0, 0)),
                       (1, 4, 7, 4, 1, 23652, (0, 0))]

        if False:
            instrs += [(5, 6, 2, 1),
                       (2, 2, 4, 0)]
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs += [(2, 6, 2, 1),
                       (2, 1, 2, 0)]

        if False:
            instrs += [(1, 2, 7, 2),
                       (7, 1, 5, 0),
                       (4, 4, 1, 1)]

        if False:
            instrs += [(5, 6, 2, 2),
                       (1, 1, 4, 1),
                       (6, 5, 3, 0)]

        if False:
            # Write-after-Write Hazard
            instrs += [(3, 6, 7, 2),
                       (4, 4, 7, 1)]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [(1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1)]

        if False:
            # self-read-write sandwich
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # very weird failure
            instrs += [(5, 2, 5, 2),
                       (2, 6, 3, 0),
                       (4, 2, 2, 1)]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (0, 1))]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (1, 0))]

        if False:
            instrs += [(4, 3, 5, 1, 0, (0, 0)),
                       (5, 2, 3, 1, 0, (0, 0)),
                       (7, 1, 5, 2, 0, (0, 0)),
                       (5, 6, 6, 4, 0, (0, 0)),
                       (7, 5, 2, 2, 0, (1, 0)),
                       (1, 7, 5, 0, 0, (0, 1)),
                       (1, 6, 1, 2, 0, (1, 0)),
                       (1, 6, 7, 3, 0, (0, 0)),
                       (6, 7, 7, 0, 0, (0, 0))]

        # issue instruction(s), wait for issue to be free before proceeding
        for num, instr in enumerate(instrs):
            print(num, instr)
            src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (num, src1, src2, dest, op, fn_unit, opi, imm))
            # apply to alusim first, then queue the same thing on the dut
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1363
1364
def test_scoreboard():
    """ Build the scoreboard plus a PowerDecode2 front end, write the
    RTLIL to test_scoreboard6600.il and run the power_sim simulation
    (VCD goes to test_powerboard6600.vcd).
    """
    regwidth = 64
    dut = IssueToScoreboard(2, 1, 1, regwidth, 8, 8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # constructed but not used by power_sim

    m = Module()
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    pdecode2 = PowerDecode2(pdecode)
    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    # the raw instruction word drives the decoder
    m.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    m.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    rtlil_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, sim_process, vcd_name='test_powerboard6600.vcd')

    # alternative test benches (drive the scoreboard directly):
    # run_simulation(dut, scoreboard_sim(dut, alusim),
    #                vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                vcd_name='test_scoreboard6600.vcd')
1397
1398
# run the scoreboard test bench when executed as a script
if __name__ == "__main__":
    test_scoreboard()