hair-raising series of half-way-house changes which gets a mix of add/addi
[soc.git] / src / soc / experiment / score6600_multi.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
5 from nmigen.back.pysim import Delay
6
7 from soc.regfile.regfile import RegFileArray, treereduce
8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
10 from soc.scoreboard.global_pending import GlobalPending
11 from soc.scoreboard.group_picker import GroupPicker
12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
15 from soc.scoreboard.memfu import MemFunctionUnits
16
17 from soc.experiment.compalu import ComputationUnitNoDelay
18 from soc.experiment.compalu_multi import MultiCompUnit, go_record
19 from soc.experiment.compldst import LDSTCompUnit
20 from soc.experiment.testmem import TestMemory
21
22 from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
23
24 from soc.decoder.power_enums import InternalOp, Function
25 from soc.decoder.power_decoder import (create_pdecode)
26 from soc.decoder.power_decoder2 import (PowerDecode2)
27 from soc.simulator.program import Program
28
29
30 from nmutil.latch import SRLatch
31 from nmutil.nmoperator import eq
32
33 from random import randint, seed
34 from copy import deepcopy
35 from math import log
36
37 from soc.experiment.sim import RegSim, MemSim
38 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
39
40
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """

    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True the LD/ST-specific signals (go_ad_i,
                         go_st_i, ld_o, st_o, adr_rel_o, ...) are created
                         here and wired to the units in elaborate()
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid

        # a group of groups is as wide as the sum of its sub-groups;
        # a plain group is as wide as the number of units it holds.
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.rd0 = go_record(n_units, "rd0")
        self.rd1 = go_record(n_units, "rd1")
        self.go_rd_i = [self.rd0.go, self.rd1.go]  # XXX HACK!
        self.wr0 = go_record(n_units, "wr0")
        self.go_wr_i = [self.wr0.go]
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = [self.rd0.rel, self.rd1.rel]  # HACK!
        self.req_rel_o = self.wr0.rel
        self.done_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)
        # input operand

    def elaborate(self, platform):
        """ Adds every unit as a submodule ("comp0", "comp1", ...) and
            concatenates the per-unit control signals, in unit order,
            onto this group's (wider) signals.  Returns the Module.
        """
        m = Module()
        comb = m.d.comb
        units = self.units

        for i, alu in enumerate(units):
            setattr(m.submodules, "comp%d" % i, alu)

        for alu in units:
            print(alu, "rel", alu.req_rel_o, alu.rd_rel_o)

        # per-unit outputs, concatenated onto the group outputs
        comb += self.rd0.rel.eq(Cat(*[alu.rd_rel_o[0] for alu in units]))
        comb += self.rd1.rel.eq(Cat(*[alu.rd_rel_o[1] for alu in units]))
        comb += self.req_rel_o.eq(Cat(*[alu.req_rel_o for alu in units]))
        comb += self.done_o.eq(Cat(*[alu.done_o for alu in units]))
        comb += self.busy_o.eq(Cat(*[alu.busy_o for alu in units]))

        # group inputs, split back out across the per-unit inputs
        comb += Cat(*[alu.go_die_i for alu in units]).eq(self.go_die_i)
        comb += Cat(*[alu.shadown_i for alu in units]).eq(self.shadown_i)
        comb += Cat(*[alu.go_wr_i for alu in units]).eq(self.wr0.go)  # XXX TODO
        comb += Cat(*[alu.go_rd_i[0] for alu in units]).eq(self.rd0.go)
        comb += Cat(*[alu.go_rd_i[1] for alu in units]).eq(self.rd1.go)
        comb += Cat(*[alu.issue_i for alu in units]).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # XXX NOTE: this only works because there is a single "port"
        # protected by a single go_wr.  multi-issue requires a bus
        # to be inserted here.
        if units:
            data_o = treereduce(units, "data_o")
            comb += self.data_o.eq(data_o)
            if self.ldstmode:
                addr_o = treereduce(units, "addr_o")
                comb += self.addr_o.eq(addr_o)

        # every unit sees the same pair of source operands
        for alu in units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST mode only: same concatenation trick for the extra signals
        comb += self.ld_o.eq(Cat(*[alu.ld_o for alu in units]))
        comb += self.st_o.eq(Cat(*[alu.st_o for alu in units]))
        comb += self.adr_rel_o.eq(Cat(*[alu.adr_rel_o for alu in units]))
        comb += self.sto_rel_o.eq(Cat(*[alu.sto_rel_o for alu in units]))
        comb += self.load_mem_o.eq(Cat(*[alu.load_mem_o for alu in units]))
        comb += self.stwd_mem_o.eq(Cat(*[alu.stwd_mem_o for alu in units]))
        comb += Cat(*[alu.go_ad_i for alu in units]).eq(self.go_ad_i)
        comb += Cat(*[alu.go_st_i for alu in units]).eq(self.go_st_i)

        return m
209
210
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LD/ST Computation Units, all handed the same operation.
    """

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST units to create
            * :mem:     memory object passed to every LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # one Int ALU per LD/ST unit
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        # wrap each ALU in its own LD/ST Computation Unit
        ldst_units = [LDSTCompUnit(rwid, alu, mem) for alu in self.alus]

        CompUnitsBase.__init__(self, rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        """ Extends the base wiring by broadcasting oper_i to the units.
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units, 4 lower bits though
        for unit in self.units:
            comb += unit.oper_i[0:4].eq(self.oper_i)
            #comb += unit.imm_i.eq(self.imm_i)
            comb += unit.isalu_i.eq(0)

        return m
247
248
class CompUnitALUs(CompUnitsBase):
    """ Group of integer ALU Computation Units sharing one operation input.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALUs to create (one MultiCompUnit each)
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, each wrapped in its own Computation Unit
        alus = [ALU(rwid) for _ in range(n_alus)]
        units = [MultiCompUnit(rwid, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        """ Extends the base wiring by broadcasting self.op to the units.
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the subset of operation to ALUs
        for alu in self.units:
            comb += alu.oper_i.eq(self.op)
            #comb += alu.oper_i[0:3].eq(self.oper_i)
            #comb += alu.imm_i.eq(self.imm_i)

        return m
287
288
class CompUnitBR(CompUnitsBase):
    """ Single-unit group holding the Branch ALU and its Computation Unit.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU and its Computation Unit are kept as
            self.bgt and self.br1 so that a shadow unit can be created
            for the branch
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")  # TODO - CompALUBranchSubset
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = MultiCompUnit(rwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """ Extends the base wiring by broadcasting oper_i to the units.
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units
        for alu in self.units:
            #comb += alu.oper_i.eq(self.op) # TODO
            comb += alu.oper_i.eq(self.oper_i)
            #comb += alu.imm_i.eq(self.imm_i)

        return m
324
325
class FunctionUnits(Elaboratable):
    """ Wraps the integer FU-FU and FU-Reg Dependency Matrices and wires
        them to each other and to this module's issue / go_rd / go_wr /
        go_die inputs and readable / writable / register-select outputs.
    """

    def __init__(self, n_reg, n_int_alus, n_src, n_dst):
        """ Inputs:

            * :n_reg:      number of registers (width of src/dest vectors)
            * :n_int_alus: number of Function Units
            * :n_src:      number of source operands per instruction
            * :n_dst:      number of destination operands per instruction
        """
        self.n_src, self.n_dst = n_src, n_dst
        self.n_reg = n_reg
        self.n_int_alus = nf = n_int_alus

        self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # arrays
        src = []
        rsel = []
        rd = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
            rsel.append(Signal(n_reg, name="src%d_rsel_o" % j,
                               reset_less=True))
            rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
        dst = []
        dsel = []
        wr = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match src1/src2
            dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
            dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j,
                               reset_less=True))
            wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))

        self.dest_i = Array(dst)  # Dest in (top)
        self.src_i = Array(src)  # oper in (top)

        # for Register File Select Lines (horizontal), per-reg
        self.dst_rsel_o = Array(dsel)  # dest reg (bot)
        self.src_rsel_o = Array(rsel)  # src reg (bot)

        self.go_rd_i = Array(rd)
        self.go_wr_i = Array(wr)

        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (the attribute is only set once elaborate() has run)

    def elaborate(self, platform):
        """ Creates the two dependency matrices as submodules and
            connects them.  Also sets self.wr_pend_o.  Returns the Module.
        """
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # the per-operand loops below index the matrices' go_rd_i/go_wr_i
        # by n_src/n_dst, so the matrices are sized with the same values.
        # NOTE(review): assumes the last two constructor arguments are
        # the source and destination counts - confirm against scoremulti.

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus,
                                  self.n_src, self.n_dst)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_reg,
                                    self.n_src, self.n_dst)
        m.submodules.intregdeps = intregdeps

        comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
        comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)

        # the FU-Reg matrix's vector outputs are fed back to its own
        # pending inputs
        comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)

        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.readable_o.eq(intfudeps.readable_o)
        comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        for i in range(self.n_src):
            print(i, self.go_rd_i, intfudeps.go_rd_i)
            comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += intregdeps.src_i[i].eq(self.src_i[i])
            comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
        for i in range(self.n_dst):
            print(i, self.go_wr_i, intfudeps.go_wr_i)
            comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += intregdeps.dest_i[i].eq(self.dest_i[i])
            comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        return m
424
425
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: register files, test memory, ALU / LD-ST /
        Branch Computation Units, dependency matrices, pickers, issue
        units and shadow matrices, all wired together in elaborate().
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_op = CompALUOpSubset("alu")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Instantiates all sub-units and wires them together.
            Returns the Module.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.mem = self.mem

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are created but not connected to anything yet
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        br1 = cub.br1  # get at the branch computation unit

        # unit counts are taken from the groups just built rather than
        # repeated as literals, so they cannot drift out of step
        n_ldsts = cul.n_units
        n_int_alus = cu.n_units  # ALUs + LD/STs + Branch

        # Int FUs
        fu_n_src = 2
        fu_n_dst = 1
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
                                                     fu_n_src, fu_n_dst)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        mempick1 = GroupPicker(n_ldsts, 1, 1)
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus

        # mask covering one bit per Function Unit
        fu_mask = (1 << n_intfus) - 1

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
        m.submodules.intpick1 = ipick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrices.  "shadows" is n_intfus x n_intfus, to be used
        # for write-after-write hazards.  branch speculation uses the
        # separate, one-column-wide "bshadow" matrix.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq(self.alu_op)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i[0].eq(regdecode.dest_o)
        comb += intfus.src_i[0].eq(regdecode.src1_o)
        comb += intfus.src_i[1].eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        reset_b = Signal(cul.n_units, reset_less=True)
        # XXX was cul.go_wr_i not done.o
        # sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)
        sync += reset_b.eq(cul.go_st_i | cul.done_o | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data: one address per LD/ST unit
        for i, ldst in enumerate(cul.units):
            comb += memfus.addrs_i[i].eq(ldst.addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = ipick1.go_rd_o
        go_wr_o = ipick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        for i in range(fu_n_src):
            comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
        for i in range(fu_n_dst):
            comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        int_rd_o = intfus.readable_o
        rrel_o = cu.rd_rel_o
        rqrl_o = cu.req_rel_o
        for i in range(fu_n_src):
            comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
            comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
        int_wr_o = intfus.writable_o
        for i in range(fu_n_dst):
            # XXX FIXME: rqrl_o[i] here
            comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
            comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
            # XXX experiment: use ~cu.busy_o instead.  *should* be good
            # because the comp unit is only free once completed
            comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the separate
        # one-column bshadow matrix: only needs to set shadow_i,
        # s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & fu_mask)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & fu_mask)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
        comb += int_src1.ren.eq(intfus.src_rsel_o[0])
        comb += int_src2.ren.eq(intfus.src_rsel_o[1])

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        for i in range(fu_n_src):
            comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
        for i in range(fu_n_dst):
            comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields the register-file ports followed by the register-number
            inputs, issue_o and the branch-speculation signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything produced by __iter__ as a list.
        """
        return list(self)
782
783
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is decoded into a Function-Unit group request,
        and popped once that group's issue unit has accepted it.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length (passed to InstructionQ)
            * :n_in:   number of instruction input ports (data_i entries)
            * :n_out:  number of queue outputs (passed to InstructionQ)
            * :rwid:   bit width of register file(s)
            * :opwid:  operand bit width (passed to InstructionQ)
            * :n_regs: number of registers (passed to Scoreboard)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # floor(log2(qlen)) + 2 bits.  bit_length() gives the exact
        # integer log2, avoiding float rounding in log(qlen) / log(2).
        mqbits = unsigned((qlen.bit_length() - 1) + 2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction._nq(n_in, "data_i")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Creates the queue and scoreboard submodules and the issue
            logic between them.  Also sets self.intregs.  Returns the
            Module.
        """
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when the issue unit being waited
        # on has accepted the instruction (n_sub_i is set to 1 below).

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            for waiting, group in ((wait_issue_ls, sc.lsissue),
                                   (wait_issue_br, sc.brissue),
                                   (wait_issue_alu, sc.aluissue)):
                with m.If(waiting & (group.fn_issue_o != 0)):
                    with m.If(iq.qlen_o != 0):
                        comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            instr = iq.data_o[0]
            imm = instr.imm_data.data
            dest = instr.write_reg.data
            src1 = instr.read_reg1.data
            src2 = instr.read_reg2.data
            op = instr.insn_type
            fu = instr.fn_unit
            opi = instr.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile

            # choose a Function-Unit-Group
            with m.If(fu == Function.ALU):  # alu
                comb += sc.alu_op.eq_from_execute1(instr)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)
            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3 << 4)) != 0):  # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                # (op bits 4 and 5 land in ls_oper_i bits 2 and 3.
                # NOTE(review): Scoreboard labels ls_oper_i[3] as LD and
                # [2] as ST, the reverse of the list above - confirm.)
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ Yields p_ready_o, every field of each data_i entry, p_add_i.
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ Returns everything produced by __iter__ as a list.
        """
        return list(self)
906
907
def power_instr_q(dut, pdecode2, ins, code):
    """ Simulation process: pushes the decoder's output record
        (pdecode2.e) into dut's instruction queue.

        Sets dut.data_i[0], requests one instruction via p_add_i, then
        clocks until dut.p_ready_o is seen, and finally clears p_add_i.
        The ins and code arguments are not used here.
    """
    decoded = [pdecode2.e]
    n_to_send = 1

    for slot, rec in enumerate(decoded):
        yield dut.data_i[slot].eq(rec)
        insn_type = yield rec.insn_type
        fn_unit = yield rec.fn_unit
        print("senddata ", slot, insn_type, fn_unit, rec)

    yield dut.p_add_i.eq(n_to_send)
    yield
    # keep clocking until the queue reports the instruction was added
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
925
926
def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """Simulation process: place one hand-built instruction in the queue.

    Fills the fields of ``dut.data_i[0]`` (instruction type, function
    unit, both read registers, the write register and the immediate)
    from the arguments, prints the slot's value as read back from the
    simulator, then raises ``dut.p_add_i`` to 1 and steps the clock
    until ``dut.p_ready_o`` reads true, after which ``p_add_i`` is
    cleared.

    ``branch_success`` and ``branch_fail`` are accepted but not used here.
    """
    pending = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
                'imm_data': (imm, op_imm),
                'read_reg1': src1, 'read_reg2': src2}]
    n_added = 1

    for slot, fields in enumerate(pending):
        port = dut.data_i[slot]
        imm_val, imm_ok = fields['imm_data']
        # (target signal, value) pairs, in the order they are driven
        updates = (
            (port.insn_type, fields['insn_type']),
            (port.fn_unit, fields['fn_unit']),
            (port.read_reg1.data, fields['read_reg1']),
            (port.read_reg1.ok, 1),  # XXX TODO
            (port.read_reg2.data, fields['read_reg2']),
            (port.read_reg2.ok, 1),  # XXX TODO
            (port.write_reg.data, fields['write_reg']),
            (port.write_reg.ok, 1),  # XXX TODO
            (port.imm_data.data, imm_val),
            (port.imm_data.ok, imm_ok),
        )
        for target, value in updates:
            yield target.eq(value)
        readback = yield port
        print("senddata %d %x" % (slot, readback))

    # announce how many queue slots were filled
    yield dut.p_add_i.eq(n_added)

    # step at least one clock, then keep stepping until ready is seen
    while True:
        yield
        ready = yield dut.p_ready_o
        if ready:
            break

    yield dut.p_add_i.eq(0)
961
962
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: drive one instruction directly onto ``dut``.

    Clears all issue requests, sets the destination and source register
    numbers, then requests issue on either the branch group (when bit 2
    or bit 3 of ``op`` is set) or the ALU group, passing the low two
    bits of ``op`` as the operation and ``imm`` as the immediate.  The
    branch shadow inputs are set from ``branch_fail`` /
    ``branch_success``, one clock is stepped, and the process then
    waits for the selected group to report issue.
    """
    yield from disable_issue(dut)

    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # pick the issue group plus its operation / immediate inputs
    if op & (0x3 << 2):  # branch
        dut_issue, oper_i, imm_i = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_i, imm_i = dut.aluissue, dut.alu_oper_i, dut.alu_imm_i

    yield dut_issue.insn_i.eq(1)
    yield oper_i.eq(Const(op & 0x3, 2))
    yield imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
987
988
def print_reg(dut, rnums):
    """Simulation process: print the given integer registers in hex.

    Reads ``dut.intregs.regs[n].reg`` from the simulator for each ``n``
    in ``rnums`` and prints one line listing the register numbers and
    their values, both comma-separated.
    """
    hex_values = []
    for regnum in rnums:
        current = yield dut.intregs.regs[regnum].reg
        hex_values.append("%x" % current)
    labels = ','.join(map(str, rnums))
    print("reg %s: %s" % (labels, ','.join(hex_values)))
996
997
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of ``n_ops`` randomly generated instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when
    ``shadowing`` is true a trailing ``(0, 0)`` pair is appended.
    Register numbers are drawn from 1..``dut.n_regs``-1, the immediate
    from 1..2**``dut.rwid``-1 and ``op`` from 0..``max_opnums``.
    ``opi`` is 1 only when a draw from 0..2 comes up 0, otherwise 0.
    """
    ops = []
    for _ in range(n_ops):
        # NOTE: the order of the draws determines the generated program
        rs1 = randint(1, dut.n_regs - 1)
        rs2 = randint(1, dut.n_regs - 1)
        immediate = randint(1, (1 << dut.rwid) - 1)
        rd = randint(1, dut.n_regs - 1)
        opcode = randint(0, max_opnums)
        use_imm = 1 if randint(0, 2) == 0 else 0

        entry = (rs1, rs2, rd, opcode, use_imm, immediate)
        if shadowing:
            entry = entry + ((0, 0),)
        ops.append(entry)
    return ops
1013
1014
def wait_for_busy_clear(dut):
    """Simulation process: step the clock until ``dut.busy_o`` reads false.

    Prints "busy" once for every clock spent waiting.
    """
    still_busy = yield dut.busy_o
    while still_busy:
        print("busy",)
        yield
        still_busy = yield dut.busy_o
1022
1023
def disable_issue(dut):
    """Simulation process: clear ``insn_i`` on the ALU, branch and LD/ST
    issue groups of ``dut``, in that order."""
    for group in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, group).insn_i.eq(0)
1028
1029
def wait_for_issue(dut, dut_issue):
    """Simulation process: wait until ``dut_issue.fn_issue_o`` reads true.

    Prints "busy" and steps one clock for every poll that reads false.
    Once issue is seen, all issue requests are cleared and
    ``dut.reg_enable_i`` is set to 0.
    """
    issued = yield dut_issue.fn_issue_o
    while not issued:
        print("busy",)
        # yield from print_reg(dut, [1,2,3])
        yield
        issued = yield dut_issue.fn_issue_o

    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    # yield from print_reg(dut, [1,2,3])
1041
1042
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: issue a branch with shadowed instructions.

    Seeds the random generator, loads the same random values into
    registers 1..n_regs-1 of both ``dut`` and ``alusim``, then issues a
    small hand-written program (one plain op followed by one branch).
    When a branch is issued, its "success" and "fail" instruction lists
    are appended to the issue queue, marked with the matching shadow
    pair.  After ``dut`` goes idle the same program is replayed on
    ``alusim`` (following only the taken side of each branch) and the
    two register states are compared via ``alusim.check``.

    Instruction tuples are ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``; an ``op`` of 4 or more is a branch, whose ``dest``
    slot carries ``(branch_ok_list, branch_fail_list)`` instead of a
    register number.
    """
    iseed = 3

    for _ in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for rnum in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops(..., shadowing=True) returns
            # 7-field tuples, which the 5-field unpack in the issue loop
            # below cannot take -- update before re-enabling.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # independent copy for the software model: the issue loop below
        # consumes (and extends) the original list
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: nothing left to shadow against
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes (dut, op, imm, src1, src2, dest, succ, fail):
            # these tuples carry no immediate, so pass 0 for it
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program on the software model
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): scoreboard_sim calls alusim.op with six
            # arguments (op, opi, imm, src1, src2, dest); confirm this
            # four-argument form still matches RegSim.op.
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                # only the taken side of the branch is simulated
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1155
1156
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: run a short POWER assembly program on ``dut``.

    Registers 1..n_regs-1 of both ``dut`` and ``alusim`` are preloaded
    with their own index.  Each assembled instruction word is then
    placed on ``instruction``, one clock is stepped, and the decoder
    output is queued via ``power_instr_q``.  Once ``dut.qlen_o`` reads
    zero and ``dut`` is no longer busy, ``alusim.check`` and
    ``alusim.dump`` are run against ``dut``.

    ``m`` is accepted but not used here.
    """
    seed(0)

    for _ in range(1):

        # preload the registers (deterministic: each holds its own number)
        for regnum in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # the program to run
        asm_lines = [
            "addi 2, 0, 0x4321",
            "addi 3, 0, 0x1234",
            "add 1, 3, 2",
            "add 4, 3, 5",
        ]
        with Program(asm_lines) as program:
            binaries = program.generate_instructions()
            listing = program.assembly.splitlines()

            # issue instruction(s), wait for issue to be free before proceeding
            for ins, code in zip(binaries, listing):
                yield instruction.eq(ins)  # raw binary instr.
                yield  # Delay(1e-6)

                print("binary 0x{:X}".format(ins & 0xffffffff))
                print("assembly", code)

                # NOTE(review): the software model is not stepped here
                # (alusim.op is commented out), so alusim still holds
                # the preload values when it is checked below.
                #alusim.op(op, opi, imm, src1, src2, dest)
                yield from power_instr_q(dut, pdecode2, ins, code)

        # wait for the instruction queue to drain, one clock per poll
        queued = yield dut.qlen_o
        while queued != 0:
            yield
            queued = yield dut.qlen_o

        # a few extra clocks, then wait for busy to drop
        for _settle in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1206
1207
def scoreboard_sim(dut, alusim):
    """Simulation process: queue hand-picked instructions and check results.

    Registers 1..n_regs-1 of both ``dut`` and ``alusim`` are preloaded
    with their own index.  Every instruction in ``instrs`` is applied
    to ``alusim`` and queued on ``dut`` via ``instr_q``.  Once
    ``dut.qlen_o`` reads zero and ``dut`` is no longer busy,
    ``alusim.check`` and ``alusim.dump`` are run against ``dut``.

    The issue loop unpacks each instruction as
    ``(src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))``.
    The ``if False:`` / ``if True:`` groups below are manual toggles for
    individual regression cases.

    NOTE(review): many of the disabled groups hold 4-, 5-, 6- or 7-field
    tuples, which do not fit the 8-field unpack in the issue loop; they
    need converting before being re-enabled.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x20, 1, 1, (0, 0)))  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, 0, 0, (0, 0)))
            instrs.append((7, 6, 6, 2, 0, 0, (0, 0)))
            instrs.append((1, 7, 2, 2, 0, 0, (0, 0)))

        # the currently-enabled case: two ADDs on the ALU function unit
        if True:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
            instrs.append((5, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
        if False:
            instrs.append((3, 5, 5, InternalOp.OP_MUL_L64, Function.ALU,
                           1, 7, (0, 0)))
        if False:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            # override registers 5 and 3 in both dut and model first
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # override registers 5 and 3 in both dut and model first
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            print (i, instr)
            src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (i, src1, src2, dest, op, fn_unit, opi, imm))
            # apply to the software model, then queue on the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few extra clocks before polling busy
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1367
1368
def test_scoreboard():
    """Build the decoder + scoreboard testbench, dump it and simulate it.

    Wires a ``PowerDecode2`` and an ``IssueToScoreboard`` into one
    module, feeds the decoder from a 32-bit ``instruction`` signal,
    writes the RTLIL to ``test_scoreboard6600.il`` and runs
    ``power_sim`` with a VCD trace in ``test_powerboard6600.vcd``.
    """
    regwidth = 64
    dut = IssueToScoreboard(2, 1, 1, regwidth, 8, 8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)

    top = Module()
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    pdecode2 = PowerDecode2(pdecode)
    top.submodules.pdecode2 = pdecode2
    top.submodules.sim = dut

    top.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    top.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    # dump the design as RTLIL
    il_text = rtlil.convert(top, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    testbench = power_sim(top, dut, pdecode2, instruction, alusim)
    run_simulation(top, testbench, vcd_name='test_powerboard6600.vcd')

    #run_simulation(dut, scoreboard_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                      vcd_name='test_scoreboard6600.vcd')
1401
1402
# run the scoreboard testbench when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()