da30ca67f56c5eb7703345d6520c906aaae11306
[soc.git] / src / soc / experiment / score6600_multi.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
5 from nmigen.back.pysim import Delay
6
7 from soc.regfile.regfile import RegFileArray, treereduce
8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
10 from soc.scoreboard.global_pending import GlobalPending
11 from soc.scoreboard.group_picker import GroupPicker
12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
15 from soc.scoreboard.memfu import MemFunctionUnits
16
17 from soc.experiment.compalu import ComputationUnitNoDelay
18 from soc.experiment.compalu_multi import ComputationUnitNoDelay as MultiCompUnit
19 from soc.experiment.compldst import LDSTCompUnit
20 from soc.experiment.testmem import TestMemory
21
22 from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
23
24 from soc.decoder.power_enums import InternalOp, Function
25 from soc.decoder.power_decoder import (create_pdecode)
26 from soc.decoder.power_decoder2 import (PowerDecode2)
27 from soc.simulator.program import Program
28
29
30 from nmutil.latch import SRLatch
31 from nmutil.nmoperator import eq
32
33 from random import randint, seed
34 from copy import deepcopy
35 from math import log
36
37 from soc.experiment.sim import RegSim, MemSim
38 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
39
40
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Groups a sequence of Computation Units so that their single-bit
        control lines (issue, go_rd, go_wr, busy and so on) are presented
        as one concatenated signal per control line, one bit per unit.

        The class is recursive: the "units" may themselves be
        CompUnitsBase instances, in which case the group exposes the
        sum of the member groups' unit counts.  This gives a top-level
        sequential concatenation of every in/out signal of every ALU
        whilst still keeping ALUs conveniently grouped.

        At the lower level the intent is that groups of (identical)
        ALUs may be passed the same operation, and, beyond that, that
        such a group shares the *same pipeline* and so becomes a
        "Concurrent Computation Unit" as defined by Mitch Alsup (see
        section 11.4.9.3)
    """

    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the extra LD/ST control and address
                         signals are created and wired up as well
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid

        # a group-of-groups is as wide as all of its members together,
        # a plain group is one bit per unit.
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(grp.n_units for grp in self.units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd0_i = Signal(n_units, reset_less=True)
        self.go_rd1_i = Signal(n_units, reset_less=True)
        self.go_rd_i = [self.go_rd0_i, self.go_rd1_i]  # XXX HACK!
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel0_o = Signal(n_units, reset_less=True)
        self.rd_rel1_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = [self.rd_rel0_o, self.rd_rel1_o]  # HACK!
        self.req_rel_o = Signal(n_units, reset_less=True)
        self.done_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        for idx, unit in enumerate(self.units):
            setattr(m.submodules, "comp%d" % idx, unit)

        # per-unit signals that every kind of unit provides
        req_rel_l = [unit.req_rel_o for unit in self.units]
        done_l = [unit.done_o for unit in self.units]
        shadow_l = [unit.shadown_i for unit in self.units]
        godie_l = [unit.go_die_i for unit in self.units]
        issue_l = [unit.issue_i for unit in self.units]
        busy_l = [unit.busy_o for unit in self.units]

        # read/write handshakes differ by unit type
        rd_rel0_l = []
        rd_rel1_l = []
        go_rd_l0 = []
        go_rd_l1 = []
        go_wr_l = []
        unconnected = (LDSTCompUnit, CompUnitBR, ComputationUnitNoDelay)
        for unit in self.units:
            print (unit, unit.rd_rel_o)
            if isinstance(unit, unconnected):
                # these unit types are not connected yet: their slice of
                # the go_rd/go_wr concatenation lands in throwaway signals
                dummy1 = Signal(64, reset_less=True)
                dummy2 = Signal(64, reset_less=True)
                dummy3 = Signal(64, reset_less=True)
                go_wr_l.append(dummy1)
                go_rd_l0.append(dummy2)
                go_rd_l1.append(dummy3)
            else:
                rd_rel0_l.append(unit.rd_rel_o[0])
                rd_rel1_l.append(unit.rd_rel_o[1])
                go_wr_l.append(unit.go_wr_i[0])
                go_rd_l0.append(unit.go_rd_i[0])
                go_rd_l1.append(unit.go_rd_i[1])
            # NOTE(review): nesting of these two placeholder appends is
            # ambiguous in the reviewed text; loop level assumed - confirm
            rd_rel0_l.append(Const(0, 64))  # FIXME
            rd_rel1_l.append(Const(0, 64))  # FIXME

        # outputs: concatenate the units' bits into this group's signals
        comb += self.rd_rel0_o.eq(Cat(*rd_rel0_l))
        comb += self.rd_rel1_o.eq(Cat(*rd_rel1_l))
        comb += self.req_rel_o.eq(Cat(*req_rel_l))
        comb += self.done_o.eq(Cat(*done_l))
        comb += self.busy_o.eq(Cat(*busy_l))
        # inputs: fan this group's signals back out to the units
        comb += Cat(*godie_l).eq(self.go_die_i)
        comb += Cat(*shadow_l).eq(self.shadown_i)
        comb += Cat(*go_wr_l).eq(self.go_wr_i)
        comb += Cat(*go_rd_l0).eq(self.go_rd0_i)
        comb += Cat(*go_rd_l1).eq(self.go_rd1_i)
        comb += Cat(*issue_l).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # XXX NOTE: this only works because there is a single "port"
        # protected by a single go_wr.  multi-issue requires a bus
        # to be inserted here.
        if self.units:
            comb += self.data_o.eq(treereduce(self.units, "data_o"))
            if self.ldstmode:
                comb += self.addr_o.eq(treereduce(self.units, "addr_o"))

        # every unit sees the same pair of source operands
        for unit in self.units:
            comb += unit.src1_i.eq(self.src1_i)
            comb += unit.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only control lines, handled the same way as above
        ld_l = [unit.ld_o for unit in self.units]
        st_l = [unit.st_o for unit in self.units]
        adr_rel_l = [unit.adr_rel_o for unit in self.units]
        sto_rel_l = [unit.sto_rel_o for unit in self.units]
        ldmem_l = [unit.load_mem_o for unit in self.units]
        stmem_l = [unit.stwd_mem_o for unit in self.units]
        go_ad_l = [unit.go_ad_i for unit in self.units]
        go_st_l = [unit.go_st_i for unit in self.units]

        comb += self.ld_o.eq(Cat(*ld_l))
        comb += self.st_o.eq(Cat(*st_l))
        comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
        comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
        comb += self.load_mem_o.eq(Cat(*ldmem_l))
        comb += self.stwd_mem_o.eq(Cat(*stmem_l))
        comb += Cat(*go_ad_l).eq(self.go_ad_i)
        comb += Cat(*go_st_l).eq(self.go_st_i)

        return m
222
223
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LD/ST Computation Units, all handed the same operation
        and immediate.
    """

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST Computation Units to create
            * :mem:     memory object passed to every LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, one per LD/ST unit
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        aluopwid = 4  # see compldst.py for "internal" opcode
        units = [LDSTCompUnit(rwid, aluopwid, alu, mem)
                 for alu in self.alus]

        CompUnitsBase.__init__(self, rwid, units, ldstmode=True)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units, 4 lower bits though
        for ldst in self.units:
            comb += ldst.oper_i[0:4].eq(self.oper_i)
            comb += ldst.imm_i.eq(self.imm_i)
            comb += ldst.isalu_i.eq(0)

        return m
261
262
class CompUnitALUs(CompUnitsBase):
    """ Group of integer ALU Computation Units, all handed the same
        operation record.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU Computation Units to create
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")
        # oper_i and imm_i are created but currently not passed on to
        # the units (see elaborate)
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, each wrapped in its own multi-operand Computation Unit
        alus = [ALU(rwid) for _ in range(n_alus)]
        units = [MultiCompUnit(rwid, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the subset of operation to ALUs
        for cu in self.units:
            comb += cu.oper_i.eq(self.op)
            #comb += cu.oper_i[0:3].eq(self.oper_i)
            #comb += cu.imm_i.eq(self.imm_i)

        return m
301
302
class CompUnitBR(CompUnitsBase):
    """ Group containing the single Branch Computation Unit. """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width

            Note: the branch ALU (bgt) and its Computation Unit (br1)
            are kept as attributes so that a shadow unit can be
            created for the branch.
        """
        self.opwid = opwid

        # inputs (imm_i is created but currently not passed on)
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units
        for br in self.units:
            comb += br.oper_i.eq(self.oper_i)
            #comb += br.imm_i.eq(self.imm_i)

        return m
336
337
class FunctionUnits(Elaboratable):
    """ Wraps the integer FU-FU and FU-Register dependency matrices
        behind one set of per-register and per-Function-Unit signals.
    """

    def __init__(self, n_reg, n_int_alus, n_src, n_dst):
        """ Inputs:

            * :n_reg:      number of registers (width of src/dest vectors)
            * :n_int_alus: number of Function Units (width of go/issue
                           vectors)
            * :n_src:      number of source (read) ports
            * :n_dst:      number of destination (write) ports
        """
        self.n_src, self.n_dst = n_src, n_dst
        self.n_reg = n_reg
        self.n_int_alus = nf = n_int_alus

        self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # per-source-port arrays
        src = []
        rsel = []
        rd = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
            rsel.append(Signal(n_reg, name="src%d_rsel_o" % j,
                               reset_less=True))
            rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))

        # per-destination-port arrays
        dst = []
        dsel = []
        wr = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match src1/src2
            dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
            dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j,
                               reset_less=True))
            wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))

        self.dest_i = Array(dst)  # Dest in (top)
        self.src_i = Array(src)  # oper in (top)

        # for Register File Select Lines (horizontal), per-reg
        self.dst_rsel_o = Array(dsel)  # dest reg (bot)
        self.src_rsel_o = Array(rsel)  # src reg (bot)

        self.go_rd_i = Array(rd)
        self.go_wr_i = Array(wr)

        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here (it is set
        # as an attribute during elaborate), for use in WaWGrid

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Build both matrices with this object's own port counts, so the
        # per-port loops below index exactly the ports that exist.
        # NOTE(review): assumes the last two constructor arguments are
        # (n_src, n_dst) - confirm against the matrix classes.

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus,
                                  self.n_src, self.n_dst)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_reg,
                                    self.n_src, self.n_dst)
        m.submodules.intregdeps = intregdeps

        comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
        comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)

        # the FU-Reg matrix's vector outputs loop back into its own
        # pending inputs
        comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)

        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.readable_o.eq(intfudeps.readable_o)
        comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        for i in range(self.n_src):
            print (i, self.go_rd_i, intfudeps.go_rd_i)
            comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += intregdeps.src_i[i].eq(self.src_i[i])
            comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
        for i in range(self.n_dst):
            print (i, self.go_wr_i, intfudeps.go_wr_i)
            comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += intregdeps.dest_i[i].eq(self.dest_i[i])
            comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        return m
436
437
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: wires register files, test memory,
        Computation Units, dependency matrices, priority pickers, issue
        units and shadow matrices together.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_op = CompALUOpSubset("alu")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.mem = self.mem

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected to anything yet
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        br1 = cub.br1  # get at the branch computation unit

        # Unit counts are taken from the groups themselves rather than
        # repeated as literals, so they follow the IssueUnitGroup sizes.
        n_int_alus = cu.n_units
        n_ldsts = cul.n_units

        # Int FUs
        fu_n_src = 2
        fu_n_dst = 1
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
                                                     fu_n_src, fu_n_dst)

        # Memory FUs
        # NOTE(review): meaning of the literal 5 is not visible here - confirm
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        mempick1 = GroupPicker(n_ldsts, 1, 1)
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        # mask selecting one bit per integer FU
        issue_mask = (1 << n_intfus) - 1

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
        m.submodules.intpick1 = ipick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue,
                                    self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  A second, one-bit-wide matrix
        # (bshadow) is used for branches.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus,
                                                      True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq(self.alu_op)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i[0].eq(regdecode.dest_o)
        comb += intfus.src_i[0].eq(regdecode.src1_o)
        comb += intfus.src_i[1].eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on
        # LDSTCompUnit issue_i.  multi-issue gets a bit more complex but
        # not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o |
                               memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data, one address per LD/ST unit
        for i, ldst in enumerate(cul.units):
            comb += memfus.addrs_i[i].eq(ldst.addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = ipick1.go_rd_o
        go_wr_o = ipick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        for i in range(fu_n_src):
            comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
        for i in range(fu_n_dst):
            comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        int_rd_o = intfus.readable_o
        rrel_o = cu.rd_rel_o
        for i in range(fu_n_src):
            comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
            comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
        int_wr_o = intfus.writable_o
        for i in range(fu_n_dst):
            comb += ipick1.req_rel_i[i][0:n_intfus].eq(cu.done_o[0:n_intfus])
            comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
            # XXX experiment: use ~cu.busy_o instead.  *should* be good
            # because the comp unit is only free once completed
            comb += shadows.s_good_i[i][0:n_intfus].eq(
                ~cu.busy_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the separate
        # one-bit-wide bshadow matrix.
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & issue_mask)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & issue_mask)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
        comb += int_src1.ren.eq(intfus.src_rsel_o[0])
        comb += int_src2.ren.eq(intfus.src_rsel_o[1])

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        for i in range(fu_n_src):
            comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
        for i in range(fu_n_dst):
            # NOTE(review): cu.go_wr_i is a single Signal here, so [i]
            # selects one bit of it before slicing - confirm this is intended
            comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        return list(self)
790
791
class IssueToScoreboard(Elaboratable):
    """ Instruction queue feeding a Scoreboard: inspects the head of the
        queue, selects a Function-Unit group for it, and pops the entry
        once the selected group reports that it issued.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   instruction queue length
            * :n_in:   number of queue input slots
            * :n_out:  number of queue output slots
            * :rwid:   bit width of register file(s)
            * :opwid:  operand bit width
            * :n_regs: depth of register file(s)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # width of the queue-count signals: int(log2(qlen)) + 2 bits
        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction._nq(n_in, "data_i")

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for slot in range(self.n_in):
            comb += eq(iq.data_i[slot], self.data_i[slot])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed (n_sub_i set to 1 below) when
        # the group being waited on reports a non-zero fn_issue_o.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            pending = ((wait_issue_ls, sc.lsissue),
                       (wait_issue_br, sc.brissue),
                       (wait_issue_alu, sc.aluissue))
            for waiting, group in pending:
                with m.If(waiting & (group.fn_issue_o != 0)):
                    with m.If(iq.qlen_o != 0):
                        comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation from the head of the queue
            head = iq.data_o[0]
            imm = head.imm_data.data
            dest = head.write_reg.data
            src1 = head.read_reg1.data
            src2 = head.read_reg2.data
            op = head.insn_type
            fu = head.fn_unit
            opi = head.imm_data.ok  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile

            # choose a Function-Unit-Group
            with m.If(fu == Function.ALU):  # alu
                comb += sc.alu_op.eq_from_execute1(head)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)
            with m.Elif((op & (0x3 << 2)) != 0):  # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3 << 4)) != 0):  # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            # yield sc.branch_fail_i.eq(branch_fail)
            # yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        yield self.p_ready_o
        for entry in self.data_i:
            yield from list(entry)
        yield self.p_add_i

    def ports(self):
        return list(self)
914
915
def power_instr_q(dut, pdecode2, ins, code):
    """ Simulation process: push the decoder's current output record
        (pdecode2.e) into the dut's instruction queue.

        Writes the record to input slot 0, requests one instruction be
        added, then yields clock cycles until the dut raises p_ready_o,
        finally clearing the add request.  ins and code are accepted
        but not used here.
    """
    decoded = (pdecode2.e,)

    for slot, insn in enumerate(decoded):
        yield dut.data_i[slot].eq(insn)
        insn_type = yield insn.insn_type
        fn_unit = yield insn.fn_unit
        print("senddata ", slot, insn_type, fn_unit, insn)

    # ask for exactly one instruction to be added
    yield dut.p_add_i.eq(1)
    yield
    # wait (one cycle per poll) for the queue to accept it
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
933
934
def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """Simulation process: load one instruction into ``dut.data_i[0]``.

    Each field of the queue-input record is assigned individually (the
    three register ``ok`` flags are forced to 1), ``dut.p_add_i`` is set
    to 1, and the process steps the clock until ``dut.p_ready_o`` reads
    true before setting ``dut.p_add_i`` back to 0.

    ``branch_success`` and ``branch_fail`` are accepted but not used.
    """
    # (insn_type, fn_unit, read_reg1, read_reg2, write_reg, imm, imm-ok)
    pending = [(op, funit, src1, src2, dest, imm, op_imm)]
    n_to_send = 1

    for idx, fields in enumerate(pending):
        insn_type, fn_unit, reg1, reg2, wreg, imm_val, imm_ok = fields
        entry = dut.data_i[idx]

        yield entry.insn_type.eq(insn_type)
        yield entry.fn_unit.eq(fn_unit)
        yield entry.read_reg1.data.eq(reg1)
        yield entry.read_reg1.ok.eq(1)  # XXX TODO
        yield entry.read_reg2.data.eq(reg2)
        yield entry.read_reg2.ok.eq(1)  # XXX TODO
        yield entry.write_reg.data.eq(wreg)
        yield entry.write_reg.ok.eq(1)  # XXX TODO
        yield entry.imm_data.data.eq(imm_val)
        yield entry.imm_data.ok.eq(imm_ok)

        # read the whole record back so it can be shown in hex
        snapshot = yield entry
        print("senddata %d %x" % (idx, snapshot))

    yield dut.p_add_i.eq(n_to_send)

    # always advance one cycle, then keep advancing until ready is seen
    yield
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
969
970
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: issue one instruction directly on *dut*.

    All issue requests are first cleared, the destination and source
    register numbers are set, and then either the branch or the ALU
    issue unit is asserted depending on bits 2-3 of ``op``.  The shadow
    inputs are set from ``branch_success`` / ``branch_fail`` and the
    process finally waits for the chosen unit to report issue.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # only the low two bits of op are forwarded as the operation
    opcode = Const(op & 0x3, 2)
    if op & (0x3 << 2):  # branch
        dut_issue = dut.brissue
        yield dut_issue.insn_i.eq(1)
        yield dut.br_oper_i.eq(opcode)
        yield dut.br_imm_i.eq(imm)
    else:
        # NOTE(review): assumes dut still exposes alu_oper_i / alu_imm_i;
        # confirm against the scoreboard class
        dut_issue = dut.aluissue
        yield dut_issue.insn_i.eq(1)
        yield dut.alu_oper_i.eq(opcode)
        yield dut.alu_imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
995
996
def print_reg(dut, rnums):
    """Simulation process: print the listed integer registers in hex.

    For every register number in *rnums* the value of
    ``dut.intregs.regs[n].reg`` is read, then a single line of the form
    ``reg <numbers>: <hex values>`` (both comma-separated) is printed.
    """
    hexvals = []
    for regnum in rnums:
        value = yield dut.intregs.regs[regnum].reg
        hexvals.append("%x" % value)
    labels = ','.join(map(str, rnums))
    print("reg %s: %s" % (labels, ','.join(hexvals)))
1004
1005
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of *n_ops* random instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when *shadowing*
    is true a trailing ``(0, 0)`` shadow pair is appended.  Register
    numbers are drawn from ``1 .. dut.n_regs-1``, the immediate from
    ``1 .. 2**dut.rwid - 1`` and the opcode from ``0 .. max_opnums``.
    ``opi`` is 1 on roughly one draw in three (when the draw is zero).
    """
    shadow_suffix = ((0, 0),) if shadowing else ()
    ops = []
    for _ in range(n_ops):
        # the order of the randint() calls fixes the sequence for a seed
        rd_a = randint(1, dut.n_regs - 1)
        rd_b = randint(1, dut.n_regs - 1)
        immediate = randint(1, (1 << dut.rwid) - 1)
        wr = randint(1, dut.n_regs - 1)
        opcode = randint(0, max_opnums)
        use_imm = int(randint(0, 2) == 0)
        ops.append((rd_a, rd_b, wr, opcode, use_imm, immediate)
                   + shadow_suffix)
    return ops
1021
1022
def wait_for_busy_clear(dut):
    """Simulation process: step the clock until ``dut.busy_o`` reads false.

    Prints ``busy`` once for every cycle in which the signal is still set.
    """
    still_busy = yield dut.busy_o
    while still_busy:
        print("busy",)
        yield
        still_busy = yield dut.busy_o
1030
1031
def disable_issue(dut):
    """Simulation process: clear ``insn_i`` on the ALU, branch and LD/ST
    issue units of *dut*, in that order.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1036
1037
def wait_for_issue(dut, dut_issue):
    """Simulation process: wait until ``dut_issue.fn_issue_o`` reads true.

    While it is false, ``busy`` is printed and the clock advanced one
    cycle.  Once it is true, every issue request is cleared and
    ``dut.reg_enable_i`` is set to 0.
    """
    issued = yield dut_issue.fn_issue_o
    while not issued:
        print("busy",)
        yield
        issued = yield dut_issue.fn_issue_o

    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1049
1050
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: issue a branch plus shadowed instructions.

    Registers 1..n_regs-1 are loaded with seeded random values (mirrored
    into *alusim*), a short hand-written program containing one branch is
    issued through ``int_instr``, and once the scoreboard is idle the same
    program is replayed on *alusim* and the results compared via
    ``alusim.check`` / ``alusim.dump``.

    Instructions here are 5-field tuples
    ``(src1, src2, dest, op, (shadow_on, shadow_off))``; for a branch
    (``op >= 4``) the ``dest`` slot instead carries
    ``(branch_ok_list, branch_fail_list)``.
    """

    iseed = 3

    for i in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE: create_random_ops(shadowing=True) returns 7-field
            # tuples, which the 5-field unpacking in the issue loop below
            # cannot consume; this block needs adapting before enabling.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # keep an untouched copy for the software replay further down:
        # the issue loop consumes (and extends) insts in place
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts  # same list object: appends below feed the loop
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes an immediate as its third argument; these
            # tuples carry none, so pass 0 explicitly (omitting it made
            # the call one positional argument short -> TypeError)
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the program in software, following whichever way the
        # simulated branch resolves
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # NOTE(review): scoreboard_sim calls alusim.op with
            # (op, opi, imm, src1, src2, dest); confirm which signature
            # RegSim.op actually has before re-enabling this test
            branch_res = alusim.op(op, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1163
1164
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: assemble a tiny program and feed it to *dut*.

    Register N (for N in 1..n_regs-1) is preloaded with the value N in
    both *dut* and *alusim*.  Each assembled instruction is driven onto
    the *instruction* signal, given 1us to settle, and then queued via
    ``power_instr_q``.  Once the queue is empty and the scoreboard idle,
    ``alusim.check`` and ``alusim.dump`` are run against *dut*.

    ``m`` is accepted but not referenced here.
    """

    seed(0)

    for _run in range(1):

        # set (not actually random) values in the registers
        for regnum in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions
        asm_lines = [  # "addi 3, 0, 0x1234",
                       # "addi 2, 0, 0x4321",
                       "add 1, 3, 2"]
        with Program(asm_lines) as program:
            binaries = program.generate_instructions()

            # issue instruction(s), wait for issue to be free before proceeding
            for ins, code in zip(binaries, program.assembly.splitlines()):
                yield instruction.eq(ins)  # raw binary instr.
                yield Delay(1e-6)

                print("binary 0x{:X}".format(ins & 0xffffffff))
                print("assembly", code)

                #alusim.op(op, opi, imm, src1, src2, dest)
                yield from power_instr_q(dut, pdecode2, ins, code)

        # wait for all instructions to stop before checking: poll the
        # queue length once per cycle, then allow four settle cycles
        while (yield dut.qlen_o) != 0:
            yield
        for _settle in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1212
1213
def scoreboard_sim(dut, alusim):
    """Simulation process: queue hand-written instructions and check them.

    Register N (for N in 1..n_regs-1) is preloaded with the value N in
    both *dut* and *alusim*.  Every enabled instruction is executed on
    *alusim* and queued on *dut* through ``instr_q``; once the queue is
    empty and the scoreboard idle, ``alusim.check`` / ``alusim.dump``
    are run against *dut*.

    Live instructions are 8-field tuples
    ``(src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))``.
    Many of the disabled (``if False``) regression vectors below use
    shorter, older layouts and would not unpack in the issue loop as
    they stand.
    """

    seed(0)

    for _run in range(1):

        # set (not actually random) values in the registers
        for regnum in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            yield dut.intregs.regs[regnum].reg.eq(regnum)
            alusim.setval(regnum, regnum)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x20, 1, 1, (0, 0)))  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, 0, 0, (0, 0)))
            instrs.append((7, 6, 6, 2, 0, 0, (0, 0)))
            instrs.append((1, 7, 2, 2, 0, 0, (0, 0)))

        if True:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
            instrs.append((5, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
        if False:
            instrs.append((3, 5, 5, InternalOp.OP_MUL_L64, Function.ALU,
                           1, 7, (0, 0)))
        if False:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, instr in enumerate(instrs):
            print(idx, instr)
            (src1, src2, dest, op, fn_unit,
             opi, imm, (br_ok, br_fail)) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (idx, src1, src2, dest, op, fn_unit, opi, imm))
            # run the software model first, then queue on the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking: poll the
        # queue length once per cycle, then allow four settle cycles
        while (yield dut.qlen_o) != 0:
            yield
        for _settle in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1373
1374
def test_scoreboard():
    """Build the decoder + scoreboard test-bench and run ``power_sim``.

    The design is first converted to RTLIL and written to
    ``test_scoreboard6600.il``; the simulation then dumps its waveform
    to ``test_powerboard6600.vcd``.
    """
    regwidth = 64
    dut = IssueToScoreboard(2, 1, 1, regwidth, 8, 8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # constructed but not used by power_sim

    m = Module()
    instruction = Signal(32)

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)

    pdecode2 = PowerDecode2(pdecode)
    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    # the raw instruction word drives the decoder combinatorially
    m.d.comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    m.d.comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    rtlil_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, sim_process, vcd_name='test_powerboard6600.vcd')

    # alternative test-benches, currently disabled:
    #run_simulation(dut, scoreboard_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                     vcd_name='test_scoreboard6600.vcd')
1407
1408
# run the scoreboard test-bench when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()