53d27ee978524659ae370e4515f6ac3f9e3672d1
[soc.git] / src / soc / experiment / score6600_multi.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen.hdl.ast import unsigned
4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
5 from nmigen.back.pysim import Delay
6
7 from soc.regfile.regfile import RegFileArray, treereduce
8 from soc.scoremulti.fu_fu_matrix import FUFUDepMatrix
9 from soc.scoremulti.fu_reg_matrix import FURegDepMatrix
10 from soc.scoreboard.global_pending import GlobalPending
11 from soc.scoreboard.group_picker import GroupPicker
12 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
13 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
14 from soc.scoreboard.instruction_q import Instruction, InstructionQ
15 from soc.scoreboard.memfu import MemFunctionUnits
16
17 from soc.experiment.compalu import ComputationUnitNoDelay
18 from soc.experiment.compalu_multi import ComputationUnitNoDelay as MultiCompUnit
19 from soc.experiment.compldst import LDSTCompUnit
20 from soc.experiment.testmem import TestMemory
21
22 from soc.experiment.alu_hier import ALU, BranchALU, CompALUOpSubset
23
24 from soc.decoder.power_enums import InternalOp, Function
25 from soc.decoder.power_decoder import (create_pdecode)
26 from soc.decoder.power_decoder2 import (PowerDecode2)
27 from soc.simulator.program import Program
28
29
30 from nmutil.latch import SRLatch
31 from nmutil.nmoperator import eq
32
33 from random import randint, seed
34 from copy import deepcopy
35 from math import log
36
37 from soc.experiment.sim import RegSim, MemSim
38 from soc.experiment.sim import IADD, ISUB, IMUL, ISHF, IBGT, IBLT, IBEQ, IBNE
39
40
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that the group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup (see section
        11.4.9.3)
    """

    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True the additional LD/ST signals (go_ad_i,
                         go_st_i, ld_o, st_o, adr_rel_o, sto_rel_o,
                         load_mem_o, stwd_mem_o, addr_o) are created here
                         and wired up in elaborate()
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid
        # when grouping *groups*, the width of every concatenated signal
        # is the total of the sub-groups' unit counts.
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in self.units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd0_i = Signal(n_units, reset_less=True)
        self.go_rd1_i = Signal(n_units, reset_less=True)
        self.go_rd_i = [self.go_rd0_i, self.go_rd1_i]  # XXX HACK!
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel0_o = Signal(n_units, reset_less=True)
        self.rd_rel1_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = [self.rd_rel0_o, self.rd_rel1_o]  # HACK!
        self.req_rel_o = Signal(n_units, reset_less=True)
        self.done_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True)  # op is LD
            self.st_o = Signal(n_units, reset_less=True)  # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)
        # input operand

    def elaborate(self, platform):
        """ Adds every unit as a submodule ("comp0", "comp1", ...) and
            connects the concatenation (Cat) of the per-unit signals to
            the group-level signals created in __init__.

            Returns the nmigen Module.
        """
        m = Module()
        comb = m.d.comb

        for i, alu in enumerate(self.units):
            setattr(m.submodules, "comp%d" % i, alu)

        go_rd_l0 = []
        go_rd_l1 = []
        go_wr_l = []
        issue_l = []
        busy_l = []
        req_rel_l = []
        done_l = []
        rd_rel0_l = []
        rd_rel1_l = []
        shadow_l = []
        godie_l = []
        for alu in self.units:
            req_rel_l.append(alu.req_rel_o)
            done_l.append(alu.done_o)
            shadow_l.append(alu.shadown_i)
            godie_l.append(alu.go_die_i)
            print (alu, "rel", alu.req_rel_o, alu.rd_rel_o)
            if isinstance(alu, LDSTCompUnit) or \
               isinstance(alu, ComputationUnitNoDelay):
                # these unit types are not (yet) connected: constants and
                # unconnected dummy signals stand in for their read-release,
                # go_rd / go_wr, issue and busy signals.
                # NOTE(review): the CompUnitsBase test below looks
                # unreachable for the two types checked above - confirm.
                if isinstance(alu, CompUnitsBase):
                    ulen = alu.n_units
                else:
                    ulen = 1
                rd_rel0_l.append(Const(0, 64))  # FIXME
                rd_rel1_l.append(Const(0, 64))  # FIXME
                dummy1 = Signal(ulen, reset_less=True)
                dummy2 = Signal(ulen, reset_less=True)
                dummy3 = Signal(ulen, reset_less=True)
                dummy4 = Signal(ulen, reset_less=True)
                dummy5 = Signal(ulen, reset_less=True)
                go_wr_l.append(dummy1)
                go_rd_l0.append(dummy2)
                go_rd_l1.append(dummy3)
                issue_l.append(dummy4)
                busy_l.append(dummy5)
            else:
                rd_rel0_l.append(alu.rd_rel_o[0])
                rd_rel1_l.append(alu.rd_rel_o[1])
                go_wr_l.append(alu.go_wr_i[0])
                go_rd_l0.append(alu.go_rd_i[0])
                go_rd_l1.append(alu.go_rd_i[1])
                issue_l.append(alu.issue_i)
                busy_l.append(alu.busy_o)
        # outputs: per-unit signals are concatenated into the group signal
        comb += self.rd_rel0_o.eq(Cat(*rd_rel0_l))
        comb += self.rd_rel1_o.eq(Cat(*rd_rel1_l))
        comb += self.req_rel_o.eq(Cat(*req_rel_l))
        comb += self.done_o.eq(Cat(*done_l))
        comb += self.busy_o.eq(Cat(*busy_l))
        # inputs: the group signal is split across the per-unit signals
        comb += Cat(*godie_l).eq(self.go_die_i)
        comb += Cat(*shadow_l).eq(self.shadown_i)
        comb += Cat(*go_wr_l).eq(self.go_wr_i)
        comb += Cat(*go_rd_l0).eq(self.go_rd0_i)
        comb += Cat(*go_rd_l1).eq(self.go_rd1_i)
        comb += Cat(*issue_l).eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        # XXX NOTE: this only works because there is a single "port"
        # protected by a single go_wr.  multi-issue requires a bus
        # to be inserted here.
        if self.units:
            data_o = treereduce(self.units, "data_o")
            comb += self.data_o.eq(data_o)
            if self.ldstmode:
                addr_o = treereduce(self.units, "addr_o")
                comb += self.addr_o.eq(addr_o)

        # every unit sees the same two source operands
        for alu in self.units:
            comb += alu.src1_i.eq(self.src1_i)
            comb += alu.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals, concatenated in the same way as above
        ldmem_l = []
        stmem_l = []
        go_ad_l = []
        go_st_l = []
        ld_l = []
        st_l = []
        adr_rel_l = []
        sto_rel_l = []
        for alu in self.units:
            ld_l.append(alu.ld_o)
            st_l.append(alu.st_o)
            adr_rel_l.append(alu.adr_rel_o)
            sto_rel_l.append(alu.sto_rel_o)
            ldmem_l.append(alu.load_mem_o)
            stmem_l.append(alu.stwd_mem_o)
            go_ad_l.append(alu.go_ad_i)
            go_st_l.append(alu.go_st_i)
        comb += self.ld_o.eq(Cat(*ld_l))
        comb += self.st_o.eq(Cat(*st_l))
        comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
        comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
        comb += self.load_mem_o.eq(Cat(*ldmem_l))
        comb += self.stwd_mem_o.eq(Cat(*stmem_l))
        comb += Cat(*go_ad_l).eq(self.go_ad_i)
        comb += Cat(*go_st_l).eq(self.go_st_i)

        return m
229
230
class CompUnitLDSTs(CompUnitsBase):
    """ Group of LD/ST Computation Units which all share one memory and
        are all handed the same operation and immediate.
    """

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST Computation Units to create
            * :mem:     memory object passed to every LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs (one per LD/ST unit)
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        # see compldst.py for "internal" opcode
        aluopwid = 4
        ldst_units = [LDSTCompUnit(rwid, aluopwid, alu, mem)
                      for alu in self.alus]

        CompUnitsBase.__init__(self, rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        """ Base-class wiring, plus fan-out of oper_i / imm_i to each unit.
            isalu_i is held at zero on every unit.
        """
        m = CompUnitsBase.elaborate(self, platform)

        # hand the same operation to all units, 4 lower bits though
        for ldst in self.units:
            m.d.comb += ldst.oper_i[0:4].eq(self.oper_i)
            m.d.comb += ldst.imm_i.eq(self.imm_i)
            m.d.comb += ldst.isalu_i.eq(0)

        return m
268
269
class CompUnitALUs(CompUnitsBase):
    """ Group of integer ALU Computation Units, all handed the same
        operation (self.op).
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU / Computation Unit pairs to create
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs, each wrapped in its own Computation Unit
        alus = [ALU(rwid) for _ in range(n_alus)]
        units = [MultiCompUnit(rwid, alu) for alu in alus]

        CompUnitsBase.__init__(self, rwid, units)

    def elaborate(self, platform):
        """ Base-class wiring, plus fan-out of self.op to each unit's oper_i.

            Note: self.oper_i and self.imm_i are not connected here (the
            assignments are commented out below).
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the subset of operation to ALUs
        for alu in self.units:
            comb += alu.oper_i.eq(self.op)
            #comb += alu.oper_i[0:3].eq(self.oper_i)
            #comb += alu.imm_i.eq(self.imm_i)

        return m
308
309
class CompUnitBR(CompUnitsBase):
    """ Group containing a single Branch Computation Unit. """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU and its Computation Unit are kept as
            self.bgt and self.br1 so that a shadow unit can be created
            for them
        """
        self.opwid = opwid

        # inputs
        self.op = CompALUOpSubset("cua_i")  # TODO - CompALUBranchSubset
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        self.bgt = BranchALU(rwid)
        self.br1 = MultiCompUnit(rwid, self.bgt)
        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """ Base-class wiring, plus fan-out of self.oper_i to each unit.

            Note: self.op and self.imm_i are not connected here (the
            assignments are commented out below).
        """
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation to all units
        for alu in self.units:
            #comb += alu.oper_i.eq(self.op) # TODO
            comb += alu.oper_i.eq(self.oper_i)
            #comb += alu.imm_i.eq(self.imm_i)

        return m
345
346
class FunctionUnits(Elaboratable):
    """ Wraps an FU-FU dependency matrix and an FU-Register dependency
        matrix (both created in elaborate) behind one set of signals.
    """

    def __init__(self, n_reg, n_int_alus, n_src, n_dst):
        """ Inputs:

            * :n_reg:      width of the per-register signals
            * :n_int_alus: width of the per-Function-Unit signals
            * :n_src:      number of source operand signals to create
            * :n_dst:      number of destination signals to create
        """
        self.n_src, self.n_dst = n_src, n_dst
        self.n_reg = n_reg
        self.n_int_alus = nf = n_int_alus

        self.g_int_rd_pend_o = Signal(n_reg, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_reg, reset_less=True)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # arrays
        src = []
        rsel = []
        rd = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
            rsel.append(Signal(n_reg, name="src%d_rsel_o" % j, reset_less=True))
            rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
        dst = []
        dsel = []
        wr = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match src1/src2
            dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
            dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j, reset_less=True))
            wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
        # NOTE(review): wpnd and pend are filled in here but are not
        # referenced again anywhere in this class - confirm whether they
        # are still needed.
        wpnd = []
        pend = []
        for i in range(nf):
            j = i + 1  # name numbering to match src1/src2
            pend.append(Signal(nf, name="rd_src%d_pend_o" % j, reset_less=True))
            wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j, reset_less=True))

        self.dest_i = Array(dst)  # Dest in (top)
        self.src_i = Array(src)  # oper in (top)

        # for Register File Select Lines (horizontal), per-reg
        self.dst_rsel_o = Array(dsel)  # dest reg (bot)
        self.src_rsel_o = Array(rsel)  # src reg (bot)

        self.go_rd_i = Array(rd)
        self.go_wr_i = Array(wr)

        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        """ Creates the two dependency matrices and wires them to each
            other and to this module's signals.

            Side effect: sets self.wr_pend_o (it only exists once this
            method has been called).  Returns the nmigen Module.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        n_intfus = self.n_int_alus

        # NOTE(review): both matrices are created with literal 2, 1 as
        # their last two arguments whilst the loops below run over
        # self.n_src / self.n_dst - presumably these must agree; confirm
        # against the FUFUDepMatrix / FURegDepMatrix constructors.

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus, 2, 1)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_reg, 2, 1)
        m.submodules.intregdeps = intregdeps

        comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
        comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)

        # the FU-Reg matrix's own vector outputs are fed back into its
        # pending inputs
        comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)

        # FU-Reg pending outputs drive the FU-FU matrix pending inputs
        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.wr_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.readable_o.eq(intfudeps.readable_o)
        comb += self.writable_o.eq(intfudeps.writable_o)

        # Connect function issue / arrays, and dest/src1/src2
        for i in range(self.n_src):
            print (i, self.go_rd_i, intfudeps.go_rd_i)
            comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += intregdeps.src_i[i].eq(self.src_i[i])
            comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
            comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
        for i in range(self.n_dst):
            print (i, self.go_wr_i, intfudeps.go_wr_i)
            comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += intregdeps.dest_i[i].eq(self.dest_i[i])
            comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
            comb += self.dst_rsel_o[i].eq(intregdeps.dest_rsel_o[i])
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        return m
445
446
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: register files, test memory, issue units,
        Computation Units (ALU, LD/ST, Branch), dependency matrices,
        priority pickers and shadow matrices, all wired together in
        elaborate().
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # Memory (test for now)
        self.mem = TestMemory(self.rwid, 8)  # not too big, takes too long

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_op = CompALUOpSubset("alu")
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(range(n_regs), reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(range(n_regs), reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(range(n_regs), reset_less=True)  # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True)  # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted
        self.busy_o = Signal(reset_less=True)  # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Instantiates all submodules and wires them together.

            Returns the nmigen Module.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs
        m.submodules.mem = mem = self.mem

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3)  # 1 BR ALUs

        # LDST Comp Units
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)

        # Comp Units
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt  # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        fu_n_src = 2
        fu_n_dst = 1
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus,
                                                     fu_n_src, fu_n_dst)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Memory Priority Picker 1: one gateway per memory port
        # picks 1 reader and 1 writer to intreg
        mempick1 = GroupPicker(n_ldsts, 1, 1)
        m.submodules.mempick1 = mempick1

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0  # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        # picks 1 reader and 1 writer to intreg
        ipick1 = GroupPicker(n_intfus, fu_n_src, fu_n_dst)
        m.submodules.intpick1 = ipick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        # ---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        # ---------

        # ---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        # ---------
        comb += [regdecode.dest_i.eq(self.int_dest_i),
                 regdecode.src1_i.eq(self.int_src1_i),
                 regdecode.src2_i.eq(self.int_src2_i),
                 regdecode.enable_i.eq(self.reg_enable_i),
                 self.issue_o.eq(issueunit.issue_o)
                 ]

        # take these to outside (issue needs them)
        comb += cua.op.eq(self.alu_op)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i[0].eq(regdecode.dest_o)
        comb += intfus.src_i[0].eq(regdecode.src1_o)
        comb += intfus.src_i[1].eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        # ---------
        # Memory Function Unit
        # ---------
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)

        comb += memfus.fn_issue_i.eq(cul.issue_i)  # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o)  # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b)  # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[3]):  # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[2]):  # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &
                           cul.adr_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &
                           cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(go_ld_i)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        # ---------
        # merge shadow matrices outputs
        # ---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        # ---------
        # connect fu-fu matrix
        # ---------

        # Group Picker... done manually for now.
        go_rd_o = ipick1.go_rd_o
        go_wr_o = ipick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        for i in range(fu_n_src):
            comb += go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])  # rd
        for i in range(fu_n_dst):
            comb += go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])  # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus])  # die

        # Connect Picker
        # ---------
        int_rd_o = intfus.readable_o
        rrel_o = cu.rd_rel_o
        rqrl_o = cu.req_rel_o
        for i in range(fu_n_src):
            comb += ipick1.rd_rel_i[i][0:n_intfus].eq(rrel_o[i][0:n_intfus])
            comb += ipick1.readable_i[i][0:n_intfus].eq(int_rd_o[0:n_intfus])
        int_wr_o = intfus.writable_o
        for i in range(fu_n_dst):
            # XXX FIXME: rqrl_o[i] here
            comb += ipick1.req_rel_i[i][0:n_intfus].eq(rqrl_o[0:n_intfus])
            comb += ipick1.writable_i[i][0:n_intfus].eq(int_wr_o[0:n_intfus])

        # ---------
        # Shadow Matrix
        # ---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        # ---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            #comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
            # XXX experiment: use ~cu.busy_o instead.  *should* be good
            # because the comp unit is only free once completed
            comb += shadows.s_good_i[i][0:n_intfus].eq(~cu.busy_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        # ---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1 << i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        # one bit per integer Function Unit: derived from n_intfus rather
        # than hard-coded, so that it follows the FU count automatically
        issue_mask = (1 << n_intfus) - 1

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & issue_mask)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & issue_mask)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        # ---------
        # Connect Register File(s)
        # ---------
        comb += int_dest.wen.eq(intfus.dst_rsel_o[0])
        comb += int_src1.ren.eq(intfus.src_rsel_o[0])
        comb += int_src2.ren.eq(intfus.src_rsel_o[1])

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        for i in range(fu_n_src):
            comb += cu.go_rd_i[i][0:n_intfus].eq(go_rd_o[i][0:n_intfus])
        for i in range(fu_n_dst):
            comb += cu.go_wr_i[i][0:n_intfus].eq(go_wr_o[i][0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields the contents of both register files, followed by this
            module's register-number inputs, issue_o and the branch signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything yielded by __iter__, as a list. """
        return list(self)
801
802
803 class IssueToScoreboard(Elaboratable):
804
805 def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
806 self.qlen = qlen
807 self.n_in = n_in
808 self.n_out = n_out
809 self.rwid = rwid
810 self.opw = opwid
811 self.n_regs = n_regs
812
813 mqbits = unsigned(int(log(qlen) / log(2))+2)
814 self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
815 self.p_ready_o = Signal() # instructions were added
816 self.data_i = Instruction._nq(n_in, "data_i")
817
818 self.busy_o = Signal(reset_less=True) # at least one CU is busy
819 self.qlen_o = Signal(mqbits, reset_less=True)
820
821 def elaborate(self, platform):
822 m = Module()
823 comb = m.d.comb
824 sync = m.d.sync
825
826 iq = InstructionQ(self.rwid, self.opw, self.qlen,
827 self.n_in, self.n_out)
828 sc = Scoreboard(self.rwid, self.n_regs)
829 m.submodules.iq = iq
830 m.submodules.sc = sc
831
832 # get at the regfile for testing
833 self.intregs = sc.intregs
834
835 # and the "busy" signal and instruction queue length
836 comb += self.busy_o.eq(sc.busy_o)
837 comb += self.qlen_o.eq(iq.qlen_o)
838
839 # link up instruction queue
840 comb += iq.p_add_i.eq(self.p_add_i)
841 comb += self.p_ready_o.eq(iq.p_ready_o)
842 for i in range(self.n_in):
843 comb += eq(iq.data_i[i], self.data_i[i])
844
845 # take instruction and process it. note that it's possible to
846 # "inspect" the queue contents *without* actually removing the
847 # items. items are only removed when the
848
849 # in "waiting" state
850 wait_issue_br = Signal()
851 wait_issue_alu = Signal()
852 wait_issue_ls = Signal()
853
854 with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
855 # set instruction pop length to 1 if the unit accepted
856 with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
857 with m.If(iq.qlen_o != 0):
858 comb += iq.n_sub_i.eq(1)
859 with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
860 with m.If(iq.qlen_o != 0):
861 comb += iq.n_sub_i.eq(1)
862 with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
863 with m.If(iq.qlen_o != 0):
864 comb += iq.n_sub_i.eq(1)
865
866 # see if some instruction(s) are here. note that this is
867 # "inspecting" the in-place queue. note also that on the
868 # cycle following "waiting" for fn_issue_o to be set, the
869 # "resetting" done above (insn_i=0) could be re-ASSERTed.
870 with m.If(iq.qlen_o != 0):
871 # get the operands and operation
872 instr = iq.data_o[0]
873 imm = instr.imm_data.data
874 dest = instr.write_reg.data
875 src1 = instr.read_reg1.data
876 src2 = instr.read_reg2.data
877 op = instr.insn_type
878 fu = instr.fn_unit
879 opi = instr.imm_data.ok # immediate set
880
881 # set the src/dest regs
882 comb += sc.int_dest_i.eq(dest)
883 comb += sc.int_src1_i.eq(src1)
884 comb += sc.int_src2_i.eq(src2)
885 comb += sc.reg_enable_i.eq(1) # enable the regfile
886
887 # choose a Function-Unit-Group
888 with m.If(fu == Function.ALU): # alu
889 comb += sc.alu_op.eq_from_execute1(instr)
890 comb += sc.aluissue.insn_i.eq(1)
891 comb += wait_issue_alu.eq(1)
892 with m.Elif((op & (0x3 << 2)) != 0): # branch
893 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
894 comb += sc.br_imm_i.eq(imm)
895 comb += sc.brissue.insn_i.eq(1)
896 comb += wait_issue_br.eq(1)
897 with m.Elif((op & (0x3 << 4)) != 0): # ld/st
898 # see compldst.py
899 # bit 0: ADD/SUB
900 # bit 1: immed
901 # bit 4: LD
902 # bit 5: ST
903 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
904 comb += sc.ls_imm_i.eq(imm)
905 comb += sc.lsissue.insn_i.eq(1)
906 comb += wait_issue_ls.eq(1)
907
908 # XXX TODO
909 # these indicate that the instruction is to be made
910 # shadow-dependent on
911 # (either) branch success or branch fail
912 # yield sc.branch_fail_i.eq(branch_fail)
913 # yield sc.branch_succ_i.eq(branch_success)
914
915 return m
916
def __iter__(self):
    """Iterate over the externally visible signals of this unit.

    Yields ``p_ready_o`` first, then every item obtained by iterating
    each entry of ``data_i`` (in order), and finally ``p_add_i``.
    """
    yield self.p_ready_o
    for entry in self.data_i:
        # each entry is fully expanded before any of its items is emitted
        for item in list(entry):
            yield item
    yield self.p_add_i
922
def ports(self):
    """Return everything produced by iterating ``self`` as a new list."""
    return [port for port in self]
925
926
def power_instr_q(dut, pdecode2, ins, code):
    """Simulation process: push the currently decoded instruction into
    the DUT's instruction queue.

    dut      -- object exposing ``data_i``, ``p_add_i`` and ``p_ready_o``
    pdecode2 -- decoder whose ``e`` record is copied into ``data_i[0]``
    ins/code -- accepted for the caller's convenience; not used here

    The process asserts ``p_add_i`` with the number of instructions sent
    (always one), then keeps stepping until ``p_ready_o`` reads non-zero,
    and finally clears ``p_add_i`` again.
    """
    decoded_batch = [pdecode2.e]
    sendlen = 1

    for slot, decoded in enumerate(decoded_batch):
        yield dut.data_i[slot].eq(decoded)
        # read back the decoded fields purely for the debug print
        insn_type = yield decoded.insn_type
        fn_unit = yield decoded.fn_unit
        print("senddata ", slot, insn_type, fn_unit, decoded)

    yield dut.p_add_i.eq(sendlen)
    yield
    # poll the ready flag, stepping once after every "not ready" reading
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
944
945
def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """Simulation process: hand-assemble one instruction record and push
    it into the DUT's instruction queue.

    The fields of ``dut.data_i[0]`` are driven one by one (operation,
    function unit, both read registers, the write register and the
    immediate), with every register ``ok`` flag forced to 1 and the
    immediate ``ok`` flag set to ``op_imm``.  ``p_add_i`` is then raised
    with a count of one until ``p_ready_o`` reads non-zero, after which
    it is cleared.

    ``branch_success`` / ``branch_fail`` are accepted but not used here.
    """
    pending = [(op, funit, dest, imm, op_imm, src1, src2)]
    sendlen = 1

    for idx, fields in enumerate(pending):
        insn_type, fn_unit, wreg, immediate, imm_ok, rreg1, rreg2 = fields
        slot = dut.data_i[idx]
        # (signal, value) pairs, in the order they must be driven
        assignments = (
            (slot.insn_type, insn_type),
            (slot.fn_unit, fn_unit),
            (slot.read_reg1.data, rreg1),
            (slot.read_reg1.ok, 1),  # XXX TODO
            (slot.read_reg2.data, rreg2),
            (slot.read_reg2.ok, 1),  # XXX TODO
            (slot.write_reg.data, wreg),
            (slot.write_reg.ok, 1),  # XXX TODO
            (slot.imm_data.data, immediate),
            (slot.imm_data.ok, imm_ok),
        )
        for signal, value in assignments:
            yield signal.eq(value)
        # read the whole record back for the debug print
        di = yield slot
        print("senddata %d %x" % (idx, di))

    yield dut.p_add_i.eq(sendlen)
    yield
    # poll the ready flag, stepping once after every "not ready" reading
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
980
981
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: drive one instruction straight onto the
    scoreboard's issue inputs and wait for it to be issued.

    Any ``op`` with bit 2 or bit 3 set is sent to the branch issue unit;
    everything else goes to the ALU issue unit.  In both cases only the
    bottom two bits of ``op`` are forwarded as the operation.
    """
    yield from disable_issue(dut)

    # register numbers
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    # pick the function-unit group, then drive it
    is_branch = (op & 0b1100) != 0
    if is_branch:
        dut_issue, oper_i, imm_i = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_i, imm_i = dut.aluissue, dut.alu_oper_i, dut.alu_imm_i
    yield dut_issue.insn_i.eq(1)
    yield oper_i.eq(Const(op & 0x3, 2))
    yield imm_i.eq(imm)

    yield dut.reg_enable_i.eq(1)

    # mark the instruction as shadow-dependent on (either) branch
    # success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
1006
1007
def print_reg(dut, rnums):
    """Simulation process: read the listed integer registers and print
    them, in hex, on a single ``reg <numbers>: <values>`` line.
    """
    hexvals = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        hexvals.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print("reg %s: %s" % (labels, ','.join(hexvals)))
1015
1016
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Build a list of ``n_ops`` random instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when ``shadowing``
    is true a ``(0, 0)`` pair is appended as a seventh field.

    Register numbers are drawn from ``1 .. dut.n_regs-1``, the immediate
    from ``1 .. (1 << dut.rwid)-1`` and the operation from
    ``0 .. max_opnums``.  The order of the random draws (src1, src2, imm,
    dest, op, opi) determines the sequence produced for a given seed.
    """
    def rand_reg():
        return randint(1, dut.n_regs-1)

    ops = []
    for _ in range(n_ops):
        src1 = rand_reg()
        src2 = rand_reg()
        imm = randint(1, (1 << dut.rwid)-1)
        dest = rand_reg()
        op = randint(0, max_opnums)
        # opi is 1 only when the draw from {0, 1, 2} comes out as zero
        opi = int(randint(0, 2) == 0)

        fields = (src1, src2, dest, op, opi, imm)
        if shadowing:
            fields = fields + ((0, 0),)
        ops.append(fields)
    return ops
1032
1033
def wait_for_busy_clear(dut):
    """Simulation process: keep stepping until ``dut.busy_o`` reads zero.

    ``busy`` is printed once for every non-zero reading.
    """
    still_busy = yield dut.busy_o
    while still_busy:
        print("busy",)
        yield
        still_busy = yield dut.busy_o
1041
1042
def disable_issue(dut):
    """Simulation process: clear ``insn_i`` on the ALU, branch and LD/ST
    issue units, in that order.
    """
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1047
1048
def wait_for_issue(dut, dut_issue):
    """Simulation process: step until ``dut_issue.fn_issue_o`` reads
    non-zero, then drop all issue requests and disable the regfile.

    ``busy`` is printed once for every reading that is still zero.
    """
    issued = yield dut_issue.fn_issue_o
    while not issued:
        print("busy",)
        # (debug hook: yield from print_reg(dut, [1,2,3]))
        yield
        issued = yield dut_issue.fn_issue_o

    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1060
1061
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: issue a short program containing a branch and
    compare the DUT's register file with the software model.

    dut    -- scoreboard under test (driven through ``int_instr``)
    alusim -- software register model; must provide ``rwidth``,
              ``setval``, ``op``, ``check`` and ``dump``

    Instructions in this test are 5-tuples
    ``(src1, src2, dest, op, (shadow_on, shadow_off))``.  For a branch
    (``op >= 4``) the ``dest`` field instead carries a
    ``(branch_ok, branch_fail)`` pair of instruction lists, and ``src2``
    is used as the destination register number.  None of these tuples
    carries an immediate, so an immediate of 0 is used throughout.
    """

    iseed = 3

    for i in range(1):

        print("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1 << alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops(..., shadowing=True) returns
            # 7-field tuples, which the 5-field unpacking below cannot
            # take; this disabled path needs updating before re-enabling.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append((3, 5, 2, 0, (0, 0)))
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append(None)
            branch_fail.append((1, 1, 2, 0, (0, 1)))
            #branch_fail.append( None )
            insts.append((6, 4, (branch_ok, branch_fail), 4, (0, 0)))

        # the issue loop below consumes insts, so keep a copy for the model
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts  # same list object: appends below extend the queue
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o  # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: nothing left to shadow against
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print("instr %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes (dut, op, imm, src1, src2, dest, succ, fail):
            # the immediate used to be omitted here, which shifted every
            # following argument and raised TypeError (one argument short).
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the same program on the software model, following only
        # the side of each branch that the model says was taken
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print("sim %d: (%d, %d, %d, %d, (%d, %d))" %
                  (i, src1, src2, dest, op, shadow_on, shadow_off))
            # same (op, op_imm, imm, src1, src2, dest) call form that
            # scoreboard_sim uses on this model; no immediate here.
            # NOTE(review): confirm against the model's op() signature.
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1174
1175
def power_sim(m, dut, pdecode2, instruction, alusim):
    """Simulation process: assemble a two-instruction POWER program, feed
    it through the decoder into the DUT's instruction queue, then compare
    the DUT's registers with the software model.

    m           -- top-level module (not used by the process itself)
    dut         -- instruction-queue/scoreboard unit under test
    pdecode2    -- decoder whose output record is queued per instruction
    instruction -- signal that receives each raw binary instruction
    alusim      -- software register model (``setval``/``check``/``dump``)
    """
    seed(0)

    # preload every register except r0 with its own number (the values
    # are deliberately not random at the moment)
    for regnum in range(1, dut.n_regs):
        yield dut.intregs.regs[regnum].reg.eq(regnum)
        alusim.setval(regnum, regnum)

    # the program to run (two addi set-up instructions are currently
    # left out)
    asm_lines = ["add 1, 3, 2",
                 "add 4, 3, 5"]
    with Program(asm_lines) as program:
        binaries = program.generate_instructions()
        listing = program.assembly.splitlines()

        # present each instruction to the decoder, then queue the result;
        # power_instr_q only returns once the queue has accepted it
        for ins, code in zip(binaries, listing):
            yield instruction.eq(ins)  # raw binary instr.
            yield

            print("binary 0x{:X}".format(ins & 0xffffffff))
            print("assembly", code)

            # (model update currently disabled:
            #  alusim.op(op, opi, imm, src1, src2, dest))
            yield from power_instr_q(dut, pdecode2, ins, code)

    # wait for the instruction queue to drain...
    queued = yield dut.qlen_o
    while queued != 0:
        yield
        queued = yield dut.qlen_o
    # ...allow a few more steps, then wait for every unit to go idle
    for _ in range(4):
        yield
    yield from wait_for_busy_clear(dut)

    # check status
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
1225
1226
def scoreboard_sim(dut, alusim):
    """Simulation process: queue a list of hand-written instructions via
    ``instr_q``, mirror each one on the software model ``alusim``, then
    compare the DUT's registers with the model.

    The issue loop expects 8-field tuples
    ``(src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail))``.
    Only the ``if True:`` group below uses that layout; most of the
    disabled (``if False:``) groups hold vectors with fewer fields and
    would fail to unpack if enabled without being converted first.
    """

    seed(0)

    for i in range(1):

        # preload each register (except r0) with its own number; the
        # random / patterned alternatives are left commented out
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            # NOTE: create_random_ops returns 7-field tuples (no fn_unit)
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x20, 1, 1, (0, 0)))  # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append((1, 2, 2, 1, 1, 20, (0, 0)))

        if False:
            instrs.append((7, 3, 2, 4, 0, 0, (0, 0)))
            instrs.append((7, 6, 6, 2, 0, 0, (0, 0)))
            instrs.append((1, 7, 2, 2, 0, 0, (0, 0)))

        # the currently active test: two ALU adds in the 8-field layout
        if True:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
            instrs.append((5, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))
        if False:
            instrs.append((3, 5, 5, InternalOp.OP_MUL_L64, Function.ALU,
                           1, 7, (0, 0)))
        if False:
            instrs.append((2, 3, 3, InternalOp.OP_ADD, Function.ALU,
                           0, 0, (0, 0)))

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append((3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append((6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append((1, 4, 7, 4, 1, 23652, (0, 0)))

        # the groups from here on use 4-, 5- and 6-field layouts
        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append((3, 6, 7, 2))
            instrs.append((4, 4, 7, 1))

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append((5, 2, 5, 2))
            instrs.append((2, 6, 3, 0))
            instrs.append((4, 2, 2, 1))

        if False:
            # overrides registers 5 and 3 in both the DUT and the model
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # as above, with a different value in register 5
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append((4, 3, 5, 1, 0, (0, 0)))
            instrs.append((5, 2, 3, 1, 0, (0, 0)))
            instrs.append((7, 1, 5, 2, 0, (0, 0)))
            instrs.append((5, 6, 6, 4, 0, (0, 0)))
            instrs.append((7, 5, 2, 2, 0, (1, 0)))
            instrs.append((1, 7, 5, 0, 0, (0, 1)))
            instrs.append((1, 6, 1, 2, 0, (1, 0)))
            instrs.append((1, 6, 7, 3, 0, (0, 0)))
            instrs.append((6, 7, 7, 0, 0, (0, 0)))

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            print (i, instr)
            src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr

            print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
                  (i, src1, src2, dest, op, fn_unit, opi, imm))
            # update the software model first, then queue on the DUT
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, fn_unit, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first until the queue length reads zero...
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # ...then a few extra steps, then until busy_o clears
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1386
1387
def test_scoreboard():
    """Build the decoder + scoreboard test bench, dump it as RTLIL and
    run the ``power_sim`` simulation process on it.

    Writes ``test_scoreboard6600.il`` and ``test_powerboard6600.vcd`` to
    the current directory.
    """
    regwidth = 64
    dut = IssueToScoreboard(2, 1, 1, regwidth, 8, 8)
    alusim = RegSim(regwidth, 8)
    memsim = MemSim(16, 8)  # constructed but not used by power_sim

    m = Module()
    comb = m.d.comb
    instruction = Signal(32)  # raw 32-bit instruction fed to the decoder

    # set up the decoder (and simulator, later)
    pdecode = create_pdecode()
    #simulator = ISA(pdecode, initial_regs)
    pdecode2 = PowerDecode2(pdecode)

    m.submodules.pdecode2 = pdecode2
    m.submodules.sim = dut

    # hook the raw instruction up to the decoder input
    comb += pdecode2.dec.raw_opcode_in.eq(instruction)
    comb += pdecode2.dec.bigendian.eq(0)  # little / big?

    # dump the design as RTLIL
    il_text = rtlil.convert(m, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = power_sim(m, dut, pdecode2, instruction, alusim)
    run_simulation(m, sim_process, vcd_name='test_powerboard6600.vcd')

    # alternative test benches, currently disabled:
    #run_simulation(dut, scoreboard_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')

    # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                     vcd_name='test_scoreboard6600.vcd')
1420
1421
# run the scoreboard test bench when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()