msr and pc moved to "state" in PowerDecode2
[soc.git] / src / soc / fu / compunits / test / test_compunit.py
1 from nmigen import Module, Signal, ResetSignal
2 from nmigen.back.pysim import Simulator, Delay, Settle
3 from nmutil.formaltest import FHDLTestCase
4 from nmigen.cli import rtlil
5 import unittest
6 from soc.decoder.power_decoder import (create_pdecode)
7 from soc.decoder.power_decoder2 import (PowerDecode2)
8 from soc.decoder.power_enums import Function
9 from soc.decoder.isa.all import ISA
10
11 from soc.experiment.compalu_multi import find_ok # hack
12 from soc.config.test.test_loadstore import TestMemPspec
13
14
def set_cu_input(cu, idx, data):
    """Feed one source operand into the CompUnit via its read handshake.

    Generator meant to be driven with ``yield from`` inside a simulator
    sync process (a bare ``yield`` waits one clock cycle).

    cu   -- the CompUnit under test
    idx  -- index of the source operand (into cu.src_i / cu.rd.*)
    data -- integer value to place on cu.src_i[idx]
    """
    rdop = cu.get_in_name(idx)  # operand name, used only in the debug print
    yield cu.src_i[idx].eq(data)

    # poll once per clock until the read-request for this operand is raised
    while True:
        rd_rel_o = yield cu.rd.rel_o[idx]
        print("rd_rel %d wait HI" % idx, rd_rel_o, rdop, hex(data))
        if rd_rel_o:
            break
        yield

    # acknowledge the request
    yield cu.rd.go_i[idx].eq(1)
    while True:
        yield
        # re-sample the request one cycle after asserting go: the loop
        # finishes as soon as it reads non-zero, otherwise it reports and
        # waits a further cycle before sampling again
        rd_rel_o = yield cu.rd.rel_o[idx]
        if rd_rel_o:
            break
        print("rd_rel %d wait HI" % idx, rd_rel_o)
        yield

    # drop the acknowledge and clear the operand bus
    yield cu.rd.go_i[idx].eq(0)
    yield cu.src_i[idx].eq(0)
34
35
def get_cu_output(cu, idx, code):
    """Collect one destination operand from the CompUnit.

    Generator meant to be driven with ``yield from`` inside a simulator
    sync process.  Returns the value sampled from cu.dest[idx].

    cu   -- the CompUnit under test
    idx  -- index of the destination operand (into cu.dest / cu.wr.*)
    code -- assembly text of the instruction, used in messages only

    Raises AssertionError if bit ``idx`` of cu.wrmask is not set.
    """
    wrmask = yield cu.wrmask
    wrop = cu.get_out_name(idx)
    wrok_rec = cu.get_out(idx)
    # find_ok picks the name of the "ok" field out of the output record
    fname = find_ok(wrok_rec.fields)
    wrok = yield getattr(wrok_rec, fname)
    print("wr_rel mask", repr(code), idx, wrop, bin(wrmask), fname, wrok)
    # explicit raise (rather than "assert") so the check survives python -O
    if not wrmask & (1 << idx):
        raise AssertionError(
            "get_cu_output '%s': mask bit %d not set\n"
            "write-operand '%s' Data.ok likely not set (%s)"
            % (code, idx, wrop, hex(wrok)))

    # poll once per clock until the write-request for this operand is raised
    while True:
        wr_relall_o = yield cu.wr.rel_o
        wr_rel_o = yield cu.wr.rel_o[idx]
        print("wr_rel %d wait" % idx, hex(wr_relall_o), wr_rel_o)
        if wr_rel_o:
            break
        yield

    # acknowledge, let combinatorial logic settle, then sample the result
    yield cu.wr.go_i[idx].eq(1)
    yield Settle()
    result = yield cu.dest[idx]
    yield
    yield cu.wr.go_i[idx].eq(0)
    print("result", repr(code), idx, wrop, wrok, hex(result))

    return result
62
63
def set_cu_inputs(cu, inp):
    """Feed every operand in ``inp`` into the CompUnit, one after another.

    cu  -- the CompUnit under test
    inp -- dict mapping source-operand index to integer value

    Generator meant to be driven with ``yield from`` inside a simulator
    sync process.
    """
    print("set_cu_inputs", inp)
    for idx, data in inp.items():
        yield from set_cu_input(cu, idx, data)
    # gets out of sync when checking busy if there is no wait, here.
    if not inp:
        yield  # wait one cycle
71
72
def set_operand(cu, dec2, sim):
    """Load the decoded operation into the CompUnit and pulse issue_i.

    cu   -- the CompUnit under test
    dec2 -- decoder whose ``e`` record is copied into cu.oper_i
    sim  -- unused; kept so existing callers do not need changing

    Generator meant to be driven with ``yield from`` inside a simulator
    sync process: issue_i is held at 1 for one clock, then cleared, and a
    further clock is waited before returning.
    """
    yield from cu.oper_i.eq_from_execute1(dec2.e)
    yield cu.issue_i.eq(1)
    yield
    yield cu.issue_i.eq(0)
    yield
79
80
def get_cu_outputs(cu, code):
    """Collect every destination operand the CompUnit is offering.

    cu   -- the CompUnit under test
    code -- assembly text of the instruction, used in messages only

    Generator meant to be driven with ``yield from`` inside a simulator
    sync process.  Returns a dict mapping output-operand name to value;
    the dict is empty when cu.wrmask reads as zero.
    """
    res = {}
    # wait for pipeline to indicate valid. this because for long
    # pipelines (or FSMs) the write mask is only valid at that time.
    if hasattr(cu, "alu"):  # ALU CompUnits
        while True:
            valid_o = yield cu.alu.n.valid_o
            if valid_o:
                break
            yield
    else:  # LDST CompUnit
        # not a lot can be done about this - simply wait a few cycles
        for _ in range(5):
            yield

    wrmask = yield cu.wrmask
    wr_rel_o = yield cu.wr.rel_o
    print("get_cu_outputs", cu.n_dst, wrmask, wr_rel_o)
    # no point waiting (however really should doublecheck wr.rel)
    if not wrmask:
        return res

    # wait for at least one result
    while True:
        wr_rel_o = yield cu.wr.rel_o
        if wr_rel_o:
            break
        yield

    # pick up each destination whose write-request is currently raised
    for i in range(cu.n_dst):
        wr_rel_o = yield cu.wr.rel_o[i]
        if wr_rel_o:
            result = yield from get_cu_output(cu, i, code)
            wrop = cu.get_out_name(i)
            print("output", i, wrop, hex(result))
            res[wrop] = result
    return res
116
117
def get_inp_indexed(cu, inp):
    """Re-key a name->value operand dict by source-operand index.

    cu  -- object providing ``n_src`` and ``get_in_name(i)``
    inp -- dict mapping input-operand name to value

    Returns a dict mapping index to value, containing only those indices
    whose name (as reported by cu.get_in_name) is present in ``inp``.
    """
    names = ((i, cu.get_in_name(i)) for i in range(cu.n_src))
    return {i: inp[name] for i, name in names if name in inp}
125
126
def get_l0_mem(l0):  # BLECH!
    """Return the memory object buried inside the L0 cache/buffer.

    Two layouts are handled: when l0.pimem has an ``lsui`` attribute the
    memory is l0.pimem.lsui.mem, otherwise it is l0.pimem.mem.mem.
    """
    if hasattr(l0.pimem, 'lsui'):
        return l0.pimem.lsui.mem
    return l0.pimem.mem.mem
131
132
def setup_test_memory(l0, sim):
    """Copy the ISA simulator's memory into the hardware test memory.

    l0  -- L0 cache/buffer whose memory is located with get_l0_mem
    sim -- ISA simulator; its ``mem.ld`` supplies the word for each row

    Generator meant to be driven with ``yield from`` inside a simulator
    process.  Also prints both memories for debugging.
    """
    mem = get_l0_mem(l0)
    print("before, init mem", mem.depth, mem.width, mem)
    for i in range(mem.depth):
        # row i is loaded from offset i*8 with length 8
        # NOTE(review): presumably byte address / byte count, i.e. one
        # 64-bit word per row - confirm against sim.mem.ld
        data = sim.mem.ld(i*8, 8, False)
        print("init ", i, hex(data))
        yield mem._array[i].eq(data)
    yield Settle()
    for k, v in sim.mem.mem.items():
        print(" %6x %016x" % (k, v))
    print("before, nmigen mem dump")
    for i in range(mem.depth):
        actual_mem = yield mem._array[i]
        print(" %6i %016x" % (i, actual_mem))
147
148
def dump_sim_memory(dut, l0, sim, code):
    """Print the ISA simulator memory and the hardware memory.

    dut  -- unused; kept for signature parity with check_sim_memory
    l0   -- L0 cache/buffer whose memory is located with get_l0_mem
    sim  -- ISA simulator; ``sim.mem.mem`` is printed as key/value pairs
    code -- unused; kept for signature parity with check_sim_memory

    Generator meant to be driven with ``yield from`` inside a simulator
    process (each hardware row is read with a ``yield``).
    """
    mem = get_l0_mem(l0)
    print("sim mem dump")
    for k, v in sim.mem.mem.items():
        print(" %6x %016x" % (k, v))
    print("nmigen mem dump")
    for i in range(mem.depth):
        actual_mem = yield mem._array[i]
        print(" %6i %016x" % (i, actual_mem))
158
159
def check_sim_memory(dut, l0, sim, code):
    """Check that the hardware memory matches the ISA simulator memory.

    dut  -- test case providing ``assertEqual``
    l0   -- L0 cache/buffer whose memory is located with get_l0_mem
    sim  -- ISA simulator; ``sim.mem.ld`` supplies the expected rows
    code -- assembly text of the instruction, used in the failure message

    Generator meant to be driven with ``yield from`` inside a simulator
    process.  Every row of the hardware memory is compared; the failure
    message carries the instruction, row index, expected and actual value.
    """
    mem = get_l0_mem(l0)

    for i in range(mem.depth):
        # same addressing as used when the memory was initialised:
        # row i corresponds to offset i*8, length 8
        expected_mem = sim.mem.ld(i*8, 8, False)
        actual_mem = yield mem._array[i]
        dut.assertEqual(expected_mem, actual_mem,
                        "%s %d %x %x" % (code, i,
                                         expected_mem, actual_mem))
169
170
class TestRunner(FHDLTestCase):
    """Run a list of test programs against one CompUnit in simulation.

    Each instruction is executed both by the ISA simulator and by the
    hardware CompUnit, and the results are compared through ``iodef``.
    """

    def __init__(self, test_data, fukls, iodef, funit, bigendian):
        """Store the test configuration.

        test_data -- iterable of test objects (name, program, regs, sprs,
                     cr, mem, msr are read from each)
        fukls     -- callable constructing the CompUnit under test
        iodef     -- object providing get_cu_inputs / check_cu_outputs
        funit     -- Function enum member identifying the unit type
        bigendian -- endianness passed to the decoder and ISA simulator
        """
        # "run_all" is the method the test framework will invoke
        super().__init__("run_all")
        self.test_data = test_data
        self.fukls = fukls
        self.iodef = iodef
        self.funit = funit
        self.bigendian = bigendian

    def execute(self, cu, l0, instruction, pdecode2, simdec2, test):
        """Run every instruction of one test through simulator and CompUnit.

        Generator meant to be driven with ``yield from`` inside the
        simulator sync process created by run_all.

        cu          -- the CompUnit under test
        l0          -- L0 cache/buffer (only used when funit is LDST)
        instruction -- Signal that feeds the raw opcode to the decoder
        pdecode2    -- decoder wired to the hardware
        simdec2     -- separate decoder instance handed to the ISA simulator
        test        -- test object describing program and initial state
        """
        program = test.program
        print("test", test.name, test.mem)
        gen = list(program.generate_instructions())
        insncode = program.assembly.splitlines()
        instructions = list(zip(gen, insncode))
        sim = ISA(simdec2, test.regs, test.sprs, test.cr, test.mem,
                  test.msr,
                  initial_insns=gen, respect_pc=True,
                  disassembly=insncode,
                  bigendian=self.bigendian)

        # initialise memory
        if self.funit == Function.LDST:
            yield from setup_test_memory(l0, sim)

        pc = sim.pc.CIA.value
        index = pc//4
        msr = sim.msr.value
        while True:
            print("instr pc", pc)
            try:
                yield from sim.setup_one()
            except KeyError:  # indicates instruction not in imem: stop
                break
            yield Settle()
            ins, code = instructions[index]
            print("instruction @", index, code)

            # ask the decoder to decode this binary data (endian'd)
            yield pdecode2.dec.bigendian.eq(self.bigendian)  # le / be?
            yield pdecode2.state.msr.eq(msr)  # set MSR "state"
            yield pdecode2.state.pc.eq(pc)  # set PC "state"
            yield instruction.eq(ins)  # raw binary instr.
            yield Settle()
            # debugging issue with branch
            if self.funit == Function.BRANCH:
                lk = yield pdecode2.e.do.lk
                fast_out2 = yield pdecode2.e.write_fast2.data
                fast_out2_ok = yield pdecode2.e.write_fast2.ok
                print("lk:", lk, fast_out2, fast_out2_ok)
                op_lk = yield cu.alu.pipe1.p.data_i.ctx.op.lk
                print("op_lk:", op_lk)
                print(dir(cu.alu.pipe1.n.data_o))
            # the decoded function unit must include the unit under test
            fn_unit = yield pdecode2.e.do.fn_unit
            fuval = self.funit.value
            self.assertEqual(fn_unit & fuval, fuval)

            # set operand and get inputs
            yield from set_operand(cu, pdecode2, sim)
            # reset read-operand mask
            rdmask = pdecode2.rdflags(cu)
            #print ("hardcoded rdmask", cu.rdflags(pdecode2.e))
            #print ("decoder rdmask", rdmask)
            yield cu.rdmaskn.eq(~rdmask)

            yield Settle()
            iname = yield from self.iodef.get_cu_inputs(pdecode2, sim)
            inp = get_inp_indexed(cu, iname)

            # reset write-operand mask
            for idx in range(cu.n_dst):
                wrok = cu.get_out(idx)
                fname = find_ok(wrok.fields)
                yield getattr(wrok, fname).eq(0)

            yield Settle()

            # set inputs into CU
            rd_rel_o = yield cu.rd.rel_o
            wr_rel_o = yield cu.wr.rel_o
            print("before inputs, rd_rel, wr_rel: ",
                  bin(rd_rel_o), bin(wr_rel_o))
            assert wr_rel_o == 0, "wr.rel %s must be zero. "\
                "previous instr not written all regs\n"\
                "respec %s" % \
                (bin(wr_rel_o), cu.rwid[1])
            yield from set_cu_inputs(cu, inp)
            rd_rel_o = yield cu.rd.rel_o
            wr_rel_o = yield cu.wr.rel_o
            wrmask = yield cu.wrmask
            print("after inputs, rd_rel, wr_rel, wrmask: ",
                  bin(rd_rel_o), bin(wr_rel_o), bin(wrmask))

            # call simulated operation
            yield from sim.execute_one()
            yield Settle()
            # pick up the state for the *next* loop iteration now;
            # "code" still refers to the instruction just executed
            pc = sim.pc.CIA.value
            index = pc//4
            msr = sim.msr.value

            # get all outputs (one by one, just "because")
            res = yield from get_cu_outputs(cu, code)
            wrmask = yield cu.wrmask
            rd_rel_o = yield cu.rd.rel_o
            wr_rel_o = yield cu.wr.rel_o
            print("after got outputs, rd_rel, wr_rel, wrmask: ",
                  bin(rd_rel_o), bin(wr_rel_o), bin(wrmask))

            # wait for busy to go low
            while True:
                busy_o = yield cu.busy_o
                print("busy", busy_o)
                if not busy_o:
                    break
                yield

            # reset read-mask. IMPORTANT when there are no operands
            yield cu.rdmaskn.eq(0)
            yield

            # debugging issue with branch
            if self.funit == Function.BRANCH:
                lr = yield cu.alu.pipe1.n.data_o.lr.data
                lr_ok = yield cu.alu.pipe1.n.data_o.lr.ok
                print("lr:", hex(lr), lr_ok)

            # sigh. hard-coded. test memory
            if self.funit == Function.LDST:
                yield from dump_sim_memory(self, l0, sim, code)
                yield from check_sim_memory(self, l0, sim, code)
                # LDST: outputs are checked against the CompUnit itself
                yield from self.iodef.check_cu_outputs(res, pdecode2,
                                                       sim, cu,
                                                       code)
            else:
                # everything else: checked against the CompUnit's ALU
                yield from self.iodef.check_cu_outputs(res, pdecode2,
                                                       sim, cu.alu,
                                                       code)

    def run_all(self):
        """Build the design, then simulate every test in self.test_data.

        A VCD trace named "<funit>_simulator.vcd" is written while the
        simulation runs.
        """
        m = Module()
        comb = m.d.comb
        instruction = Signal(32)

        pdecode = create_pdecode()
        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)

        # copy of the decoder for simulator
        simdec = create_pdecode()
        simdec2 = PowerDecode2(simdec)
        m.submodules.simdec2 = simdec2  # pain in the neck

        if self.funit == Function.LDST:
            from soc.experiment.l0_cache import TstL0CacheBuffer
            pspec = TestMemPspec(ldst_ifacetype='test_bare_wb',
                                 addr_wid=48,
                                 mask_wid=8,
                                 reg_wid=64)
            m.submodules.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
            pi = l0.l0.dports[0]
            m.submodules.cu = cu = self.fukls(pi, idx=0, awid=3)
            comb += cu.ad.go_i.eq(cu.ad.rel_o)  # link addr direct to rel
            comb += cu.st.go_i.eq(cu.st.rel_o)  # link store direct to rel
        else:
            m.submodules.cu = cu = self.fukls(0)
            l0 = None

        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
        sim = Simulator(m)

        sim.add_clock(1e-6)

        def process():
            # start with issue de-asserted and let one clock pass
            yield cu.issue_i.eq(0)
            yield

            for test in self.test_data:
                print(test.name)
                with self.subTest(test.name):
                    yield from self.execute(cu, l0, instruction,
                                            pdecode2, simdec2,
                                            test)

        sim.add_sync_process(process)

        name = self.funit.name.lower()
        with sim.write_vcd("%s_simulator.vcd" % name,
                           traces=[]):
            sim.run()