use get_l0_mem in HDLState to get memory data
[soc.git] / src / soc / fu / compunits / test / test_compunit.py
1 from nmigen import Module, Signal, ResetSignal, Memory
2
3 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
4 # Also, check out the cxxsim nmigen branch, and latest yosys from git
5 from nmutil.sim_tmp_alternative import Simulator, Settle
6
7 from nmutil.formaltest import FHDLTestCase
8 from nmigen.cli import rtlil
9 import unittest
10 from openpower.decoder.power_decoder import create_pdecode
11 from openpower.decoder.power_decoder2 import PowerDecode2, get_rdflags
12 from openpower.decoder.power_enums import Function
13 from openpower.decoder.isa.all import ISA
14
15 from soc.experiment.compalu_multi import find_ok # hack
16 from soc.config.test.test_loadstore import TestMemPspec
17
18
def set_cu_input(cu, idx, data):
    """Simulation process: hand one source operand to the CompUnit.

    Places ``data`` on ``cu.src_i[idx]``, waits until ``cu.rd.rel_o[idx]``
    reads as set, raises ``cu.rd.go_i[idx]``, then clocks until
    ``cu.rd.rel_o[idx]`` is sampled as set once more before clearing both
    ``go_i`` and ``src_i`` again.

    This is a generator intended to be driven by the simulator
    (use ``yield from``).

    :param cu: the CompUnit under test
    :param idx: index of the read (source) operand
    :param data: integer value to present on the operand
    """
    rdop = cu.get_in_name(idx)  # only used for the debug print below
    yield cu.src_i[idx].eq(data)

    # wait for the read-request for this operand
    while True:
        rd_rel_o = yield cu.rd.rel_o[idx]
        print("rd_rel %d wait HI" % idx, rd_rel_o, rdop, hex(data))
        if rd_rel_o:
            break
        yield

    # acknowledge the request
    yield cu.rd.go_i[idx].eq(1)
    while True:
        # advance one clock *before* sampling rel_o again
        yield
        rd_rel_o = yield cu.rd.rel_o[idx]
        if rd_rel_o:
            break
        print("rd_rel %d wait HI" % idx, rd_rel_o)
        yield

    # drop the acknowledge and clear the operand
    yield cu.rd.go_i[idx].eq(0)
    yield cu.src_i[idx].eq(0)
38
39
def get_cu_output(cu, idx, code):
    """Simulation process: collect one destination operand from the CompUnit.

    Checks that bit ``idx`` of ``cu.wrmask`` is set, waits for
    ``cu.wr.rel_o[idx]``, raises ``cu.wr.go_i[idx]``, samples
    ``cu.dest[idx]`` after a ``Settle()``, then lowers ``go_i`` one clock
    later.

    This is a generator intended to be driven by the simulator
    (use ``yield from``).

    :param cu: the CompUnit under test
    :param idx: index of the write (destination) operand
    :param code: text of the instruction being tested (debug output only)
    :returns: the value sampled from ``cu.dest[idx]``
    :raises AssertionError: if bit ``idx`` of the write mask is not set
    """
    wrmask = yield cu.wrmask
    wrop = cu.get_out_name(idx)

    # locate the "ok" field of the output record and sample it
    out_rec = cu.get_out(idx)
    fname = find_ok(out_rec.fields)
    wrok = yield getattr(out_rec, fname)
    print("wr_rel mask", repr(code), idx, wrop, bin(wrmask), fname, wrok)
    assert wrmask & (1 << idx), \
        "get_cu_output '%s': mask bit %d not set\n" \
        "write-operand '%s' Data.ok likely not set (%s)" \
        % (code, idx, wrop, hex(wrok))

    # wait for the write-request for this operand
    while True:
        wr_relall_o = yield cu.wr.rel_o
        wr_rel_o = yield cu.wr.rel_o[idx]
        print("wr_rel %d wait" % idx, hex(wr_relall_o), wr_rel_o)
        if wr_rel_o:
            break
        yield

    # acknowledge, and read the result in the same cycle
    yield cu.wr.go_i[idx].eq(1)
    yield Settle()
    result = yield cu.dest[idx]
    yield
    yield cu.wr.go_i[idx].eq(0)
    print("result", repr(code), idx, wrop, wrok, hex(result))

    return result
66
67
def set_cu_inputs(cu, inp):
    """Simulation process: hand every operand in ``inp`` to the CompUnit.

    Operands are delivered one after another through ``set_cu_input``.

    :param cu: the CompUnit under test
    :param inp: dict mapping source-operand index to integer value
    """
    print("set_cu_inputs", inp)
    for idx, data in inp.items():
        yield from set_cu_input(cu, idx, data)
    # gets out of sync when checking busy if there is no wait, here.
    if not inp:
        yield  # wait one cycle
75
76
def set_operand(cu, dec2, sim):
    """Simulation process: load the decoded operation and pulse issue.

    Copies ``dec2.do`` into ``cu.oper_i`` (via ``eq_from_execute1``), holds
    ``cu.issue_i`` high for one clock, lowers it, and waits one more clock.

    :param cu: the CompUnit under test
    :param dec2: decoder object providing the decoded operation as ``do``
    :param sim: unused; kept so existing callers keep working
    """
    yield from cu.oper_i.eq_from_execute1(dec2.do)
    yield cu.issue_i.eq(1)
    yield
    yield cu.issue_i.eq(0)
    yield
83
84
def get_cu_outputs(cu, code):
    """Simulation process: collect all results the CompUnit wants to write.

    Waits for the unit to have a valid write mask, returns early with an
    empty dict when the mask is zero, otherwise waits for at least one
    write-request and fetches every destination whose ``wr.rel_o`` bit is
    set (through ``get_cu_output``).

    :param cu: the CompUnit under test
    :param code: text of the instruction being tested (debug output only)
    :returns: dict mapping destination-operand name to the value read
    """
    # wait for pipeline to indicate valid.  this because for long
    # pipelines (or FSMs) the write mask is only valid at that time.
    if hasattr(cu, "alu"):  # ALU CompUnits
        while True:
            o_valid = yield cu.alu.n.o_valid
            if o_valid:
                break
            yield
    else:  # LDST CompUnit
        # not a lot can be done about this - simply wait a few cycles
        for _ in range(5):
            yield

    wrmask = yield cu.wrmask
    wr_rel_o = yield cu.wr.rel_o
    print("get_cu_outputs", cu.n_dst, wrmask, wr_rel_o)
    # no point waiting (however really should doublecheck wr.rel)
    if not wrmask:
        return {}

    # wait for at least one result
    while True:
        wr_rel_o = yield cu.wr.rel_o
        if wr_rel_o:
            break
        yield

    res = {}
    for i in range(cu.n_dst):
        # the request bit is re-sampled for each destination because
        # fetching an earlier one consumes clock cycles
        wr_rel_o = yield cu.wr.rel_o[i]
        if wr_rel_o:
            result = yield from get_cu_output(cu, i, code)
            wrop = cu.get_out_name(i)
            print("output", i, wrop, hex(result))
            res[wrop] = result
    return res
120
121
def get_inp_indexed(cu, inp):
    """Re-key a name-indexed operand dict by source-operand index.

    :param cu: object providing ``n_src`` and ``get_in_name(i)``
    :param inp: dict mapping operand name to value
    :returns: dict mapping operand index to value, containing only those
              indices whose name appears in ``inp``
    """
    names = map(cu.get_in_name, range(cu.n_src))
    return {i: inp[name] for i, name in enumerate(names) if name in inp}
129
130
def get_l0_mem(l0):
    """Return the memory object buried inside the L0 cache/buffer ``l0``.

    BLECH!  this is awful!  hunting around through structures.  Three
    layouts are handled, tried in this order:

    * ``l0.pimem.lsui.mem`` when ``l0.pimem`` has an ``lsui`` attribute
    * ``l0.pimem.mem`` when that is itself a ``Memory``
    * ``l0.pimem.mem.mem`` otherwise

    :param l0: object with a ``pimem`` attribute
    :returns: the memory object found
    """
    if hasattr(l0.pimem, 'lsui'):
        return l0.pimem.lsui.mem
    mem = l0.pimem.mem
    # euuurg this one is for TestSRAMLoadStore1
    if isinstance(mem, Memory):
        return mem
    return mem.mem
138
139
def setup_tst_memory(l0, sim):
    """Simulation process: copy the ISA simulator's memory into the HDL one.

    For each row of the HDL memory, 8 bytes are loaded from the simulator
    at byte address ``row * 8`` and written into the corresponding memory
    row.  Both memories are printed afterwards for debugging.

    :param l0: L0 cache/buffer holding the HDL memory (see ``get_l0_mem``)
    :param sim: ISA simulator whose ``mem`` is the source of the data
    """
    mem = get_l0_mem(l0)
    print("before, init mem", mem.depth, mem.width, mem)
    for i in range(mem.depth):
        data = sim.mem.ld(i*8, 8, False)
        print("init ", i, hex(data))
        yield mem._array[i].eq(data)
    yield Settle()

    # dump of the simulator-side memory
    for k, v in sim.mem.mem.items():
        print(" %6x %016x" % (k, v))

    # dump of the HDL-side memory, read back from the simulation
    print("before, nmigen mem dump")
    for i in range(mem.depth):
        actual_mem = yield mem._array[i]
        print(" %6i %016x" % (i, actual_mem))
154
155
def dump_sim_memory(dut, l0, sim, code):
    """Simulation process: print ISA-simulator and HDL memory contents.

    :param dut: unused; kept so existing callers keep working
    :param l0: L0 cache/buffer holding the HDL memory (see ``get_l0_mem``)
    :param sim: ISA simulator whose ``mem`` is printed first
    :param code: unused; kept so existing callers keep working
    """
    mem = get_l0_mem(l0)

    print("sim mem dump")
    for k, v in sim.mem.mem.items():
        print(" %6x %016x" % (k, v))

    print("nmigen mem dump")
    for i in range(mem.depth):
        actual_mem = yield mem._array[i]
        print(" %6i %016x" % (i, actual_mem))
165
166
def check_sim_memory(dut, l0, sim, code):
    """Simulation process: assert HDL memory matches the ISA simulator's.

    Every row of the HDL memory is compared against the 8 bytes the
    simulator holds at byte address ``row * 8``.

    :param dut: test case object; its ``assertEqual`` performs the check
    :param l0: L0 cache/buffer holding the HDL memory (see ``get_l0_mem``)
    :param sim: ISA simulator providing the expected contents
    :param code: text of the instruction under test, used in the failure
                 message
    """
    mem = get_l0_mem(l0)

    for i in range(mem.depth):
        expected_mem = sim.mem.ld(i*8, 8, False)
        actual_mem = yield mem._array[i]
        dut.assertEqual(expected_mem, actual_mem,
                        "%s %d %x %x" % (code, i,
                                         expected_mem, actual_mem))
176
177
class TestRunner(FHDLTestCase):
    """Runs a list of test programs against one CompUnit in simulation.

    Each instruction is executed both by the ISA simulator and by the
    hardware CompUnit; the CompUnit results are then handed to
    ``iodef.check_cu_outputs`` for comparison.
    """

    def __init__(self, test_data, fukls, iodef, funit, bigendian):
        """
        :param test_data: iterable of test cases (each providing ``name``,
                          ``program``, ``regs``, ``sprs``, ``cr``, ``mem``
                          and ``msr``)
        :param fukls: callable constructing the CompUnit under test
        :param iodef: object providing ``get_cu_inputs`` and
                      ``check_cu_outputs``
        :param funit: ``Function`` enum member identifying the unit type
        :param bigendian: endianness passed to both decoder and simulator
        """
        super().__init__("run_all")
        self.test_data = test_data
        self.fukls = fukls
        self.iodef = iodef
        self.funit = funit
        self.bigendian = bigendian

    def execute(self, cu, l0, instruction, pdecode2, simdec2, test):
        """Simulation process: run every instruction of one test case.

        :param cu: the CompUnit under test
        :param l0: L0 cache/buffer (only used when funit is LDST)
        :param instruction: signal carrying the raw instruction word
        :param pdecode2: decoder wired to ``instruction``
        :param simdec2: separate decoder instance used by the ISA simulator
        :param test: the test case to run
        """

        program = test.program
        print("test", test.name, test.mem)
        gen = list(program.generate_instructions())
        insncode = program.assembly.splitlines()
        instructions = list(zip(gen, insncode))
        sim = ISA(simdec2, test.regs, test.sprs, test.cr, test.mem,
                  test.msr,
                  initial_insns=gen, respect_pc=True,
                  disassembly=insncode,
                  bigendian=self.bigendian)

        # initialise memory
        if self.funit == Function.LDST:
            yield from setup_tst_memory(l0, sim)

        pc = sim.pc.CIA.value
        index = pc//4
        msr = sim.msr.value
        while True:
            print("instr pc", pc)
            try:
                yield from sim.setup_one()
            except KeyError:  # indicates instruction not in imem: stop
                break
            yield Settle()
            ins, code = instructions[index]
            print("instruction @", index, code)

            # ask the decoder to decode this binary data (endian'd)
            yield pdecode2.dec.bigendian.eq(self.bigendian)  # le / be?
            yield pdecode2.state.msr.eq(msr)  # set MSR "state"
            yield pdecode2.state.pc.eq(pc)  # set PC "state"
            yield instruction.eq(ins)  # raw binary instr.
            yield Settle()
            # debugging issue with branch
            if self.funit == Function.BRANCH:
                lk = yield pdecode2.e.do.lk
                fast_out2 = yield pdecode2.e.write_fast2.data
                fast_out2_ok = yield pdecode2.e.write_fast2.ok
                print("lk:", lk, fast_out2, fast_out2_ok)
                op_lk = yield cu.alu.pipe1.p.i_data.ctx.op.lk
                print("op_lk:", op_lk)
                print(dir(cu.alu.pipe1.n.o_data))
            # the decoded function unit must include the one under test
            fn_unit = yield pdecode2.e.do.fn_unit
            fuval = self.funit.value
            self.assertEqual(fn_unit & fuval, fuval)

            # set operand and get inputs
            yield from set_operand(cu, pdecode2, sim)
            # reset read-operand mask
            rdmask = get_rdflags(pdecode2.e, cu)
            #print ("hardcoded rdmask", cu.rdflags(pdecode2.e))
            #print ("decoder rdmask", rdmask)
            yield cu.rdmaskn.eq(~rdmask)

            yield Settle()
            iname = yield from self.iodef.get_cu_inputs(pdecode2, sim)
            inp = get_inp_indexed(cu, iname)

            # reset write-operand mask
            for idx in range(cu.n_dst):
                wrok = cu.get_out(idx)
                fname = find_ok(wrok.fields)
                yield getattr(wrok, fname).eq(0)

            yield Settle()

            # set inputs into CU
            rd_rel_o = yield cu.rd.rel_o
            wr_rel_o = yield cu.wr.rel_o
            print("before inputs, rd_rel, wr_rel: ",
                  bin(rd_rel_o), bin(wr_rel_o))
            assert wr_rel_o == 0, "wr.rel %s must be zero. "\
                "previous instr not written all regs\n"\
                "respec %s" % \
                (bin(wr_rel_o), cu.rwid[1])
            yield from set_cu_inputs(cu, inp)
            rd_rel_o = yield cu.rd.rel_o
            wr_rel_o = yield cu.wr.rel_o
            wrmask = yield cu.wrmask
            print("after inputs, rd_rel, wr_rel, wrmask: ",
                  bin(rd_rel_o), bin(wr_rel_o), bin(wrmask))

            # call simulated operation
            yield from sim.execute_one()
            yield Settle()
            # capture the state for the *next* loop iteration
            pc = sim.pc.CIA.value
            index = pc//4
            msr = sim.msr.value

            # get all outputs (one by one, just "because")
            res = yield from get_cu_outputs(cu, code)
            wrmask = yield cu.wrmask
            rd_rel_o = yield cu.rd.rel_o
            wr_rel_o = yield cu.wr.rel_o
            print("after got outputs, rd_rel, wr_rel, wrmask: ",
                  bin(rd_rel_o), bin(wr_rel_o), bin(wrmask))

            # wait for busy to go low
            while True:
                busy_o = yield cu.busy_o
                print("busy", busy_o)
                if not busy_o:
                    break
                yield

            # reset read-mask.  IMPORTANT when there are no operands
            yield cu.rdmaskn.eq(0)
            yield

            # debugging issue with branch
            if self.funit == Function.BRANCH:
                lr = yield cu.alu.pipe1.n.o_data.lr.data
                lr_ok = yield cu.alu.pipe1.n.o_data.lr.ok
                print("lr:", hex(lr), lr_ok)

            is_ldst = self.funit == Function.LDST
            if is_ldst:
                yield from dump_sim_memory(self, l0, sim, code)
                # sigh.  hard-coded.  test memory
                yield from check_sim_memory(self, l0, sim, code)

            # LDST units are checked directly, all others via their ALU
            yield from self.iodef.check_cu_outputs(res, pdecode2,
                                                   sim,
                                                   cu if is_ldst else cu.alu,
                                                   code)

    def run_all(self):
        """Build the test design, then simulate every test case in it."""
        m = Module()
        comb = m.d.comb
        instruction = Signal(32)

        pdecode = create_pdecode()
        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)

        # copy of the decoder for simulator
        simdec = create_pdecode()
        simdec2 = PowerDecode2(simdec)
        m.submodules.simdec2 = simdec2  # pain in the neck

        if self.funit == Function.LDST:
            from soc.experiment.l0_cache import TstL0CacheBuffer
            pspec = TestMemPspec(ldst_ifacetype='test_bare_wb',
                                 addr_wid=48,
                                 mask_wid=8,
                                 reg_wid=64)
            m.submodules.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
            pi = l0.l0.dports[0]
            m.submodules.cu = cu = self.fukls(pi, idx=0, awid=3)
            m.d.comb += cu.ad.go_i.eq(cu.ad.rel_o)  # link addr direct to rel
            m.d.comb += cu.st.go_i.eq(cu.st.rel_o)  # link store direct to rel
        else:
            m.submodules.cu = cu = self.fukls(0)
            l0 = None

        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
        sim = Simulator(m)

        sim.add_clock(1e-6)

        def process():
            yield cu.issue_i.eq(0)
            yield

            for test in self.test_data:
                print(test.name)
                with self.subTest(test.name):
                    yield from self.execute(cu, l0, instruction,
                                            pdecode2, simdec2,
                                            test)

        sim.add_sync_process(process)

        name = self.funit.name.lower()
        with sim.write_vcd("%s_simulator.vcd" % name,
                           traces=[]):
            sim.run()