Allow the formal engine to perform a same-cycle result in the ALU
[soc.git] / src / soc / fu / compunits / test / test_compunit.py
1 from nmigen import Module, Signal, ResetSignal, Memory
2
3 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
4 # Also, check out the cxxsim nmigen branch, and latest yosys from git
5 from nmutil.sim_tmp_alternative import Simulator, Settle
6
7 from nmutil.formaltest import FHDLTestCase
8 from nmigen.cli import rtlil
9 import unittest
10 from openpower.decoder.power_decoder import create_pdecode
11 from openpower.decoder.power_decoder2 import PowerDecode2, get_rdflags
12 from openpower.decoder.power_enums import Function
13 from openpower.decoder.isa.all import ISA
14 from openpower.decoder.isa.mem import Mem
15
16 from soc.experiment.compalu_multi import find_ok # hack
17 from soc.config.test.test_loadstore import TestMemPspec
18
19
def set_cu_input(cu, idx, data):
    """Simulation generator: feed one source operand into the CompUnit.

    Puts `data` on ``cu.src_i[idx]``, polls ``cu.rd.rel_o[idx]`` until it
    reads as true, asserts ``cu.rd.go_i[idx]``, polls the release line a
    second time, then clears both the go signal and the source value.

    cu   -- Computation Unit under test
    idx  -- index of the source operand
    data -- integer value to present on the operand
    """
    operand_name = cu.get_in_name(idx)
    yield cu.src_i[idx].eq(data)

    # poll the read-release line, one clock per unsuccessful attempt
    while True:
        released = yield cu.rd.rel_o[idx]
        print("rd_rel %d wait HI" % idx, released, operand_name, hex(data))
        if released:
            break
        yield

    yield cu.rd.go_i[idx].eq(1)

    # NOTE(review): this loop also exits when rel_o reads high (and the
    # message says "wait HI"); presumably intentional - confirm.
    while True:
        yield
        released = yield cu.rd.rel_o[idx]
        if released:
            break
        print("rd_rel %d wait HI" % idx, released)
        yield

    # drop the handshake and clear the operand
    yield cu.rd.go_i[idx].eq(0)
    yield cu.src_i[idx].eq(0)
39
40
def get_cu_output(cu, idx, code):
    """Simulation generator: collect one destination result from the CU.

    Reads the write mask and the "ok" field of output `idx`, asserts that
    the mask bit for `idx` is set, waits for ``cu.wr.rel_o[idx]``, pulses
    ``cu.wr.go_i[idx]`` and samples ``cu.dest[idx]`` after a Settle.

    cu   -- Computation Unit under test
    idx  -- index of the destination operand
    code -- instruction text, used only in diagnostics

    Returns the value read from ``cu.dest[idx]``.
    """
    mask = yield cu.wrmask
    out_name = cu.get_out_name(idx)
    out_record = cu.get_out(idx)
    ok_field = find_ok(out_record.fields)
    ok_value = yield getattr(out_record, ok_field)
    print("wr_rel mask", repr(code), idx, out_name, bin(mask),
          ok_field, ok_value)
    assert mask & (1 << idx), \
        "get_cu_output '%s': mask bit %d not set\n" \
        "write-operand '%s' Data.ok likely not set (%s)" \
        % (code, idx, out_name, hex(ok_value))

    # poll the write-release line, one clock per unsuccessful attempt
    while True:
        all_released = yield cu.wr.rel_o
        released = yield cu.wr.rel_o[idx]
        print("wr_rel %d wait" % idx, hex(all_released), released)
        if released:
            break
        yield

    # raise go, sample the result combinatorially, then drop go
    yield cu.wr.go_i[idx].eq(1)
    yield Settle()
    result = yield cu.dest[idx]
    yield
    yield cu.wr.go_i[idx].eq(0)
    print("result", repr(code), idx, out_name, ok_value, hex(result))

    return result
67
68
def set_cu_inputs(cu, inp):
    """Simulation generator: feed every operand in `inp` into the CU.

    inp -- dict mapping source-operand index to value.  Each entry is
           handed to set_cu_input in iteration order.
    """
    print("set_cu_inputs", inp)
    for operand_idx, value in inp.items():
        yield from set_cu_input(cu, operand_idx, value)
    if not inp:
        # gets out of sync when checking busy if there is no wait, here.
        yield  # wait one cycle
76
77
def set_operand(cu, dec2, sim):
    """Simulation generator: load the decoded operation and pulse issue.

    Copies ``dec2.do`` into ``cu.oper_i`` via ``eq_from_execute1``, then
    drives ``cu.issue_i`` to 1 and back to 0, yielding one clock after
    each change.  `sim` is accepted but not used here.
    """
    yield from cu.oper_i.eq_from_execute1(dec2.do)
    for level in (1, 0):
        yield cu.issue_i.eq(level)
        yield
84
85
def get_cu_outputs(cu, code):
    """Simulation generator: gather all results the CU wants to write.

    cu   -- Computation Unit under test
    code -- instruction text, passed through for diagnostics

    Returns a dict mapping output-operand name to value; empty when the
    write mask reads as zero.
    """
    results = {}

    # wait for pipeline to indicate valid. this because for long
    # pipelines (or FSMs) the write mask is only valid at that time.
    if hasattr(cu, "alu"):  # ALU CompUnits
        while True:
            valid = yield cu.alu.n.o_valid
            if valid:
                break
            yield
    else:  # LDST CompUnit
        # not a lot can be done about this - simply wait a few cycles
        for _ in range(5):
            yield

    mask = yield cu.wrmask
    released = yield cu.wr.rel_o
    print("get_cu_outputs", cu.n_dst, mask, released)

    # no point waiting (however really should doublecheck wr.rel)
    if not mask:
        return results

    # wait for at least one result
    while True:
        released = yield cu.wr.rel_o
        if released:
            break
        yield

    # collect every destination whose release line is currently set
    for dst in range(cu.n_dst):
        dst_released = yield cu.wr.rel_o[dst]
        if not dst_released:
            continue
        value = yield from get_cu_output(cu, dst, code)
        name = cu.get_out_name(dst)
        print("output", dst, name, hex(value))
        results[name] = value
    return results
121
122
def get_inp_indexed(cu, inp):
    """Convert a name-keyed operand dict into an index-keyed one.

    cu  -- object providing ``n_src`` and ``get_in_name(i)``
    inp -- dict mapping operand name to value

    Returns a dict {source index: value} holding only those of the CU's
    source operands whose name appears in `inp`.
    """
    names = (cu.get_in_name(i) for i in range(cu.n_src))
    return {i: inp[name] for i, name in enumerate(names) if name in inp}
130
131
def get_l0_mem(l0):
    """Locate the memory object behind an L0 cache/buffer test harness.

    BLECH! this is awful! hunting around through structures.  Three
    layouts are handled, tried in this order:
      * ``l0.pimem.lsui.mem``  when pimem has an ``lsui`` attribute
      * ``l0.pimem.mem``       when that is itself an nmigen Memory
                               (euuurg this one is for TestSRAMLoadStore1)
      * ``l0.pimem.mem.mem``   otherwise
    """
    pimem = l0.pimem
    if hasattr(pimem, 'lsui'):
        return pimem.lsui.mem
    candidate = pimem.mem
    return candidate if isinstance(candidate, Memory) else candidate.mem
139
140
def setup_tst_memory(l0, test_mem):
    """Simulation generator: preload the hardware memory from test data.

    Builds an independent simulator ``Mem`` from `test_mem`, writes one
    8-byte word per row into the memory found by get_l0_mem, then prints
    both the simulator-side contents and the values read back.
    """
    # create independent Sim Mem from test values
    reference = Mem(initial_mem=test_mem)
    hw_mem = get_l0_mem(l0)
    print("before, init mem", hw_mem.depth, hw_mem.width, hw_mem)

    # row `row` of the hardware memory holds the 8 bytes at address row*8
    for row in range(hw_mem.depth):
        word = reference.ld(row*8, 8, False)
        print("init ", row, hex(word))
        yield hw_mem._array[row].eq(word)
    yield Settle()

    for addr, value in reference.mem.items():
        print(" %6x %016x" % (addr, value))

    print("before, nmigen mem dump")
    for row in range(hw_mem.depth):
        readback = yield hw_mem._array[row]
        print(" %6i %016x" % (row, readback))
157
158
def dump_sim_memory(dut, l0, sim, code):
    """Simulation generator: print simulator memory and hardware memory.

    Purely diagnostic.  `dut` and `code` are accepted but not used here.
    """
    hw_mem = get_l0_mem(l0)

    print("sim mem dump")
    for addr, value in sim.mem.mem.items():
        print(" %6x %016x" % (addr, value))

    print("nmigen mem dump")
    for row in range(hw_mem.depth):
        word = yield hw_mem._array[row]
        print(" %6i %016x" % (row, word))
168
169
def check_sim_memory(dut, l0, sim, code):
    """Simulation generator: compare hardware memory with the simulator.

    For every row of the memory found by get_l0_mem, the 8-byte word the
    simulator holds at address row*8 must equal the hardware row.

    dut  -- test case object; its ``assertEqual`` reports mismatches
    code -- instruction text, included in the failure message
    """
    hw_mem = get_l0_mem(l0)

    for row in range(hw_mem.depth):
        expected = sim.mem.ld(row*8, 8, False)
        actual = yield hw_mem._array[row]
        message = "%s %d %x %x" % (code, row, expected, actual)
        dut.assertEqual(expected, actual, message)
179
180
class TestRunner(FHDLTestCase):
    """Run a list of test programs through one Computation Unit.

    Each instruction is executed both on the ISA simulator and on the
    hardware CompUnit inside an nmigen simulation; the results are then
    compared through ``iodef.check_cu_outputs``.
    """

    def __init__(self, test_data, fukls, iodef, funit, bigendian):
        """Store the test configuration; the test method is ``run_all``.

        test_data -- iterable of test cases (name, program, regs, ...)
        fukls     -- class used to construct the Computation Unit
        iodef     -- provides get_cu_inputs / check_cu_outputs
        funit     -- Function enum member identifying the unit type
        bigendian -- endianness passed to the decoder and the simulator
        """
        super().__init__("run_all")
        self.test_data = test_data
        self.fukls = fukls
        self.iodef = iodef
        self.funit = funit
        self.bigendian = bigendian

    def execute(self, cu, l0, instruction, pdecode2, simdec2, test):
        """Simulation generator: run one test case, instruction by
        instruction, until the simulator reports no instruction at PC.
        """
        program = test.program
        print("test", test.name, test.mem)
        gen = list(program.generate_instructions())
        insncode = program.assembly.splitlines()
        instructions = list(zip(gen, insncode))
        sim = ISA(simdec2, test.regs, test.sprs, test.cr, test.mem,
                  test.msr,
                  initial_insns=gen, respect_pc=True,
                  disassembly=insncode,
                  bigendian=self.bigendian)

        # the unit type does not change while a test case runs
        is_ldst = self.funit == Function.LDST
        is_branch = self.funit == Function.BRANCH

        # initialise memory
        if is_ldst:
            yield from setup_tst_memory(l0, test.mem)

        pc = sim.pc.CIA.value
        index = pc//4
        msr = sim.msr.value
        while True:
            print("instr pc", pc)
            try:
                yield from sim.setup_one()
            except KeyError:  # indicates instruction not in imem: stop
                break
            yield Settle()
            ins, code = instructions[index]
            print("instruction @", index, code)

            # ask the decoder to decode this binary data (endian'd)
            yield pdecode2.dec.bigendian.eq(self.bigendian)  # le / be?
            yield pdecode2.state.msr.eq(msr)  # set MSR "state"
            yield pdecode2.state.pc.eq(pc)  # set PC "state"
            yield instruction.eq(ins)  # raw binary instr.
            yield Settle()

            # debugging issue with branch
            if is_branch:
                lk = yield pdecode2.e.do.lk
                fast_out2 = yield pdecode2.e.write_fast2.data
                fast_out2_ok = yield pdecode2.e.write_fast2.ok
                print("lk:", lk, fast_out2, fast_out2_ok)
                op_lk = yield cu.alu.pipe1.p.i_data.ctx.op.lk
                print("op_lk:", op_lk)
                print(dir(cu.alu.pipe1.n.o_data))

            # the decoded instruction must belong to this function unit
            fn_unit = yield pdecode2.e.do.fn_unit
            fuval = self.funit.value
            self.assertEqual(fn_unit & fuval, fuval)

            # set operand and get inputs
            yield from set_operand(cu, pdecode2, sim)
            # reset read-operand mask
            rdmask = get_rdflags(pdecode2.e, cu)
            yield cu.rdmaskn.eq(~rdmask)

            yield Settle()
            iname = yield from self.iodef.get_cu_inputs(pdecode2, sim)
            inp = get_inp_indexed(cu, iname)

            # reset write-operand mask
            for dst in range(cu.n_dst):
                out_record = cu.get_out(dst)
                ok_field = find_ok(out_record.fields)
                yield getattr(out_record, ok_field).eq(0)

            yield Settle()

            # set inputs into CU
            rd_rel_o = yield cu.rd.rel_o
            wr_rel_o = yield cu.wr.rel_o
            print("before inputs, rd_rel, wr_rel: ",
                  bin(rd_rel_o), bin(wr_rel_o))
            assert wr_rel_o == 0, "wr.rel %s must be zero. "\
                "previous instr not written all regs\n"\
                "respec %s" % \
                (bin(wr_rel_o), cu.rwid[1])
            yield from set_cu_inputs(cu, inp)
            rd_rel_o = yield cu.rd.rel_o
            wr_rel_o = yield cu.wr.rel_o
            wrmask = yield cu.wrmask
            print("after inputs, rd_rel, wr_rel, wrmask: ",
                  bin(rd_rel_o), bin(wr_rel_o), bin(wrmask))

            # call simulated operation
            yield from sim.execute_one()
            yield Settle()
            pc = sim.pc.CIA.value
            index = pc//4
            msr = sim.msr.value

            # get all outputs (one by one, just "because")
            res = yield from get_cu_outputs(cu, code)
            wrmask = yield cu.wrmask
            rd_rel_o = yield cu.rd.rel_o
            wr_rel_o = yield cu.wr.rel_o
            print("after got outputs, rd_rel, wr_rel, wrmask: ",
                  bin(rd_rel_o), bin(wr_rel_o), bin(wrmask))

            # wait for busy to go low
            while True:
                busy_o = yield cu.busy_o
                print("busy", busy_o)
                if not busy_o:
                    break
                yield

            # reset read-mask. IMPORTANT when there are no operands
            yield cu.rdmaskn.eq(0)
            yield

            # debugging issue with branch
            if is_branch:
                lr = yield cu.alu.pipe1.n.o_data.lr.data
                lr_ok = yield cu.alu.pipe1.n.o_data.lr.ok
                print("lr:", hex(lr), lr_ok)

            # sigh. hard-coded. test memory: LDST units get their memory
            # dumped and checked, and are compared directly; all other
            # units are compared via their ALU.
            if is_ldst:
                yield from dump_sim_memory(self, l0, sim, code)
                yield from check_sim_memory(self, l0, sim, code)
                checked_unit = cu
            else:
                checked_unit = cu.alu
            yield from self.iodef.check_cu_outputs(res, pdecode2,
                                                   sim, checked_unit,
                                                   code)

    def run_all(self):
        """Build the test module, run every test case, write a VCD."""
        m = Module()
        comb = m.d.comb
        instruction = Signal(32)

        pdecode = create_pdecode()
        m.submodules.pdecode2 = pdecode2 = PowerDecode2(pdecode)

        # copy of the decoder for simulator
        simdec = create_pdecode()
        simdec2 = PowerDecode2(simdec)
        m.submodules.simdec2 = simdec2  # pain in the neck

        if self.funit == Function.LDST:
            from soc.experiment.l0_cache import TstL0CacheBuffer
            pspec = TestMemPspec(ldst_ifacetype='test_bare_wb',
                                 addr_wid=48,
                                 mask_wid=8,
                                 reg_wid=64)
            m.submodules.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
            pi = l0.l0.dports[0]
            m.submodules.cu = cu = self.fukls(pi, idx=0, awid=3)
            # link addr direct to rel
            m.d.comb += cu.ad.go_i.eq(cu.ad.rel_o)
            # link store direct to rel
            m.d.comb += cu.st.go_i.eq(cu.st.rel_o)
        else:
            m.submodules.cu = cu = self.fukls(0)
            l0 = None

        comb += pdecode2.dec.raw_opcode_in.eq(instruction)
        sim = Simulator(m)

        sim.add_clock(1e-6)

        def process():
            # start with issue deasserted, then run the cases in order
            yield cu.issue_i.eq(0)
            yield

            for test in self.test_data:
                print(test.name)
                with self.subTest(test.name):
                    yield from self.execute(cu, l0, instruction,
                                            pdecode2, simdec2,
                                            test)

        sim.add_sync_process(process)

        name = self.funit.name.lower()
        vcd_name = "%s_simulator.vcd" % name
        with sim.write_vcd(vcd_name, traces=[]):
            sim.run()