1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.isa
.caller
import ISACaller
6 from openpower
.decoder
.power_decoder
import (create_pdecode
)
7 from openpower
.decoder
.power_decoder2
import (PowerDecode2
)
8 from openpower
.simulator
.program
import Program
9 from openpower
.decoder
.isa
.caller
import ISACaller
, SVP64State
10 from openpower
.decoder
.selectable_int
import SelectableInt
11 from openpower
.decoder
.orderedset
import OrderedSet
12 from openpower
.decoder
.isa
.all
import ISA
13 from openpower
.decoder
.isa
.test_caller
import Register
, run_tst
14 from openpower
.sv
.trans
.svp64
import SVP64Asm
15 from openpower
.consts
import SVP64CROffs
16 from openpower
.decoder
.helpers
import fp64toselectable
17 from openpower
.decoder
.isa
.remap_dct_yield
import (halfrev2
, reverse_bits
,
19 from copy
import deepcopy
22 class DecoderTestCase(FHDLTestCase
):
def _check_regs(self, sim, expected):
    """Assert that the simulator's integer registers match ``expected``.

    ``expected`` is a sequence indexed by GPR number.  Each entry is
    wrapped in a 64-bit ``SelectableInt`` and compared with the value
    returned by ``sim.gpr(regnum)``; the first mismatch fails the test.
    """
    # NOTE(review): the loop header is absent from the visible source
    # (``i`` was never bound); iterating over every entry of ``expected``
    # is the reconstruction -- confirm against the upstream file.
    for regnum, value in enumerate(expected):
        self.assertEqual(sim.gpr(regnum), SelectableInt(value, 64))
def _check_fpregs(self, sim, expected):
    """Assert that the simulator's floating-point registers match ``expected``.

    ``expected`` is a sequence indexed by FPR number.  Each entry is
    wrapped in a 64-bit ``SelectableInt`` and compared with the value
    returned by ``sim.fpr(regnum)``; the first mismatch fails the test.
    """
    # NOTE(review): the loop header is absent from the visible source
    # (``i`` was never bound); iterating over every entry of ``expected``
    # is the reconstruction -- confirm against the upstream file.
    for regnum, value in enumerate(expected):
        self.assertEqual(sim.fpr(regnum), SelectableInt(value, 64))
32 def test_sv_load_store_elementstride(self
):
33 """>>> lst = ["addi 1, 0, 0x0010",
                      "sv.stw/els 4.v, 24(1)",
                      "sv.lwz/els 8.v, 24(1)"]
40 note: element stride mode is only enabled when RA is a scalar
41 and when the immediate is non-zero
43 element stride is computed as:
45 EA = (RA|0) + EXTS(D) * i
47 lst
= SVP64Asm(["addi 1, 0, 0x0010",
51 "sv.stw/els 4.v, 24(1)", # scalar r1 + 16 + 24*offs
52 "sv.lwz/els 8.v, 24(1)"]) # scalar r1 + 16 + 24*offs
55 # SVSTATE (in this case, VL=2)
56 svstate
= SVP64State()
58 svstate
.maxvl
= 2 # MAXVL
59 print ("SVSTATE", bin(svstate
.asint()))
61 with
Program(lst
, bigendian
=False) as program
:
62 sim
= self
.run_tst_program(program
, svstate
=svstate
)
63 mem
= sim
.mem
.dump(printout
=False)
65 # contents of memory expected at:
66 # element 0: r1=0x10, D=24, => EA = 0x10+24*0 = 16 (0x10)
67 # element 1: r1=0x10, D=24, => EA = 0x10+24*1 = 40 (0x28)
68 # therefore, at address 0x10 ==> 0x1234
69 # therefore, at address 0x28 ==> 0x1235
70 expected_mem
= [(16, 0x1234),
72 self
.assertEqual(mem
, expected_mem
)
74 self
.assertEqual(sim
.gpr(8), SelectableInt(0x1234, 64))
75 self
.assertEqual(sim
.gpr(9), SelectableInt(0x1235, 64))
77 def test_sv_load_store_unitstride(self
):
78 """>>> lst = ["addi 1, 0, 0x0010",
85 note: unit stride mode is only enabled when RA is a scalar.
87 unit stride is computed as:
89 EA = (RA|0) + EXTS(D) + LDSTsize * i
90 where for stw and lwz, LDSTsize is 4 because it is 32-bit words
92 lst
= SVP64Asm(["addi 1, 0, 0x0010",
96 "sv.stw 8.v, 8(1)", # scalar r1 + 8 + wordlen*offs
97 "sv.lwz 12.v, 8(1)"]) # scalar r1 + 8 + wordlen*offs
100 # SVSTATE (in this case, VL=2)
101 svstate
= SVP64State()
103 svstate
.maxvl
= 2 # MAXVL
104 print ("SVSTATE", bin(svstate
.asint()))
106 with
Program(lst
, bigendian
=False) as program
:
107 sim
= self
.run_tst_program(program
, svstate
=svstate
)
108 mem
= sim
.mem
.dump(printout
=False)
111 # contents of memory expected at:
            #    element 0:   r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*0 = 0x18
            #    element 1:   r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*1 = 0x1c
            # therefore, at address 0x18 ==> 0x1234
            # therefore, at address 0x1c ==> 0x1235
116 self
.assertEqual(mem
, [(24, 0x123500001234)])
118 self
.assertEqual(sim
.gpr(12), SelectableInt(0x1234, 64))
119 self
.assertEqual(sim
.gpr(13), SelectableInt(0x1235, 64))
121 def test_sv_load_store_bitreverse(self
):
122 """>>> lst = ["addi 1, 0, 0x0010",
130 "sv.lwzbr 12.v, 4(1), 2"]
132 note: bitreverse mode is... odd. it's the butterfly generator
133 from Cooley-Tukey FFT:
134 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
136 bitreverse LD is computed as:
138 EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
140 bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
141 produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
143 and thus creates the butterfly needed for one iteration of FFT.
144 the RC (shift) is to be able to offset the LDs by Radix-2 spans
146 lst
= SVP64Asm(["addi 1, 0, 0x0010",
152 "sv.stw 5.v, 0(1)", # scalar r1 + 0 + wordlen*offs
153 "sv.lwzbr 12.v, 4(1), 2"]) # bit-reversed
156 # SVSTATE (in this case, VL=4)
157 svstate
= SVP64State()
159 svstate
.maxvl
= 4 # MAXVL
160 print ("SVSTATE", bin(svstate
.asint()))
162 with
Program(lst
, bigendian
=False) as program
:
163 sim
= self
.run_tst_program(program
, svstate
=svstate
)
164 mem
= sim
.mem
.dump(printout
=False)
167 self
.assertEqual(mem
, [(16, 0x020200000101),
168 (24, 0x040400000303)])
171 self
.assertEqual(sim
.gpr(5), SelectableInt(0x101, 64))
172 self
.assertEqual(sim
.gpr(6), SelectableInt(0x202, 64))
173 self
.assertEqual(sim
.gpr(7), SelectableInt(0x303, 64))
174 self
.assertEqual(sim
.gpr(8), SelectableInt(0x404, 64))
175 # r1=0x10, RC=0, offs=4: contents of memory expected at:
176 # element 0: EA = r1 + bitrev(0b00)*4 => 0x10 + 0b00*4 => 0x10
177 # element 1: EA = r1 + bitrev(0b01)*4 => 0x10 + 0b10*4 => 0x18
178 # element 2: EA = r1 + bitrev(0b10)*4 => 0x10 + 0b01*4 => 0x14
            #    element 3:   EA = r1 + bitrev(0b11)*4 => 0x10 + 0b11*4 => 0x1c
            # therefore loaded from (bit-reversed indexing):
            #    r12 => mem[0x10] which was stored from r5
            #    r13 => mem[0x18] which was stored from r7
            #    r14 => mem[0x14] which was stored from r6
            #    r15 => mem[0x1c] which was stored from r8
185 self
.assertEqual(sim
.gpr(12), SelectableInt(0x101, 64))
186 self
.assertEqual(sim
.gpr(13), SelectableInt(0x303, 64))
187 self
.assertEqual(sim
.gpr(14), SelectableInt(0x202, 64))
188 self
.assertEqual(sim
.gpr(15), SelectableInt(0x404, 64))
190 def test_sv_load_store_bitreverse_fp(self
):
191 """>>> lst = ["addi 1, 0, 0x0010",
                      "sv.lfdbr 12.v, 8(1), 2"]
201 note: bitreverse mode is... odd. it's the butterfly generator
202 from Cooley-Tukey FFT:
203 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
205 bitreverse LD is computed as:
207 EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
209 bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
210 produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
212 and thus creates the butterfly needed for one iteration of FFT.
213 the RC (shift) is to be able to offset the LDs by Radix-2 spans
215 lst
= SVP64Asm(["addi 1, 0, 0x0010",
221 "sv.std 5.v, 0(1)", # scalar r1 + 0 + wordlen*offs
222 "sv.lfdbr 12.v, 8(1), 2"]) # bit-reversed
225 # SVSTATE (in this case, VL=4)
226 svstate
= SVP64State()
228 svstate
.maxvl
= 4 # MAXVL
229 print ("SVSTATE", bin(svstate
.asint()))
233 with
Program(lst
, bigendian
=False) as program
:
234 sim
= self
.run_tst_program(program
, svstate
=svstate
,
236 mem
= sim
.mem
.dump(printout
=False)
239 self
.assertEqual(mem
, [(16, 0x101),
246 self
.assertEqual(sim
.gpr(5), SelectableInt(0x101, 64))
247 self
.assertEqual(sim
.gpr(6), SelectableInt(0x202, 64))
248 self
.assertEqual(sim
.gpr(7), SelectableInt(0x303, 64))
249 self
.assertEqual(sim
.gpr(8), SelectableInt(0x404, 64))
            # r1=0x10, RC=0, offs=8: contents of memory expected at:
            #    element 0:   EA = r1 + bitrev(0b00)*8 => 0x10 + 0b00*8 => 0x10
            #    element 1:   EA = r1 + bitrev(0b01)*8 => 0x10 + 0b10*8 => 0x20
            #    element 2:   EA = r1 + bitrev(0b10)*8 => 0x10 + 0b01*8 => 0x18
            #    element 3:   EA = r1 + bitrev(0b11)*8 => 0x10 + 0b11*8 => 0x28
            # therefore loaded from (bit-reversed indexing):
            #    fr12 => mem[0x10] which was stored from r5
            #    fr13 => mem[0x20] which was stored from r7
            #    fr14 => mem[0x18] which was stored from r6
            #    fr15 => mem[0x28] which was stored from r8
260 self
.assertEqual(sim
.fpr(12), SelectableInt(0x101, 64))
261 self
.assertEqual(sim
.fpr(13), SelectableInt(0x303, 64))
262 self
.assertEqual(sim
.fpr(14), SelectableInt(0x202, 64))
263 self
.assertEqual(sim
.fpr(15), SelectableInt(0x404, 64))
265 def test_sv_load_store_bitreverse2(self
):
266 """>>> lst = ["addi 1, 0, 0x0010",
270 "sv.lfsbr 12.v, 4(1), 2"]
272 note: bitreverse mode is... odd. it's the butterfly generator
273 from Cooley-Tukey FFT:
274 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
276 bitreverse LD is computed as:
278 EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
280 bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
281 produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
283 and thus creates the butterfly needed for one iteration of FFT.
284 the RC (shift) is to be able to offset the LDs by Radix-2 spans
286 lst
= SVP64Asm(["addi 1, 0, 0x0010",
288 "sv.stfs 4.v, 0(1)", # scalar r1 + 0 + wordlen*offs
289 "sv.lfsbr 12.v, 4(1), 2"]) # bit-reversed
292 # SVSTATE (in this case, VL=4)
293 svstate
= SVP64State()
295 svstate
.maxvl
= 4 # MAXVL
296 print ("SVSTATE", bin(svstate
.asint()))
301 fprs
[4] = fp64toselectable(1.0)
302 fprs
[5] = fp64toselectable(2.0)
303 fprs
[6] = fp64toselectable(3.0)
304 fprs
[7] = fp64toselectable(4.0)
306 # expected results, remember that bit-reversed load has been done
307 expected_fprs
= deepcopy(fprs
)
308 expected_fprs
[12] = fprs
[4] # 0b00 -> 0b00
309 expected_fprs
[13] = fprs
[6] # 0b01 -> 0b10
310 expected_fprs
[14] = fprs
[5] # 0b10 -> 0b01
311 expected_fprs
[15] = fprs
[7] # 0b11 -> 0b11
313 with
Program(lst
, bigendian
=False) as program
:
314 sim
= self
.run_tst_program(program
, svstate
=svstate
,
316 mem
= sim
.mem
.dump(printout
=False)
323 #self.assertEqual(mem, [(16, 0x020200000101),
324 # (24, 0x040400000303)])
325 self
._check
_fpregs
(sim
, expected_fprs
)
327 def test_sv_load_store_remap_matrix(self
):
328 """>>> lst = ["addi 1, 0, 0x0010",
335 "sv.stw 4.v, 0(1)", # scalar r1 + 0 + wordlen*offs
336 "svshape 3, 3, 4, 0, 0",
337 "svremap 1, 1, 2, 0, 0, 0, 0, 1",
341 REMAPed a LD operation via a Matrix Multiply Schedule,
342 which is set up as 3x4 result
344 lst
= SVP64Asm(["addi 1, 0, 0x0010",
361 "sv.stw 4.v, 0(1)", # scalar r1 + 0 + wordlen*offs
362 "svshape 3, 3, 4, 0, 0",
363 "svremap 1, 1, 2, 0, 0, 0, 0, 1",
365 #"sv.lwzbr 12.v, 4(1), 2", # bit-reversed
        # SVSTATE (in this case, VL=12)
370 svstate
= SVP64State()
372 svstate
.maxvl
= 12 # MAXVL
373 print ("SVSTATE", bin(svstate
.asint()))
377 with
Program(lst
, bigendian
=False) as program
:
378 sim
= self
.run_tst_program(program
, svstate
=svstate
,
380 mem
= sim
.mem
.dump(printout
=False)
384 self
.assertEqual(mem
, [(16, 0x020200000101),
385 (24, 0x040400000303),
386 (32, 0x060600000505),
387 (40, 0x080800000707),
388 (48, 0x0a0a00000909),
389 (56, 0x0c0c00000b0b)])
392 self
.assertEqual(sim
.gpr(4), SelectableInt(0x101, 64))
393 self
.assertEqual(sim
.gpr(5), SelectableInt(0x202, 64))
394 self
.assertEqual(sim
.gpr(6), SelectableInt(0x303, 64))
395 self
.assertEqual(sim
.gpr(7), SelectableInt(0x404, 64))
396 self
.assertEqual(sim
.gpr(8), SelectableInt(0x505, 64))
397 self
.assertEqual(sim
.gpr(9), SelectableInt(0x606, 64))
398 self
.assertEqual(sim
.gpr(10), SelectableInt(0x707, 64))
399 self
.assertEqual(sim
.gpr(11), SelectableInt(0x808, 64))
400 # combination of bit-reversed load with a Matrix REMAP
403 self
.assertEqual(sim
.gpr(20+i
), SelectableInt(0x101, 64))
404 self
.assertEqual(sim
.gpr(23+i
), SelectableInt(0x505, 64))
405 self
.assertEqual(sim
.gpr(26+i
), SelectableInt(0x909, 64))
406 self
.assertEqual(sim
.gpr(29+i
), SelectableInt(0x202, 64))
408 def test_sv_load_store_bitreverse_remap_halfswap(self
):
409 """>>> lst = ["addi 1, 0, 0x0010",
420 "svshape 8, 1, 1, 6, 0",
421 "svremap 31, 1, 2, 3, 0, 0, 0, 0",
422 "sv.lwzbr 12.v, 4(1), 2"]
424 bitreverse LD is computed as:
426 EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
428 bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
429 produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
431 and thus creates the butterfly needed for one iteration of FFT.
432 the RC (shift) is to be able to offset the LDs by Radix-2 spans
434 on top of the bit-reversal is a REMAP for half-swaps for DCT
437 lst
= SVP64Asm(["addi 1, 0, 0x0010",
447 "sv.stw 4.v, 0(1)", # scalar r1 + 0 + wordlen*offs
448 "svshape 8, 1, 1, 6, 0",
449 "svremap 1, 0, 0, 0, 0, 0, 0, 1",
450 #"setvl 0, 0, 8, 0, 1, 1",
451 "sv.lwzbr 12.v, 4(1), 2", # bit-reversed
        # SVSTATE (in this case, VL=8)
457 svstate
= SVP64State()
459 svstate
.maxvl
= 8 # MAXVL
460 print ("SVSTATE", bin(svstate
.asint()))
464 avi
= [0x001, 0x102, 0x203, 0x304, 0x405, 0x506, 0x607, 0x708]
466 levels
= n
.bit_length() - 1
468 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
469 av
= halfrev2(avi
, False)
470 av
= [av
[ri
[i
]] for i
in range(n
)]
472 with
Program(lst
, bigendian
=False) as program
:
473 sim
= self
.run_tst_program(program
, svstate
=svstate
,
475 mem
= sim
.mem
.dump(printout
=False)
479 self
.assertEqual(mem
, [(16, 0x010200000001),
480 (24, 0x030400000203),
481 (32, 0x050600000405),
482 (40, 0x070800000607)])
484 for i
in range(len(avi
)):
485 print ("st gpr", i
, sim
.gpr(i
+4), hex(avi
[i
]))
486 self
.assertEqual(sim
.gpr(i
+4), avi
[i
])
487 self
.assertEqual(sim
.gpr(5), SelectableInt(0x102, 64))
488 self
.assertEqual(sim
.gpr(6), SelectableInt(0x203, 64))
489 self
.assertEqual(sim
.gpr(7), SelectableInt(0x304, 64))
490 self
.assertEqual(sim
.gpr(8), SelectableInt(0x405, 64))
491 self
.assertEqual(sim
.gpr(9), SelectableInt(0x506, 64))
492 self
.assertEqual(sim
.gpr(10), SelectableInt(0x607, 64))
493 self
.assertEqual(sim
.gpr(11), SelectableInt(0x708, 64))
494 # combination of bit-reversed load with a DCT half-swap REMAP
496 for i
in range(len(avi
)):
497 print ("ld gpr", i
, sim
.gpr(i
+12), hex(av
[i
]))
498 self
.assertEqual(sim
.gpr(i
+12), av
[i
])
def run_tst_program(self, prog, initial_regs=None,
                    svstate=None, initial_fprs=None):
    """Run ``prog`` through ``run_tst`` and return the resulting simulator.

    Parameters:
        prog: the program object handed straight to ``run_tst``.
        initial_regs: initial GPR values; defaults to 32 zeros.
        svstate: SVSTATE passed through unchanged to ``run_tst``.
        initial_fprs: initial FPR values; defaults to 32 zeros.

    Returns:
        The simulator object produced by ``run_tst``; the tests read
        ``.mem``, ``.gpr(n)`` and ``.fpr(n)`` from it.
    """
    # Fresh lists per call, so a caller mutating its register file can
    # never leak state into another test.
    if initial_regs is None:
        initial_regs = [0] * 32
    if initial_fprs is None:
        initial_fprs = [0] * 32
    simulator = run_tst(prog, initial_regs, svstate=svstate,
                        initial_fprs=initial_fprs)
    # NOTE(review): the tail of this method is not present in the visible
    # source; any debug printing it may have contained is not reproduced.
    # The return is required: every test uses the result of this call.
    return simulator
515 if __name__
== "__main__":