1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.isa
.caller
import ISACaller
6 from openpower
.decoder
.power_decoder
import (create_pdecode
)
7 from openpower
.decoder
.power_decoder2
import (PowerDecode2
)
8 from openpower
.simulator
.program
import Program
9 from openpower
.decoder
.isa
.caller
import ISACaller
, SVP64State
10 from openpower
.decoder
.selectable_int
import SelectableInt
11 from openpower
.decoder
.orderedset
import OrderedSet
12 from openpower
.decoder
.isa
.all
import ISA
13 from openpower
.decoder
.isa
.test_caller
import Register
, run_tst
14 from openpower
.sv
.trans
.svp64
import SVP64Asm
15 from openpower
.consts
import SVP64CROffs
16 from openpower
.decoder
.helpers
import fp64toselectable
17 from openpower
.decoder
.isa
.remap_dct_yield
import (halfrev2
, reverse_bits
,
19 from copy
import deepcopy
class DecoderTestCase(FHDLTestCase):
    """Simulator tests for SVP64 vectorised load/store addressing modes.

    Each test assembles a short SVP64 program, runs it through
    ``run_tst_program`` and then checks the memory image and/or the
    register files of the returned simulator.  The modes exercised are
    element-strided, unit-strided and bit-reversed LD/ST, plus bit-reversed
    loads combined with a Matrix REMAP and with a DCT half-swap REMAP.
    """

    def _check_regs(self, sim, expected):
        """Assert that the GPRs of ``sim`` match ``expected``.

        ``expected`` is indexed by register number; entry ``i`` is compared
        (as a 64-bit SelectableInt) against ``sim.gpr(i)``.
        """
        for i, value in enumerate(expected):
            self.assertEqual(sim.gpr(i), SelectableInt(value, 64))

    def _check_fpregs(self, sim, expected):
        """Assert that the FPRs of ``sim`` match ``expected``.

        ``expected`` is indexed by register number; entry ``i`` is compared
        (as a 64-bit SelectableInt) against ``sim.fpr(i)``.
        """
        for i, value in enumerate(expected):
            self.assertEqual(sim.fpr(i), SelectableInt(value, 64))

    def _new_svstate(self, vl):
        """Return an SVP64State with VL and MAXVL both set to ``vl``."""
        svstate = SVP64State()
        # NOTE(review): ``vl`` is assumed to be the VL field that sits
        # next to ``maxvl`` on SVP64State -- confirm against its definition.
        svstate.vl = vl  # VL
        svstate.maxvl = vl  # MAXVL
        print("SVSTATE", bin(svstate.asint()))
        return svstate

    def test_sv_load_store_elementstride(self):
        """Element-strided store followed by element-strided load, VL=2.

        Program::

            addi 1, 0, 0x0010
            addi 4, 0, 0x1234
            addi 5, 0, 0x1235
            sv.stw/els 4.v, 24(1)
            sv.lwz/els 8.v, 24(1)

        note: element stride mode is only enabled when RA is a scalar
        and when the immediate is non-zero

        element stride is computed as:

            for i in range(VL):
                EA = (RA|0) + EXTS(D) * i
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 4, 0, 0x1234",
                        "addi 5, 0, 0x1235",
                        "sv.stw/els 4.v, 24(1)",  # EA = r1 + 24*i
                        "sv.lwz/els 8.v, 24(1)"])  # EA = r1 + 24*i
        lst = list(lst)

        # SVSTATE (in this case, VL=2)
        svstate = self._new_svstate(2)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)

            # contents of memory expected at:
            #    element 0:   r1=0x10, D=24, => EA = 0x10+24*0 = 16 (0x10)
            #    element 1:   r1=0x10, D=24, => EA = 0x10+24*1 = 40 (0x28)
            # therefore, at address 0x10 ==> 0x1234
            # therefore, at address 0x28 ==> 0x1235
            expected_mem = [(16, 0x1234),
                            (40, 0x1235)]
            self.assertEqual(mem, expected_mem)

            self.assertEqual(sim.gpr(8), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(9), SelectableInt(0x1235, 64))

    def test_sv_load_store_unitstride(self):
        """Unit-strided store followed by unit-strided load, VL=2.

        Program::

            addi 1, 0, 0x0010
            addi 8, 0, 0x1234
            addi 9, 0, 0x1235
            sv.stw 8.v, 8(1)
            sv.lwz 12.v, 8(1)

        note: unit stride mode is only enabled when RA is a scalar.

        unit stride is computed as:

            for i in range(VL):
                EA = (RA|0) + EXTS(D) + LDSTsize * i

        where for stw and lwz, LDSTsize is 4 because it is 32-bit words
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 8, 0, 0x1234",
                        "addi 9, 0, 0x1235",
                        "sv.stw 8.v, 8(1)",  # scalar r1 + 8 + wordlen*offs
                        "sv.lwz 12.v, 8(1)"])  # scalar r1 + 8 + wordlen*offs
        lst = list(lst)

        # SVSTATE (in this case, VL=2)
        svstate = self._new_svstate(2)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)

            # contents of memory expected at:
            #    element 0: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*0 = 0x18
            #    element 1: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*1 = 0x1c
            # therefore, at address 0x18 ==> 0x1234
            # therefore, at address 0x1c ==> 0x1235
            # both words share the 64-bit entry that the dump reports at
            # address 24 (0x18)
            self.assertEqual(mem, [(24, 0x123500001234)])

            self.assertEqual(sim.gpr(12), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(13), SelectableInt(0x1235, 64))

    def test_sv_load_store_bitreverse(self):
        """Unit-strided store followed by a bit-reversed load, VL=4.

        Program::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0000
            addi 5, 0, 0x101
            addi 6, 0, 0x202
            addi 7, 0, 0x303
            addi 8, 0, 0x404
            sv.stw 5.v, 0(1)
            sv.lwzbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        bitreverse LD is computed as:

            for i in range(VL):
                EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",  # RC=0 (see EA notes below)
                        "addi 5, 0, 0x101",
                        "addi 6, 0, 0x202",
                        "addi 7, 0, 0x303",
                        "addi 8, 0, 0x404",
                        "sv.stw 5.v, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "sv.lwzbr 12.v, 4(1), 2"])  # bit-reversed
        lst = list(lst)

        # SVSTATE (in this case, VL=4)
        svstate = self._new_svstate(4)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)

            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303)])

            # the stored source registers are left untouched
            self.assertEqual(sim.gpr(5), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(6), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(7), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(8), SelectableInt(0x404, 64))
            # r1=0x10, RC=0, offs=4: contents of memory expected at:
            #    element 0: EA = r1 + bitrev(0b00)*4 => 0x10 + 0b00*4 => 0x10
            #    element 1: EA = r1 + bitrev(0b01)*4 => 0x10 + 0b10*4 => 0x18
            #    element 2: EA = r1 + bitrev(0b10)*4 => 0x10 + 0b01*4 => 0x14
            #    element 3: EA = r1 + bitrev(0b11)*4 => 0x10 + 0b11*4 => 0x1c
            # therefore loaded from (bit-reversed indexing):
            #    r12 => mem[0x10] which was stored from r5
            #    r13 => mem[0x18] which was stored from r7
            #    r14 => mem[0x14] which was stored from r6
            #    r15 => mem[0x1c] which was stored from r8
            self.assertEqual(sim.gpr(12), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(13), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(14), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(15), SelectableInt(0x404, 64))

    def test_sv_load_store_bitreverse2(self):
        """Floating-point store followed by a bit-reversed FP load, VL=4.

        Program::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0000
            sv.stfs 4.v, 0(1)
            sv.lfsbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        bitreverse LD is computed as:

            for i in range(VL):
                EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "sv.stfs 4.v, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "sv.lfsbr 12.v, 4(1), 2"])  # bit-reversed
        lst = list(lst)

        # SVSTATE (in this case, VL=4)
        svstate = self._new_svstate(4)

        # initial FP register file: f4..f7 hold the vector to be stored
        fprs = [0] * 32
        fprs[4] = fp64toselectable(1.0)
        fprs[5] = fp64toselectable(2.0)
        fprs[6] = fp64toselectable(3.0)
        fprs[7] = fp64toselectable(4.0)

        # expected results, remember that bit-reversed load has been done
        expected_fprs = deepcopy(fprs)
        expected_fprs[12] = fprs[4]  # 0b00 -> 0b00
        expected_fprs[13] = fprs[6]  # 0b01 -> 0b10
        expected_fprs[14] = fprs[5]  # 0b10 -> 0b01
        expected_fprs[15] = fprs[7]  # 0b11 -> 0b11

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            self._check_fpregs(sim, expected_fprs)

    def test_sv_load_store_bitreverse_remap_matrix(self):
        """Bit-reversed load combined with a Matrix REMAP, 8 elements.

        Program::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0000
            addi 4, 0, 0x101
            addi 5, 0, 0x202
            addi 6, 0, 0x303
            addi 7, 0, 0x404
            addi 8, 0, 0x505
            addi 9, 0, 0x606
            addi 10, 0, 0x707
            addi 11, 0, 0x808
            sv.stw 4.v, 0(1)
            svshape 4, 4, 2, 0, 0
            svremap 31, 1, 2, 3, 0, 0, 0, 1
            sv.lwzbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        bitreverse LD is computed as:

            for i in range(VL):
                EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans

        in this case however it is REMAPed via a Matrix Multiply Schedule,
        which is set up as 4x2.
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 4, 0, 0x101",
                        "addi 5, 0, 0x202",
                        "addi 6, 0, 0x303",
                        "addi 7, 0, 0x404",
                        "addi 8, 0, 0x505",
                        "addi 9, 0, 0x606",
                        "addi 10, 0, 0x707",
                        "addi 11, 0, 0x808",
                        "sv.stw 4.v, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "svshape 4, 4, 2, 0, 0",
                        "svremap 31, 1, 2, 3, 0, 0, 0, 1",
                        "sv.lwzbr 12.v, 4(1), 2"])  # bit-reversed
        lst = list(lst)

        # SVSTATE (in this case, VL=8: eight words are stored and loaded)
        svstate = self._new_svstate(8)

        stored = [0x101, 0x202, 0x303, 0x404, 0x505, 0x606, 0x707, 0x808]
        # combination of bit-reversed load with a Matrix REMAP
        loaded = [0x101, 0x505, 0x303, 0x707, 0x202, 0x606, 0x404, 0x808]

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)

            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303),
                                   (32, 0x060600000505),
                                   (40, 0x080800000707)])

            # r4..r11: the stored source vector is left untouched
            for i, value in enumerate(stored):
                self.assertEqual(sim.gpr(4 + i), SelectableInt(value, 64))
            # r12..r19: the REMAPed, bit-reversed load results
            for i, value in enumerate(loaded):
                self.assertEqual(sim.gpr(12 + i), SelectableInt(value, 64))

    def test_sv_load_store_bitreverse_remap_halfswap(self):
        """Bit-reversed load combined with a DCT half-swap REMAP.

        Program::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0000
            addi 4, 0, 0x001
            addi 5, 0, 0x102
            addi 6, 0, 0x203
            addi 7, 0, 0x304
            addi 8, 0, 0x405
            addi 9, 0, 0x506
            addi 10, 0, 0x607
            addi 11, 0, 0x708
            sv.stw 4.v, 0(1)
            svshape 8, 1, 1, 6, 0
            svremap 1, 0, 0, 0, 0, 0, 0, 1
            sv.lwzbr 12.v, 4(1), 2

        bitreverse LD is computed as:

            for i in range(VL):
                EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans

        on top of the bit-reversal is a REMAP for half-swaps for DCT
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 4, 0, 0x001",
                        "addi 5, 0, 0x102",
                        "addi 6, 0, 0x203",
                        "addi 7, 0, 0x304",
                        "addi 8, 0, 0x405",
                        "addi 9, 0, 0x506",
                        "addi 10, 0, 0x607",
                        "addi 11, 0, 0x708",
                        "sv.stw 4.v, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "svshape 8, 1, 1, 6, 0",
                        "svremap 1, 0, 0, 0, 0, 0, 0, 1",
                        "sv.lwzbr 12.v, 4(1), 2"])  # bit-reversed
        lst = list(lst)

        # SVSTATE (in this case, VL=8: eight words are stored and loaded)
        svstate = self._new_svstate(8)

        # values stored from r4..r11
        avi = [0x001, 0x102, 0x203, 0x304, 0x405, 0x506, 0x607, 0x708]
        n = len(avi)
        levels = n.bit_length() - 1
        # expected load order: half-swap the input, then index the result
        # through the bit-reversed element numbers
        ri = [reverse_bits(i, levels) for i in range(n)]
        swapped = halfrev2(avi, False)
        av = [swapped[idx] for idx in ri]

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)

            self.assertEqual(mem, [(16, 0x010200000001),
                                   (24, 0x030400000203),
                                   (32, 0x050600000405),
                                   (40, 0x070800000607)])

            # r4..r11: the stored source vector is left untouched
            for i, value in enumerate(avi):
                print("st gpr", i, sim.gpr(i+4), hex(value))
                self.assertEqual(sim.gpr(i+4), SelectableInt(value, 64))
            # combination of bit-reversed load with a DCT half-swap REMAP
            for i, value in enumerate(av):
                print("ld gpr", i, sim.gpr(i+12), hex(value))
                self.assertEqual(sim.gpr(i+12), value)

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None, initial_fprs=None):
        """Run ``prog`` through ``run_tst`` and return the simulator.

        ``initial_regs`` and ``initial_fprs`` default to 32 zeroed
        registers each; ``svstate`` is passed through unchanged.  The
        returned object is what the tests inspect via ``.mem``, ``.gpr()``
        and ``.fpr()``.
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        if initial_fprs is None:
            initial_fprs = [0] * 32
        simulator = run_tst(prog, initial_regs, svstate=svstate,
                            initial_fprs=initial_fprs)
        # callers dereference the result, so it must be handed back
        return simulator
443 if __name__
== "__main__":