1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.isa
.caller
import ISACaller
6 from openpower
.decoder
.power_decoder
import (create_pdecode
)
7 from openpower
.decoder
.power_decoder2
import (PowerDecode2
)
8 from openpower
.simulator
.program
import Program
9 from openpower
.decoder
.isa
.caller
import ISACaller
, SVP64State
10 from openpower
.decoder
.selectable_int
import SelectableInt
11 from openpower
.decoder
.orderedset
import OrderedSet
12 from openpower
.decoder
.isa
.all
import ISA
13 from openpower
.decoder
.isa
.test_caller
import Register
, run_tst
14 from openpower
.sv
.trans
.svp64
import SVP64Asm
15 from openpower
.consts
import SVP64CROffs
16 from openpower
.decoder
.helpers
import fp64toselectable
17 from openpower
.decoder
.isa
.remap_dct_yield
import (halfrev2
, reverse_bits
,
19 from copy
import deepcopy
22 class DecoderTestCase(FHDLTestCase
):
def _check_regs(self, sim, expected):
    """Assert that the simulator's integer registers match ``expected``.

    sim: object exposing ``gpr(i)``, which returns the value of
        integer register ``i``.
    expected: sequence indexed from register 0; each entry is wrapped
        as ``SelectableInt(value, 64)`` before being compared.

    Fails (via ``assertEqual``) at the first register that differs.
    """
    # the comparison needs a register index: walk every expected entry
    for i, value in enumerate(expected):
        self.assertEqual(sim.gpr(i), SelectableInt(value, 64),
                         "GPR %d mismatch" % i)
def _check_fpregs(self, sim, expected):
    """Assert that the simulator's floating-point registers match ``expected``.

    sim: object exposing ``fpr(i)``, which returns the value of
        floating-point register ``i``.
    expected: sequence indexed from register 0; each entry is wrapped
        as ``SelectableInt(value, 64)`` before being compared.

    Fails (via ``assertEqual``) at the first register that differs.
    """
    # the comparison needs a register index: walk every expected entry
    for i, value in enumerate(expected):
        self.assertEqual(sim.fpr(i), SelectableInt(value, 64),
                         "FPR %d mismatch" % i)
32 def test_sv_load_store_elementstride(self
):
33 """>>> lst = ["addi 1, 0, 0x0010",
                       "sv.stw/els 4.v, 24(1)",
                       "sv.lwz/els 8.v, 24(1)"]
40 note: element stride mode is only enabled when RA is a scalar
41 and when the immediate is non-zero
43 element stride is computed as:
45 EA = (RA|0) + EXTS(D) * i
47 lst
= SVP64Asm(["addi 1, 0, 0x0010",
51 "sv.stw/els 4.v, 24(1)", # scalar r1 + 16 + 24*offs
52 "sv.lwz/els 8.v, 24(1)"]) # scalar r1 + 16 + 24*offs
55 # SVSTATE (in this case, VL=2)
56 svstate
= SVP64State()
58 svstate
.maxvl
= 2 # MAXVL
59 print ("SVSTATE", bin(svstate
.asint()))
61 with
Program(lst
, bigendian
=False) as program
:
62 sim
= self
.run_tst_program(program
, svstate
=svstate
)
63 mem
= sim
.mem
.dump(printout
=False)
65 # contents of memory expected at:
66 # element 0: r1=0x10, D=24, => EA = 0x10+24*0 = 16 (0x10)
67 # element 1: r1=0x10, D=24, => EA = 0x10+24*1 = 40 (0x28)
68 # therefore, at address 0x10 ==> 0x1234
69 # therefore, at address 0x28 ==> 0x1235
70 expected_mem
= [(16, 0x1234),
72 self
.assertEqual(mem
, expected_mem
)
74 self
.assertEqual(sim
.gpr(8), SelectableInt(0x1234, 64))
75 self
.assertEqual(sim
.gpr(9), SelectableInt(0x1235, 64))
77 def test_sv_load_store_unitstride(self
):
78 """>>> lst = ["addi 1, 0, 0x0010",
85 note: unit stride mode is only enabled when RA is a scalar.
87 unit stride is computed as:
89 EA = (RA|0) + EXTS(D) + LDSTsize * i
90 where for stw and lwz, LDSTsize is 4 because it is 32-bit words
92 lst
= SVP64Asm(["addi 1, 0, 0x0010",
96 "sv.stw 8.v, 8(1)", # scalar r1 + 8 + wordlen*offs
97 "sv.lwz 12.v, 8(1)"]) # scalar r1 + 8 + wordlen*offs
100 # SVSTATE (in this case, VL=2)
101 svstate
= SVP64State()
103 svstate
.maxvl
= 2 # MAXVL
104 print ("SVSTATE", bin(svstate
.asint()))
106 with
Program(lst
, bigendian
=False) as program
:
107 sim
= self
.run_tst_program(program
, svstate
=svstate
)
108 mem
= sim
.mem
.dump(printout
=False)
111 # contents of memory expected at:
            # element 0: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*0 = 0x18
            # element 1: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*1 = 0x1c
            # therefore, at address 0x18 ==> 0x1234
            # therefore, at address 0x1c ==> 0x1235
116 self
.assertEqual(mem
, [(24, 0x123500001234)])
118 self
.assertEqual(sim
.gpr(12), SelectableInt(0x1234, 64))
119 self
.assertEqual(sim
.gpr(13), SelectableInt(0x1235, 64))
121 def test_sv_load_store_bitreverse(self
):
122 """>>> lst = ["addi 1, 0, 0x0010",
130 "sv.lwzbr 12.v, 4(1), 2"]
132 note: bitreverse mode is... odd. it's the butterfly generator
133 from Cooley-Tukey FFT:
134 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
136 bitreverse LD is computed as:
138 EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
140 bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
141 produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
143 and thus creates the butterfly needed for one iteration of FFT.
144 the RC (shift) is to be able to offset the LDs by Radix-2 spans
146 lst
= SVP64Asm(["addi 1, 0, 0x0010",
152 "sv.stw 5.v, 0(1)", # scalar r1 + 0 + wordlen*offs
153 "sv.lwzbr 12.v, 4(1), 2"]) # bit-reversed
156 # SVSTATE (in this case, VL=4)
157 svstate
= SVP64State()
159 svstate
.maxvl
= 4 # MAXVL
160 print ("SVSTATE", bin(svstate
.asint()))
162 with
Program(lst
, bigendian
=False) as program
:
163 sim
= self
.run_tst_program(program
, svstate
=svstate
)
164 mem
= sim
.mem
.dump(printout
=False)
167 self
.assertEqual(mem
, [(16, 0x020200000101),
168 (24, 0x040400000303)])
171 self
.assertEqual(sim
.gpr(5), SelectableInt(0x101, 64))
172 self
.assertEqual(sim
.gpr(6), SelectableInt(0x202, 64))
173 self
.assertEqual(sim
.gpr(7), SelectableInt(0x303, 64))
174 self
.assertEqual(sim
.gpr(8), SelectableInt(0x404, 64))
175 # r1=0x10, RC=0, offs=4: contents of memory expected at:
176 # element 0: EA = r1 + bitrev(0b00)*4 => 0x10 + 0b00*4 => 0x10
177 # element 1: EA = r1 + bitrev(0b01)*4 => 0x10 + 0b10*4 => 0x18
178 # element 2: EA = r1 + bitrev(0b10)*4 => 0x10 + 0b01*4 => 0x14
            # element 3: EA = r1 + bitrev(0b11)*4 => 0x10 + 0b11*4 => 0x1c
            # therefore loaded from (bit-reversed indexing):
            # r12 => mem[0x10] which was stored from r5
            # r13 => mem[0x18] which was stored from r7
            # r14 => mem[0x14] which was stored from r6
            # r15 => mem[0x1c] which was stored from r8
185 self
.assertEqual(sim
.gpr(12), SelectableInt(0x101, 64))
186 self
.assertEqual(sim
.gpr(13), SelectableInt(0x303, 64))
187 self
.assertEqual(sim
.gpr(14), SelectableInt(0x202, 64))
188 self
.assertEqual(sim
.gpr(15), SelectableInt(0x404, 64))
190 def test_sv_load_store_bitreverse2(self
):
191 """>>> lst = ["addi 1, 0, 0x0010",
195 "sv.lfsbr 12.v, 4(1), 2"]
197 note: bitreverse mode is... odd. it's the butterfly generator
198 from Cooley-Tukey FFT:
199 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
201 bitreverse LD is computed as:
203 EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
205 bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
206 produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
208 and thus creates the butterfly needed for one iteration of FFT.
209 the RC (shift) is to be able to offset the LDs by Radix-2 spans
211 lst
= SVP64Asm(["addi 1, 0, 0x0010",
213 "sv.stfs 4.v, 0(1)", # scalar r1 + 0 + wordlen*offs
214 "sv.lfsbr 12.v, 4(1), 2"]) # bit-reversed
217 # SVSTATE (in this case, VL=4)
218 svstate
= SVP64State()
220 svstate
.maxvl
= 4 # MAXVL
221 print ("SVSTATE", bin(svstate
.asint()))
226 fprs
[4] = fp64toselectable(1.0)
227 fprs
[5] = fp64toselectable(2.0)
228 fprs
[6] = fp64toselectable(3.0)
229 fprs
[7] = fp64toselectable(4.0)
231 # expected results, remember that bit-reversed load has been done
232 expected_fprs
= deepcopy(fprs
)
233 expected_fprs
[12] = fprs
[4] # 0b00 -> 0b00
234 expected_fprs
[13] = fprs
[6] # 0b01 -> 0b10
235 expected_fprs
[14] = fprs
[5] # 0b10 -> 0b01
236 expected_fprs
[15] = fprs
[7] # 0b11 -> 0b11
238 with
Program(lst
, bigendian
=False) as program
:
239 sim
= self
.run_tst_program(program
, svstate
=svstate
,
241 mem
= sim
.mem
.dump(printout
=False)
248 #self.assertEqual(mem, [(16, 0x020200000101),
249 # (24, 0x040400000303)])
250 self
._check
_fpregs
(sim
, expected_fprs
)
252 def test_sv_load_store_remap_matrix(self
):
253 """>>> lst = ["addi 1, 0, 0x0010",
260 "sv.stw 4.v, 0(1)", # scalar r1 + 0 + wordlen*offs
261 "svshape 3, 3, 4, 0, 0",
262 "svremap 1, 1, 2, 0, 0, 0, 0, 1",
266 REMAPed a LD operation via a Matrix Multiply Schedule,
267 which is set up as 3x4 result
269 lst
= SVP64Asm(["addi 1, 0, 0x0010",
286 "sv.stw 4.v, 0(1)", # scalar r1 + 0 + wordlen*offs
287 "svshape 3, 3, 4, 0, 0",
288 "svremap 1, 1, 2, 0, 0, 0, 0, 1",
290 #"sv.lwzbr 12.v, 4(1), 2", # bit-reversed
        # SVSTATE (in this case, VL=12)
295 svstate
= SVP64State()
297 svstate
.maxvl
= 12 # MAXVL
298 print ("SVSTATE", bin(svstate
.asint()))
302 with
Program(lst
, bigendian
=False) as program
:
303 sim
= self
.run_tst_program(program
, svstate
=svstate
,
305 mem
= sim
.mem
.dump(printout
=False)
309 self
.assertEqual(mem
, [(16, 0x020200000101),
310 (24, 0x040400000303),
311 (32, 0x060600000505),
312 (40, 0x080800000707),
313 (48, 0x0a0a00000909),
314 (56, 0x0c0c00000b0b)])
317 self
.assertEqual(sim
.gpr(4), SelectableInt(0x101, 64))
318 self
.assertEqual(sim
.gpr(5), SelectableInt(0x202, 64))
319 self
.assertEqual(sim
.gpr(6), SelectableInt(0x303, 64))
320 self
.assertEqual(sim
.gpr(7), SelectableInt(0x404, 64))
321 self
.assertEqual(sim
.gpr(8), SelectableInt(0x505, 64))
322 self
.assertEqual(sim
.gpr(9), SelectableInt(0x606, 64))
323 self
.assertEqual(sim
.gpr(10), SelectableInt(0x707, 64))
324 self
.assertEqual(sim
.gpr(11), SelectableInt(0x808, 64))
325 # combination of bit-reversed load with a Matrix REMAP
328 self
.assertEqual(sim
.gpr(20+i
), SelectableInt(0x101, 64))
329 self
.assertEqual(sim
.gpr(23+i
), SelectableInt(0x505, 64))
330 self
.assertEqual(sim
.gpr(26+i
), SelectableInt(0x909, 64))
331 self
.assertEqual(sim
.gpr(29+i
), SelectableInt(0x202, 64))
333 def test_sv_load_store_bitreverse_remap_halfswap(self
):
334 """>>> lst = ["addi 1, 0, 0x0010",
345 "svshape 8, 1, 1, 6, 0",
                       "svremap 1, 0, 0, 0, 0, 0, 0, 1",
347 "sv.lwzbr 12.v, 4(1), 2"]
349 bitreverse LD is computed as:
351 EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
353 bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
354 produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11
356 and thus creates the butterfly needed for one iteration of FFT.
357 the RC (shift) is to be able to offset the LDs by Radix-2 spans
359 on top of the bit-reversal is a REMAP for half-swaps for DCT
362 lst
= SVP64Asm(["addi 1, 0, 0x0010",
372 "sv.stw 4.v, 0(1)", # scalar r1 + 0 + wordlen*offs
373 "svshape 8, 1, 1, 6, 0",
374 "svremap 1, 0, 0, 0, 0, 0, 0, 1",
375 #"setvl 0, 0, 8, 0, 1, 1",
376 "sv.lwzbr 12.v, 4(1), 2", # bit-reversed
        # SVSTATE (in this case, VL=8)
382 svstate
= SVP64State()
384 svstate
.maxvl
= 8 # MAXVL
385 print ("SVSTATE", bin(svstate
.asint()))
389 avi
= [0x001, 0x102, 0x203, 0x304, 0x405, 0x506, 0x607, 0x708]
391 levels
= n
.bit_length() - 1
393 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
394 av
= halfrev2(avi
, False)
395 av
= [av
[ri
[i
]] for i
in range(n
)]
397 with
Program(lst
, bigendian
=False) as program
:
398 sim
= self
.run_tst_program(program
, svstate
=svstate
,
400 mem
= sim
.mem
.dump(printout
=False)
404 self
.assertEqual(mem
, [(16, 0x010200000001),
405 (24, 0x030400000203),
406 (32, 0x050600000405),
407 (40, 0x070800000607)])
409 for i
in range(len(avi
)):
410 print ("st gpr", i
, sim
.gpr(i
+4), hex(avi
[i
]))
411 self
.assertEqual(sim
.gpr(i
+4), avi
[i
])
412 self
.assertEqual(sim
.gpr(5), SelectableInt(0x102, 64))
413 self
.assertEqual(sim
.gpr(6), SelectableInt(0x203, 64))
414 self
.assertEqual(sim
.gpr(7), SelectableInt(0x304, 64))
415 self
.assertEqual(sim
.gpr(8), SelectableInt(0x405, 64))
416 self
.assertEqual(sim
.gpr(9), SelectableInt(0x506, 64))
417 self
.assertEqual(sim
.gpr(10), SelectableInt(0x607, 64))
418 self
.assertEqual(sim
.gpr(11), SelectableInt(0x708, 64))
419 # combination of bit-reversed load with a DCT half-swap REMAP
421 for i
in range(len(avi
)):
422 print ("ld gpr", i
, sim
.gpr(i
+12), hex(av
[i
]))
423 self
.assertEqual(sim
.gpr(i
+12), av
[i
])
def run_tst_program(self, prog, initial_regs=None,
                    svstate=None, initial_fprs=None):
    """Run ``prog`` through ``run_tst`` and return the resulting simulator.

    prog: the program object handed to ``run_tst`` unchanged.
    initial_regs: initial integer register values; defaults to a list
        of 32 zeros when None.
    svstate: forwarded to ``run_tst`` as the ``svstate`` keyword.
    initial_fprs: initial floating-point register values; defaults to
        a list of 32 zeros when None.

    Returns whatever ``run_tst`` returns; the tests in this class read
    ``.mem``, ``.gpr(n)`` and ``.fpr(n)`` from it.
    """
    if initial_regs is None:
        initial_regs = [0] * 32
    if initial_fprs is None:
        initial_fprs = [0] * 32
    simulator = run_tst(prog, initial_regs, svstate=svstate,
                        initial_fprs=initial_fprs)
    # callers bind the result (``sim = self.run_tst_program(...)``), so the
    # simulator has to be handed back rather than dropped
    return simulator
437 if __name__
== "__main__":