add mode for half-swap, to be combined with LD-bit-reversed for loading DCT
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_ldst.py
1 from nmigen import Module, Signal
2 from nmigen.back.pysim import Simulator, Delay, Settle
3 from nmutil.formaltest import FHDLTestCase
4 import unittest
5 from openpower.decoder.isa.caller import ISACaller
6 from openpower.decoder.power_decoder import (create_pdecode)
7 from openpower.decoder.power_decoder2 import (PowerDecode2)
8 from openpower.simulator.program import Program
9 from openpower.decoder.isa.caller import ISACaller, SVP64State
10 from openpower.decoder.selectable_int import SelectableInt
11 from openpower.decoder.orderedset import OrderedSet
12 from openpower.decoder.isa.all import ISA
13 from openpower.decoder.isa.test_caller import Register, run_tst
14 from openpower.sv.trans.svp64 import SVP64Asm
15 from openpower.consts import SVP64CROffs
16 from openpower.decoder.helpers import fp64toselectable
17 from copy import deepcopy
18
19
class DecoderTestCase(FHDLTestCase):
    """SVP64 vector load/store tests.

    Each test assembles a short SVP64 program, runs it via ``run_tst``
    with a preset SVSTATE (VL and MAXVL), and then compares the
    simulator's memory dump and register files with hand-computed values.
    """

    def _check_regs(self, sim, expected):
        """Assert that GPRs 0-31 of ``sim`` match ``expected``.

        ``expected`` is indexed by register number; every entry is
        compared as a 64-bit SelectableInt.  The register number is
        included in the failure message so a mismatch can be located.
        """
        for i in range(32):
            self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64),
                             "GPR %d mismatch" % i)

    def _check_fpregs(self, sim, expected):
        """Assert that FPRs 0-31 of ``sim`` match ``expected``.

        ``expected`` is indexed by register number; every entry is
        compared as a 64-bit SelectableInt.  The register number is
        included in the failure message so a mismatch can be located.
        """
        for i in range(32):
            self.assertEqual(sim.fpr(i), SelectableInt(expected[i], 64),
                             "FPR %d mismatch" % i)

    def _check_gpr_range(self, sim, first, values):
        """Assert that consecutive GPRs starting at ``first`` hold ``values``.

        Registers are checked in ascending order and the first mismatch
        fails the test, naming the offending register.
        """
        for offset, value in enumerate(values):
            regnum = first + offset
            self.assertEqual(sim.gpr(regnum), SelectableInt(value, 64),
                             "GPR %d mismatch" % regnum)

    def _assemble_svp64(self, insns):
        """Run ``insns`` through SVP64Asm and return the result as a list."""
        return list(SVP64Asm(insns))

    def _svstate_with_vl(self, vl):
        """Return a fresh SVP64State with VL and MAXVL both set to ``vl``.

        The resulting state is also printed in binary, as a debugging aid.
        """
        svstate = SVP64State()
        svstate.vl = vl  # VL
        svstate.maxvl = vl  # MAXVL
        print ("SVSTATE", bin(svstate.asint()))
        return svstate

    def test_sv_load_store_elementstride(self):
        """Element-strided vector store followed by a vector load.

        Program under test::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0008
            addi 4, 0, 0x1234
            addi 5, 0, 0x1235
            sv.stw/els 4.v, 24(1)
            sv.lwz/els 8.v, 24(1)

        note: element stride mode is only enabled when RA is a scalar
        and when the immediate is non-zero

        element stride is computed as:
            for i in range(VL):
                EA = (RA|0) + EXTS(D) * i
        """
        lst = self._assemble_svp64([
            "addi 1, 0, 0x0010",
            "addi 2, 0, 0x0008",
            "addi 4, 0, 0x1234",
            "addi 5, 0, 0x1235",
            "sv.stw/els 4.v, 24(1)",  # scalar r1 + 24*offs
            "sv.lwz/els 8.v, 24(1)",  # scalar r1 + 24*offs
        ])

        # SVSTATE (in this case, VL=2)
        svstate = self._svstate_with_vl(2)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print (mem)
            # contents of memory expected at:
            #    element 0:   r1=0x10, D=24, => EA = 0x10+24*0 = 16 (0x10)
            #    element 1:   r1=0x10, D=24, => EA = 0x10+24*1 = 40 (0x28)
            # therefore, at address 0x10 ==> 0x1234
            # therefore, at address 0x28 ==> 0x1235
            expected_mem = [(16, 0x1234),
                            (40, 0x1235)]
            self.assertEqual(mem, expected_mem)
            print(sim.gpr(1))
            # the element-strided load must bring both words back
            self._check_gpr_range(sim, 8, [0x1234, 0x1235])

    def test_sv_load_store_unitstride(self):
        """Unit-strided vector store followed by a vector load.

        Program under test::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0008
            addi 8, 0, 0x1234
            addi 9, 0, 0x1235
            sv.stw 8.v, 8(1)
            sv.lwz 12.v, 8(1)

        note: unit stride mode is only enabled when RA is a scalar.

        unit stride is computed as:
            for i in range(VL):
                EA = (RA|0) + EXTS(D) + LDSTsize * i
        where for stw and lwz, LDSTsize is 4 because it is 32-bit words
        """
        lst = self._assemble_svp64([
            "addi 1, 0, 0x0010",
            "addi 2, 0, 0x0008",
            "addi 8, 0, 0x1234",
            "addi 9, 0, 0x1235",
            "sv.stw 8.v, 8(1)",  # scalar r1 + 8 + wordlen*offs
            "sv.lwz 12.v, 8(1)",  # scalar r1 + 8 + wordlen*offs
        ])

        # SVSTATE (in this case, VL=2)
        svstate = self._svstate_with_vl(2)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print ("Mem")
            print (mem)
            # contents of memory expected at:
            #    element 0: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*0 = 0x18
            #    element 1: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*1 = 0x1c
            # therefore, at address 0x18 ==> 0x1234
            # therefore, at address 0x1c ==> 0x1235
            # the dump reports both words as one entry at address 24 (0x18)
            self.assertEqual(mem, [(24, 0x123500001234)])
            print(sim.gpr(1))
            self._check_gpr_range(sim, 12, [0x1234, 0x1235])

    def test_sv_load_store_bitreverse(self):
        """Unit-strided vector store followed by a bit-reversed load.

        Program under test::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0000
            addi 5, 0, 0x101
            addi 6, 0, 0x202
            addi 7, 0, 0x303
            addi 8, 0, 0x404
            sv.stw 5.v, 0(1)
            sv.lwzbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        the notes accompanying this test give bitreverse LD as:
            for i in range(VL):
                EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC
        whereas the values asserted below correspond to
        EA = r1 + 4 * bitreverse(i) (TODO: reconcile with the spec).

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans
        """
        lst = self._assemble_svp64([
            "addi 1, 0, 0x0010",
            "addi 2, 0, 0x0000",
            "addi 5, 0, 0x101",
            "addi 6, 0, 0x202",
            "addi 7, 0, 0x303",
            "addi 8, 0, 0x404",
            "sv.stw 5.v, 0(1)",  # scalar r1 + 0 + wordlen*offs
            "sv.lwzbr 12.v, 4(1), 2",  # bit-reversed
        ])

        # SVSTATE (in this case, VL=4)
        svstate = self._svstate_with_vl(4)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print (mem)

            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303)])
            print(sim.gpr(1))
            # from STs: the source registers must be left untouched
            self._check_gpr_range(sim, 5, [0x101, 0x202, 0x303, 0x404])
            # r1=0x10, RC register (r2) = 0: loads are expected from:
            #    element 0: EA = r1 + bitrev(0b00)*4 => 0x10 + 0b00*4 => 0x10
            #    element 1: EA = r1 + bitrev(0b01)*4 => 0x10 + 0b10*4 => 0x18
            #    element 2: EA = r1 + bitrev(0b10)*4 => 0x10 + 0b01*4 => 0x14
            #    element 3: EA = r1 + bitrev(0b11)*4 => 0x10 + 0b11*4 => 0x1c
            # therefore loaded from (bit-reversed indexing):
            #    r12 => mem[0x10] which was stored from r5
            #    r13 => mem[0x18] which was stored from r7
            #    r14 => mem[0x14] which was stored from r6
            #    r15 => mem[0x1c] which was stored from r8
            self._check_gpr_range(sim, 12, [0x101, 0x303, 0x202, 0x404])

    def test_sv_load_store_bitreverse2(self):
        """Floating-point vector store followed by a bit-reversed load.

        Program under test (FPRs 4-7 are preloaded with 1.0 .. 4.0)::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0000
            sv.stfs 4.v, 0(1)
            sv.lfsbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        the notes accompanying this test give bitreverse LD as:
            for i in range(VL):
                EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans
        """
        lst = self._assemble_svp64([
            "addi 1, 0, 0x0010",
            "addi 2, 0, 0x0000",
            "sv.stfs 4.v, 0(1)",  # scalar r1 + 0 + wordlen*offs
            "sv.lfsbr 12.v, 4(1), 2",  # bit-reversed
        ])

        # SVSTATE (in this case, VL=4)
        svstate = self._svstate_with_vl(4)

        # source vector: FPRs 4..7 hold 1.0, 2.0, 3.0, 4.0
        fprs = [0] * 32
        fprs[4] = fp64toselectable(1.0)
        fprs[5] = fp64toselectable(2.0)
        fprs[6] = fp64toselectable(3.0)
        fprs[7] = fp64toselectable(4.0)

        # expected results, remember that bit-reversed load has been done
        expected_fprs = deepcopy(fprs)
        expected_fprs[12] = fprs[4]  # 0b00 -> 0b00
        expected_fprs[13] = fprs[6]  # 0b01 -> 0b10
        expected_fprs[14] = fprs[5]  # 0b10 -> 0b01
        expected_fprs[15] = fprs[7]  # 0b11 -> 0b11

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            mem = sim.mem.dump(printout=False)
            print ("mem dump")
            print (mem)

            print ("FPRs")
            sim.fpr.dump()

            # memory contents are only printed, not asserted, in this test
            self._check_fpregs(sim, expected_fprs)

    def test_sv_load_store_bitreverse_remap(self):
        """Bit-reversed vector load combined with a REMAP schedule.

        Program under test::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0000
            addi 4, 0, 0x101
            addi 5, 0, 0x202
            addi 6, 0, 0x303
            addi 7, 0, 0x404
            addi 8, 0, 0x505
            addi 9, 0, 0x606
            addi 10, 0, 0x707
            addi 11, 0, 0x808
            sv.stw 4.v, 0(1)
            svshape 4, 4, 2, 0, 0
            svremap 31, 1, 2, 3, 0, 0, 0, 1
            sv.lwzbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        the notes accompanying this test give bitreverse LD as:
            for i in range(VL):
                EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans
        """
        lst = self._assemble_svp64([
            "addi 1, 0, 0x0010",
            "addi 2, 0, 0x0000",
            "addi 4, 0, 0x101",
            "addi 5, 0, 0x202",
            "addi 6, 0, 0x303",
            "addi 7, 0, 0x404",
            "addi 8, 0, 0x505",
            "addi 9, 0, 0x606",
            "addi 10, 0, 0x707",
            "addi 11, 0, 0x808",
            "sv.stw 4.v, 0(1)",  # scalar r1 + 0 + wordlen*offs
            "svshape 4, 4, 2, 0, 0",
            "svremap 31, 1, 2, 3, 0, 0, 0, 1",
            #"setvl 0, 0, 8, 0, 1, 1",
            "sv.lwzbr 12.v, 4(1), 2",  # bit-reversed
        ])

        # SVSTATE (in this case, VL=8)
        svstate = self._svstate_with_vl(8)

        # this test passes a 64-entry GPR list instead of the default 32
        regs = [0] * 64

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_regs=regs)
            mem = sim.mem.dump(printout=False)
            print ("Mem")
            print (mem)

            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303),
                                   (32, 0x060600000505),
                                   (40, 0x080800000707)])
            print(sim.gpr(1))
            # from STs: the source registers must be left untouched
            self._check_gpr_range(sim, 4, [0x101, 0x202, 0x303, 0x404,
                                           0x505, 0x606, 0x707, 0x808])
            # combination of bit-reversed load with a Matrix REMAP
            # schedule: stored elements come back in the order
            # 0 4 2 6 1 5 3 7
            self._check_gpr_range(sim, 12, [0x101, 0x505, 0x303, 0x707,
                                            0x202, 0x606, 0x404, 0x808])

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None, initial_fprs=None):
        """Run ``prog`` through ``run_tst`` and return the simulator.

        ``initial_regs`` and ``initial_fprs`` default to 32 zeroed
        entries each when not supplied; ``svstate`` is passed through
        unchanged.  The GPR file is dumped after the run as a debugging
        aid.
        """
        gprs = [0] * 32 if initial_regs is None else initial_regs
        fprs = [0] * 32 if initial_fprs is None else initial_fprs
        simulator = run_tst(prog, gprs, svstate=svstate,
                            initial_fprs=fprs)
        simulator.gpr.dump()
        return simulator
344
345
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()