corrections to SVP64 LD/ST unit tests
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_ldst.py
1 from nmigen import Module, Signal
2 from nmigen.back.pysim import Simulator, Delay, Settle
3 from nmutil.formaltest import FHDLTestCase
4 import unittest
5 from openpower.decoder.isa.caller import ISACaller
6 from openpower.decoder.power_decoder import (create_pdecode)
7 from openpower.decoder.power_decoder2 import (PowerDecode2)
8 from openpower.simulator.program import Program
9 from openpower.decoder.isa.caller import ISACaller, SVP64State
10 from openpower.decoder.selectable_int import SelectableInt
11 from openpower.decoder.orderedset import OrderedSet
12 from openpower.decoder.isa.all import ISA
13 from openpower.decoder.isa.test_caller import Register, run_tst
14 from openpower.sv.trans.svp64 import SVP64Asm
15 from openpower.consts import SVP64CROffs
16 from openpower.decoder.helpers import fp64toselectable
17 from copy import deepcopy
18
19
class DecoderTestCase(FHDLTestCase):
    """SVP64 vector load/store tests.

    Each test assembles a short SVP64 program, runs it through ``run_tst``
    with a preset SVSTATE (VL and MAXVL), and then checks the simulator's
    memory dump and/or destination registers.
    """

    def _check_regs(self, sim, expected):
        """Assert that GPRs 0..31 of *sim* equal the 32 values in *expected*."""
        for i in range(32):
            self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64),
                             "GPR %d mismatch" % i)

    def _check_fpregs(self, sim, expected):
        """Assert that FPRs 0..31 of *sim* equal the 32 values in *expected*."""
        for i in range(32):
            self.assertEqual(sim.fpr(i), SelectableInt(expected[i], 64),
                             "FPR %d mismatch" % i)

    def _make_svstate(self, vl):
        """Return an SVP64State with VL and MAXVL both set to *vl*.

        The resulting state is also printed in binary to aid debugging.
        """
        svstate = SVP64State()
        svstate.vl = vl  # VL
        svstate.maxvl = vl  # MAXVL
        print("SVSTATE", bin(svstate.asint()))
        return svstate

    def test_sv_load_store_elementstride(self):
        """>>> lst = ["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0008",
                        "addi 4, 0, 0x1234",
                        "addi 5, 0, 0x1235",
                        "sv.stw/els 4.v, 24(1)",
                        "sv.lwz/els 8.v, 24(1)"]

        note: element stride mode is only enabled when RA is a scalar
        and when the immediate is non-zero

        element stride is computed as:
        for i in range(VL):
            EA = (RA|0) + EXTS(D) * i
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0008",
                        "addi 4, 0, 0x1234",
                        "addi 5, 0, 0x1235",
                        "sv.stw/els 4.v, 24(1)",  # EA = r1 (0x10) + 24*i
                        "sv.lwz/els 8.v, 24(1)"])  # EA = r1 (0x10) + 24*i
        lst = list(lst)

        # SVSTATE (in this case, VL=2)
        svstate = self._make_svstate(2)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print(mem)
            # contents of memory expected at:
            #    element 0: r1=0x10, D=24, => EA = 0x10+24*0 = 16 (0x10)
            #    element 1: r1=0x10, D=24, => EA = 0x10+24*1 = 40 (0x28)
            # therefore, at address 0x10 ==> 0x1234
            # therefore, at address 0x28 ==> 0x1235
            expected_mem = [(16, 0x1234),
                            (40, 0x1235)]
            self.assertEqual(mem, expected_mem)
            print(sim.gpr(1))
            self.assertEqual(sim.gpr(8), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(9), SelectableInt(0x1235, 64))

    def test_sv_load_store_unitstride(self):
        """>>> lst = ["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0008",
                        "addi 8, 0, 0x1234",
                        "addi 9, 0, 0x1235",
                        "sv.stw 8.v, 8(1)",
                        "sv.lwz 12.v, 8(1)"]

        note: unit stride mode is only enabled when RA is a scalar.

        unit stride is computed as:
        for i in range(VL):
            EA = (RA|0) + EXTS(D) + LDSTsize * i
            where for stw and lwz, LDSTsize is 4 because it is 32-bit words
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0008",
                        "addi 8, 0, 0x1234",
                        "addi 9, 0, 0x1235",
                        "sv.stw 8.v, 8(1)",  # scalar r1 + 8 + wordlen*offs
                        "sv.lwz 12.v, 8(1)"])  # scalar r1 + 8 + wordlen*offs
        lst = list(lst)

        # SVSTATE (in this case, VL=2)
        svstate = self._make_svstate(2)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print("Mem")
            print(mem)
            # contents of memory expected at:
            #    element 0: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*0 = 0x18
            #    element 1: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*1 = 0x1c
            # therefore, at address 0x18 ==> 0x1234
            # therefore, at address 0x1c ==> 0x1235
            # the dump reports both 32-bit words as a single entry at
            # address 24 (0x18), with the lower address in the low bits
            self.assertEqual(mem, [(24, 0x123500001234)])
            print(sim.gpr(1))
            self.assertEqual(sim.gpr(12), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(13), SelectableInt(0x1235, 64))

    def test_sv_load_store_bitreverse(self):
        """>>> lst = ["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 5, 0, 0x101",
                        "addi 6, 0, 0x202",
                        "addi 7, 0, 0x303",
                        "addi 8, 0, 0x404",
                        "sv.stw 5.v, 0(1)",
                        "sv.lwzbr 12.v, 4(1), 2"]

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        bitreverse LD is computed as:
        for i in range(VL):
            EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans

        NOTE(review): the worked example in the test body uses a stride of
        4 bytes per bit-reversed index with D=4; confirm against the spec
        how D and LDSTsize combine in the formula above.
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 5, 0, 0x101",
                        "addi 6, 0, 0x202",
                        "addi 7, 0, 0x303",
                        "addi 8, 0, 0x404",
                        "sv.stw 5.v, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "sv.lwzbr 12.v, 4(1), 2"])  # bit-reversed
        lst = list(lst)

        # SVSTATE (in this case, VL=4)
        svstate = self._make_svstate(4)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print(mem)

            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303)])
            print(sim.gpr(1))
            # from STs
            self.assertEqual(sim.gpr(5), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(6), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(7), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(8), SelectableInt(0x404, 64))
            # r1=0x10, RC=0, offs=4: contents of memory expected at:
            #    element 0: EA = r1 + bitrev(0b00)*4 => 0x10 + 0b00*4 => 0x10
            #    element 1: EA = r1 + bitrev(0b01)*4 => 0x10 + 0b10*4 => 0x18
            #    element 2: EA = r1 + bitrev(0b10)*4 => 0x10 + 0b01*4 => 0x14
            #    element 3: EA = r1 + bitrev(0b11)*4 => 0x10 + 0b11*4 => 0x1c
            # therefore loaded from (bit-reversed indexing):
            #    r12 => mem[0x10] which was stored from r5
            #    r13 => mem[0x18] which was stored from r7
            #    r14 => mem[0x14] which was stored from r6
            #    r15 => mem[0x1c] which was stored from r8
            self.assertEqual(sim.gpr(12), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(13), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(14), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(15), SelectableInt(0x404, 64))

    # This method previously reused the name test_sv_load_store_bitreverse,
    # which rebound the attribute in the class body and silently dropped
    # the integer bit-reverse test above.  It now has its own name so that
    # both tests are collected and run.
    def test_sv_fp_load_store_bitreverse(self):
        """>>> lst = ["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "sv.stfs 4.v, 0(1)",
                        "sv.lfsbr 12.v, 4(1), 2"]

        floating-point variant of the bit-reversed load test: FPRs 4..7
        are stored with sv.stfs and loaded back into FPRs 12..15 with
        sv.lfsbr.

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        bitreverse LD is computed as:
        for i in range(VL):
            EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "sv.stfs 4.v, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "sv.lfsbr 12.v, 4(1), 2"])  # bit-reversed
        lst = list(lst)

        # SVSTATE (in this case, VL=4)
        svstate = self._make_svstate(4)

        # source vector: FPRs 4..7 hold 1.0, 2.0, 3.0 and 4.0
        fprs = [0] * 32
        fprs[4] = fp64toselectable(1.0)
        fprs[5] = fp64toselectable(2.0)
        fprs[6] = fp64toselectable(3.0)
        fprs[7] = fp64toselectable(4.0)

        # expected results, remember that bit-reversed load has been done
        expected_fprs = deepcopy(fprs)
        expected_fprs[12] = fprs[4]  # 0b00 -> 0b00
        expected_fprs[13] = fprs[6]  # 0b01 -> 0b10
        expected_fprs[14] = fprs[5]  # 0b10 -> 0b01
        expected_fprs[15] = fprs[7]  # 0b11 -> 0b11

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            mem = sim.mem.dump(printout=False)
            print("mem dump")
            print(mem)

            print("FPRs")
            sim.fpr.dump()

            # memory contents are only printed, not asserted: this test
            # checks the round trip through the FP register file
            self._check_fpregs(sim, expected_fprs)

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None, initial_fprs=None):
        """Run *prog* through ``run_tst`` and return the simulator.

        initial_regs and initial_fprs default to 32 zeroed registers each
        when not supplied; svstate is passed through unchanged.  The GPR
        file is dumped after the run to aid debugging.
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        if initial_fprs is None:
            initial_fprs = [0] * 32
        simulator = run_tst(prog, initial_regs, svstate=svstate,
                            initial_fprs=initial_fprs)
        simulator.gpr.dump()
        return simulator
260
261
# Run every test case in this module when the file is executed directly
# as a script; importing the module does not trigger the test run.
if __name__ == "__main__":
    unittest.main()