add nayuki dct
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_ldst.py
1 from nmigen import Module, Signal
2 from nmigen.back.pysim import Simulator, Delay, Settle
3 from nmutil.formaltest import FHDLTestCase
4 import unittest
5 from openpower.decoder.isa.caller import ISACaller
6 from openpower.decoder.power_decoder import (create_pdecode)
7 from openpower.decoder.power_decoder2 import (PowerDecode2)
8 from openpower.simulator.program import Program
9 from openpower.decoder.isa.caller import ISACaller, SVP64State
10 from openpower.decoder.selectable_int import SelectableInt
11 from openpower.decoder.orderedset import OrderedSet
12 from openpower.decoder.isa.all import ISA
13 from openpower.decoder.isa.test_caller import Register, run_tst
14 from openpower.sv.trans.svp64 import SVP64Asm
15 from openpower.consts import SVP64CROffs
16 from openpower.decoder.helpers import fp64toselectable
17 from copy import deepcopy
18
19
class DecoderTestCase(FHDLTestCase):
    """SVP64 load/store tests executed through the ISA simulator.

    Each test assembles a short program with SVP64Asm, runs it with
    run_tst (via run_tst_program) and then checks the simulator's memory
    dump and/or register file against hand-computed expectations.

    NOTE(review): the element-/unit-stride tests set up SVSTATE with
    ``svstate.vl[0:7] = ...`` and print ``svstate.spr.asint()``, whereas
    the bit-reverse tests use ``svstate.vl = ...`` and ``svstate.asint()``.
    Only one of these is likely to match the current SVP64State API --
    confirm and unify.
    """

    def _check_regs(self, sim, expected):
        """Assert that GPRs 0..31 of *sim* equal *expected* as 64-bit values.

        *expected* is indexable by register number (0..31).
        """
        for i in range(32):
            self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64),
                             "GPR %d mismatch" % i)

    def _check_fpregs(self, sim, expected):
        """Assert that FPRs 0..31 of *sim* equal *expected* as 64-bit values.

        *expected* is indexable by register number (0..31).
        """
        for i in range(32):
            self.assertEqual(sim.fpr(i), SelectableInt(expected[i], 64),
                             "FPR %d mismatch" % i)

    def test_sv_load_store_elementstride(self):
        """Element-strided vector store followed by a vector load.

        Program under test::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0008
            addi 5, 0, 0x1234
            addi 6, 0, 0x1235
            sv.stw/els 5.v, 24(1)
            sv.lwz/els 9.v, 24(1)

        note: element stride mode is only enabled when RA is a scalar
        and when the immediate is non-zero

        element stride is computed as:
            for i in range(VL):
                EA = (RA|0) + EXTS(D) * i
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0008",
                        "addi 5, 0, 0x1234",
                        "addi 6, 0, 0x1235",
                        "sv.stw/els 5.v, 24(1)",   # EA = r1 + 24*i
                        "sv.lwz/els 9.v, 24(1)"])  # EA = r1 + 24*i
        lst = list(lst)

        # SVSTATE (in this case, VL=2)
        svstate = SVP64State()
        svstate.vl[0:7] = 2  # VL
        svstate.maxvl[0:7] = 2  # MAXVL
        print("SVSTATE", bin(svstate.spr.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print(mem)
            # contents of memory expected at:
            #    element 0: r1=0x10, D=24, => EA = 0x10+24*0 = 16 (0x10)
            #    element 1: r1=0x10, D=24, => EA = 0x10+24*1 = 40 (0x28)
            # therefore, at address 0x10 ==> 0x1234
            # therefore, at address 0x28 ==> 0x1235
            expected_mem = [(16, 0x1234),
                            (40, 0x1235)]
            self.assertEqual(mem, expected_mem)
            print(sim.gpr(1))
            # the load used the same addressing, so it reads back r5, r6
            self.assertEqual(sim.gpr(9), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(10), SelectableInt(0x1235, 64))

    def test_sv_load_store_unitstride(self):
        """Unit-strided vector store followed by a vector load.

        Program under test::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0008
            addi 5, 0, 0x1234
            addi 6, 0, 0x1235
            sv.stw 5.v, 8(1)
            sv.lwz 9.v, 8(1)

        note: unit stride mode is only enabled when RA is a scalar.

        unit stride is computed as:
            for i in range(VL):
                EA = (RA|0) + EXTS(D) + LDSTsize * i
        where for stw and lwz, LDSTsize is 4 because it is 32-bit words
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0008",
                        "addi 5, 0, 0x1234",
                        "addi 6, 0, 0x1235",
                        "sv.stw 5.v, 8(1)",   # scalar r1 + 8 + wordlen*offs
                        "sv.lwz 9.v, 8(1)"])  # scalar r1 + 8 + wordlen*offs
        lst = list(lst)

        # SVSTATE (in this case, VL=2)
        svstate = SVP64State()
        svstate.vl[0:7] = 2  # VL
        svstate.maxvl[0:7] = 2  # MAXVL
        print("SVSTATE", bin(svstate.spr.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print(mem)
            # contents of memory expected at:
            #    element 0: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*0 = 0x18
            #    element 1: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*1 = 0x1c
            # therefore, at address 0x18 ==> 0x1234
            # therefore, at address 0x1c ==> 0x1235
            # the dump reports both 32-bit words as a single entry at
            # address 24 (0x18), with the 0x1c word in the upper half
            self.assertEqual(mem, [(24, 0x123500001234)])
            print(sim.gpr(1))
            self.assertEqual(sim.gpr(9), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(10), SelectableInt(0x1235, 64))

    def test_sv_load_store_bitreverse(self):
        """Unit-strided integer store followed by a bit-reversed load.

        Program under test::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0000
            addi 5, 0, 0x101
            addi 6, 0, 0x202
            addi 7, 0, 0x303
            addi 8, 0, 0x404
            sv.stw 5.v, 0(1)
            sv.lwzbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        bitreverse LD is computed as:
            for i in range(VL):
                EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans

        NOTE(review): the expectations in this test step by 4 bytes per
        bit-reversed index (EA = r1 + bitrev(i)*4); confirm how that
        lines up with the D * LDSTsize term in the formula above.
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 5, 0, 0x101",
                        "addi 6, 0, 0x202",
                        "addi 7, 0, 0x303",
                        "addi 8, 0, 0x404",
                        "sv.stw 5.v, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "sv.lwzbr 12.v, 4(1), 2"])  # bit-reversed
        lst = list(lst)

        # SVSTATE (in this case, VL=4)
        svstate = SVP64State()
        svstate.vl = 4  # VL
        svstate.maxvl = 4  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print(mem)

            # the unit-strided store put r5..r8 at 0x10, 0x14, 0x18, 0x1c
            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303)])
            print(sim.gpr(1))
            # from STs: the source registers must be left untouched
            self.assertEqual(sim.gpr(5), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(6), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(7), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(8), SelectableInt(0x404, 64))
            # r1=0x10, RC=0, offs=4: loads are expected from:
            #    element 0: EA = r1 + bitrev(0b00)*4 => 0x10 + 0b00*4 => 0x10
            #    element 1: EA = r1 + bitrev(0b01)*4 => 0x10 + 0b10*4 => 0x18
            #    element 2: EA = r1 + bitrev(0b10)*4 => 0x10 + 0b01*4 => 0x14
            #    element 3: EA = r1 + bitrev(0b11)*4 => 0x10 + 0b11*4 => 0x1c
            # therefore loaded from (bit-reversed indexing):
            #    r12 => mem[0x10] which was stored from r5
            #    r13 => mem[0x18] which was stored from r7
            #    r14 => mem[0x14] which was stored from r6
            #    r15 => mem[0x1c] which was stored from r8
            self.assertEqual(sim.gpr(12), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(13), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(14), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(15), SelectableInt(0x404, 64))

    def test_sv_fpload_store_bitreverse(self):
        """Unit-strided FP store followed by a bit-reversed FP load.

        Program under test::

            addi 1, 0, 0x0010
            addi 2, 0, 0x0000
            sv.stfs 4.v, 0(1)
            sv.lfsbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        bitreverse LD is computed as:
            for i in range(VL):
                EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "sv.stfs 4.v, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "sv.lfsbr 12.v, 4(1), 2"])  # bit-reversed
        lst = list(lst)

        # SVSTATE (in this case, VL=4)
        svstate = SVP64State()
        svstate.vl = 4  # VL
        svstate.maxvl = 4  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        # source vector f4..f7; every other FPR starts as zero
        fprs = [0] * 32
        fprs[4] = fp64toselectable(1.0)
        fprs[5] = fp64toselectable(2.0)
        fprs[6] = fp64toselectable(3.0)
        fprs[7] = fp64toselectable(4.0)

        # expected results, remember that bit-reversed load has been done
        expected_fprs = deepcopy(fprs)
        expected_fprs[12] = fprs[4]  # 0b00 -> 0b00
        expected_fprs[13] = fprs[6]  # 0b01 -> 0b10
        expected_fprs[14] = fprs[5]  # 0b10 -> 0b01
        expected_fprs[15] = fprs[7]  # 0b11 -> 0b11

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            mem = sim.mem.dump(printout=False)
            print("mem dump")
            print(mem)

            print("FPRs")
            sim.fpr.dump()

            # memory contents are only printed, not asserted: this test
            # checks the FP register file after the bit-reversed load
            self._check_fpregs(sim, expected_fprs)

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None, initial_fprs=None):
        """Run *prog* through run_tst and return the resulting simulator.

        initial_regs and initial_fprs default to 32 zeroes each when not
        given; svstate is passed through to run_tst unchanged.  The GPRs
        are dumped after the run as a debugging aid.
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        if initial_fprs is None:
            initial_fprs = [0] * 32
        simulator = run_tst(prog, initial_regs, svstate=svstate,
                            initial_fprs=initial_fprs)
        simulator.gpr.dump()
        return simulator
259
260
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()