1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.isa
.caller
import ISACaller
6 from openpower
.decoder
.power_decoder
import (create_pdecode
)
7 from openpower
.decoder
.power_decoder2
import (PowerDecode2
)
8 from openpower
.simulator
.program
import Program
9 from openpower
.decoder
.isa
.caller
import ISACaller
, SVP64State
10 from openpower
.decoder
.selectable_int
import SelectableInt
11 from openpower
.decoder
.orderedset
import OrderedSet
12 from openpower
.decoder
.isa
.all
import ISA
13 from openpower
.decoder
.isa
.test_caller
import Register
, run_tst
14 from openpower
.sv
.trans
.svp64
import SVP64Asm
15 from openpower
.consts
import SVP64CROffs
16 from openpower
.decoder
.helpers
import fp64toselectable
17 from copy
import deepcopy
class DecoderTestCase(FHDLTestCase):
    """SVP64 vector load/store tests executed through ``run_tst``.

    Each test assembles a short program with ``SVP64Asm``, runs it via
    :meth:`run_tst_program` and then checks the simulator's memory dump
    and/or register files against hand-computed values.
    """

    def _check_regs(self, sim, expected):
        """Assert that GPR ``i`` equals ``expected[i]`` for every entry.

        sim: simulator object exposing ``gpr(i)``.
        expected: sequence of register values, indexed by register number;
            each is wrapped in a 64-bit ``SelectableInt`` for comparison.
        """
        for i, value in enumerate(expected):
            self.assertEqual(sim.gpr(i), SelectableInt(value, 64))

    def _check_fpregs(self, sim, expected):
        """Assert that FPR ``i`` equals ``expected[i]`` for every entry.

        sim: simulator object exposing ``fpr(i)``.
        expected: sequence of register values, indexed by register number;
            each is wrapped in a 64-bit ``SelectableInt`` for comparison.
        """
        for i, value in enumerate(expected):
            self.assertEqual(sim.fpr(i), SelectableInt(value, 64))

    @staticmethod
    def _make_svstate(vl):
        """Build an ``SVP64State`` with VL and MAXVL both set to ``vl``."""
        svstate = SVP64State()
        # NOTE(review): only the MAXVL assignment is certain; the VL
        # assignment is inferred from the "VL=n" remarks in the tests and
        # from the fact that they expect n elements to be processed.
        svstate.vl = vl  # VL
        svstate.maxvl = vl  # MAXVL
        print("SVSTATE", bin(svstate.asint()))
        return svstate

    def test_sv_load_store_elementstride(self):
        """Element-strided vector store followed by vector load.

        Program::

            addi 1, 0, 0x0010
            addi 4, 0, 0x1234
            addi 5, 0, 0x1235
            sv.stw/els 4.v, 24(1)
            sv.lwz/els 8.v, 24(1)

        note: element stride mode is only enabled when RA is a scalar
        and when the immediate is non-zero

        element stride is computed, for each element i, as:

            EA = (RA|0) + EXTS(D) * i
        """
        # NOTE(review): the two "addi 4/5" initialisers are inferred from
        # the values asserted below (0x1234 and 0x1235) - confirm.
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 4, 0, 0x1234",
                        "addi 5, 0, 0x1235",
                        "sv.stw/els 4.v, 24(1)",   # EA = r1 + 24*i
                        "sv.lwz/els 8.v, 24(1)"])  # EA = r1 + 24*i

        # SVSTATE (in this case, VL=2)
        svstate = self._make_svstate(2)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)

            # contents of memory expected at:
            #    element 0: r1=0x10, D=24, => EA = 0x10+24*0 = 16 (0x10)
            #    element 1: r1=0x10, D=24, => EA = 0x10+24*1 = 40 (0x28)
            # therefore, at address 0x10 ==> 0x1234
            # therefore, at address 0x28 ==> 0x1235
            expected_mem = [(16, 0x1234),
                            (40, 0x1235)]
            self.assertEqual(mem, expected_mem)

            self.assertEqual(sim.gpr(8), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(9), SelectableInt(0x1235, 64))

    def test_sv_load_store_unitstride(self):
        """Unit-strided vector store followed by vector load.

        Program::

            addi 1, 0, 0x0010
            addi 8, 0, 0x1234
            addi 9, 0, 0x1235
            sv.stw 8.v, 8(1)
            sv.lwz 12.v, 8(1)

        note: unit stride mode is only enabled when RA is a scalar.

        unit stride is computed, for each element i, as:

            EA = (RA|0) + EXTS(D) + LDSTsize * i

        where for stw and lwz, LDSTsize is 4 because it is 32-bit words
        """
        # NOTE(review): the two "addi 8/9" initialisers are inferred from
        # the values asserted below (0x1234 and 0x1235) - confirm.
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 8, 0, 0x1234",
                        "addi 9, 0, 0x1235",
                        "sv.stw 8.v, 8(1)",    # r1 + 8 + wordlen*i
                        "sv.lwz 12.v, 8(1)"])  # r1 + 8 + wordlen*i

        # SVSTATE (in this case, VL=2)
        svstate = self._make_svstate(2)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)

            # contents of memory expected at:
            #    element 0: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*0 = 0x18
            #    element 1: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*1 = 0x1c
            # therefore, at address 0x18 ==> 0x1234
            # therefore, at address 0x1c ==> 0x1235
            # both 32-bit words show up packed in the one dump entry at
            # 24 (0x18): 0x1234 in the low half, 0x1235 in the high half
            self.assertEqual(mem, [(24, 0x123500001234)])

            self.assertEqual(sim.gpr(12), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(13), SelectableInt(0x1235, 64))

    def test_sv_load_store_bitreverse(self):
        """Unit-strided vector store followed by a bit-reversed load.

        Program::

            addi 1, 0, 0x0010
            addi 5, 0, 0x101
            addi 6, 0, 0x202
            addi 7, 0, 0x303
            addi 8, 0, 0x404
            sv.stw 5.v, 0(1)
            sv.lwzbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        bitreverse LD is computed, for each element i, as:

            EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans
        """
        # NOTE(review): the "addi 5..8" initialisers are inferred from the
        # r5-r8 values asserted below - confirm.
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 5, 0, 0x101",
                        "addi 6, 0, 0x202",
                        "addi 7, 0, 0x303",
                        "addi 8, 0, 0x404",
                        "sv.stw 5.v, 0(1)",  # r1 + 0 + wordlen*i
                        "sv.lwzbr 12.v, 4(1), 2"])  # bit-reversed

        # SVSTATE (in this case, VL=4)
        svstate = self._make_svstate(4)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)

            # the store is plain unit-stride: r5..r8 land at 0x10..0x1c
            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303)])

            # source registers must be left untouched
            self.assertEqual(sim.gpr(5), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(6), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(7), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(8), SelectableInt(0x404, 64))

            # r1=0x10, RC=0, offs=4: contents of memory expected at:
            #    element 0: EA = r1 + bitrev(0b00)*4 => 0x10 + 0b00*4 => 0x10
            #    element 1: EA = r1 + bitrev(0b01)*4 => 0x10 + 0b10*4 => 0x18
            #    element 2: EA = r1 + bitrev(0b10)*4 => 0x10 + 0b01*4 => 0x14
            #    element 3: EA = r1 + bitrev(0b11)*4 => 0x10 + 0b11*4 => 0x1c
            # therefore loaded from (bit-reversed indexing):
            #    r12 => mem[0x10] which was stored from r5
            #    r13 => mem[0x18] which was stored from r7
            #    r14 => mem[0x14] which was stored from r6
            #    r15 => mem[0x1c] which was stored from r8
            self.assertEqual(sim.gpr(12), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(13), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(14), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(15), SelectableInt(0x404, 64))

    def test_sv_load_store_bitreverse2(self):
        """Floating-point variant of the bit-reversed load test.

        Program::

            addi 1, 0, 0x0010
            sv.stfs 4.v, 0(1)
            sv.lfsbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        bitreverse LD is computed, for each element i, as:

            EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "sv.stfs 4.v, 0(1)",  # r1 + 0 + wordlen*i
                        "sv.lfsbr 12.v, 4(1), 2"])  # bit-reversed

        # SVSTATE (in this case, VL=4)
        svstate = self._make_svstate(4)

        # initial FP register file: f4..f7 hold the values to be stored.
        # 32 entries, matching the default used by run_tst_program.
        fprs = [0] * 32
        fprs[4] = fp64toselectable(1.0)
        fprs[5] = fp64toselectable(2.0)
        fprs[6] = fp64toselectable(3.0)
        fprs[7] = fp64toselectable(4.0)

        # expected results, remember that bit-reversed load has been done
        expected_fprs = deepcopy(fprs)
        expected_fprs[12] = fprs[4]  # 0b00 -> 0b00
        expected_fprs[13] = fprs[6]  # 0b01 -> 0b10
        expected_fprs[14] = fprs[5]  # 0b10 -> 0b01
        expected_fprs[15] = fprs[7]  # 0b11 -> 0b11

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            # memory contents are deliberately not asserted here; only the
            # resulting FP register file is checked.
            self._check_fpregs(sim, expected_fprs)

    def test_sv_load_store_bitreverse_remap(self):
        """Bit-reversed load combined with a Matrix REMAP.

        Program::

            addi 1, 0, 0x0010
            addi 4, 0, 0x101
            ...
            addi 11, 0, 0x808
            sv.stw 4.v, 0(1)
            svshape 4, 4, 2, 0, 0
            svremap 31, 1, 2, 3, 0, 0, 0, 1
            sv.lwzbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        bitreverse LD is computed, for each element i, as:

            EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans
        """
        # NOTE(review): the "addi 4..11" initialisers are inferred from the
        # r4-r11 values asserted below - confirm.
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 4, 0, 0x101",
                        "addi 5, 0, 0x202",
                        "addi 6, 0, 0x303",
                        "addi 7, 0, 0x404",
                        "addi 8, 0, 0x505",
                        "addi 9, 0, 0x606",
                        "addi 10, 0, 0x707",
                        "addi 11, 0, 0x808",
                        "sv.stw 4.v, 0(1)",  # r1 + 0 + wordlen*i
                        "svshape 4, 4, 2, 0, 0",
                        "svremap 31, 1, 2, 3, 0, 0, 0, 1",
                        "sv.lwzbr 12.v, 4(1), 2"])  # bit-reversed

        # SVSTATE (in this case, VL=8: eight words are stored and loaded)
        svstate = self._make_svstate(8)

        with Program(lst, bigendian=False) as program:
            # NOTE(review): any extra keyword arguments the original call
            # may have carried are not recoverable; defaults are used.
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)

            # the store is plain unit-stride: r4..r11 land at 0x10..0x2c
            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303),
                                   (32, 0x060600000505),
                                   (40, 0x080800000707)])

            # source registers must be left untouched
            self.assertEqual(sim.gpr(4), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(5), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(6), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(7), SelectableInt(0x404, 64))
            self.assertEqual(sim.gpr(8), SelectableInt(0x505, 64))
            self.assertEqual(sim.gpr(9), SelectableInt(0x606, 64))
            self.assertEqual(sim.gpr(10), SelectableInt(0x707, 64))
            self.assertEqual(sim.gpr(11), SelectableInt(0x808, 64))

            # combination of bit-reversed load with a Matrix REMAP
            self.assertEqual(sim.gpr(12), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(13), SelectableInt(0x505, 64))
            self.assertEqual(sim.gpr(14), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(15), SelectableInt(0x707, 64))
            self.assertEqual(sim.gpr(16), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(17), SelectableInt(0x606, 64))
            self.assertEqual(sim.gpr(18), SelectableInt(0x404, 64))
            self.assertEqual(sim.gpr(19), SelectableInt(0x808, 64))

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None, initial_fprs=None):
        """Run ``prog`` through ``run_tst`` and return the simulator.

        prog: the assembled program to execute.
        initial_regs: initial GPR values; defaults to 32 zeros.
        svstate: optional SVSTATE passed straight through to ``run_tst``.
        initial_fprs: initial FPR values; defaults to 32 zeros.

        Returns whatever ``run_tst`` returns; the tests read ``mem``,
        ``gpr(i)`` and ``fpr(i)`` from it.
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        if initial_fprs is None:
            initial_fprs = [0] * 32
        simulator = run_tst(prog, initial_regs, svstate=svstate,
                            initial_fprs=initial_fprs)
        # every caller inspects the simulator afterwards, so hand it back
        return simulator
346 if __name__
== "__main__":