1 from nmigen
import Module
, Signal
2 from nmigen
.back
.pysim
import Simulator
, Delay
, Settle
3 from nmutil
.formaltest
import FHDLTestCase
5 from openpower
.decoder
.isa
.caller
import ISACaller
6 from openpower
.decoder
.power_decoder
import (create_pdecode
)
7 from openpower
.decoder
.power_decoder2
import (PowerDecode2
)
8 from openpower
.simulator
.program
import Program
9 from openpower
.decoder
.isa
.caller
import ISACaller
, SVP64State
10 from openpower
.decoder
.selectable_int
import SelectableInt
11 from openpower
.decoder
.orderedset
import OrderedSet
12 from openpower
.decoder
.isa
.all
import ISA
13 from openpower
.decoder
.isa
.test_caller
import Register
, run_tst
14 from openpower
.sv
.trans
.svp64
import SVP64Asm
15 from openpower
.consts
import SVP64CROffs
16 from openpower
.decoder
.helpers
import fp64toselectable
17 from copy
import deepcopy
class DecoderTestCase(FHDLTestCase):
    """SVP64 vector load/store tests.

    Each test assembles a short SVP64 program with ``SVP64Asm``, executes
    it through ``run_tst_program`` (a thin wrapper around ``run_tst``) and
    then checks the memory dump and/or register contents of the returned
    simulator object.

    NOTE(review): this class was reconstructed from an extract in which
    several source lines were not visible.  Every statement that had to
    be inferred is marked with its own ``NOTE(review)`` comment below and
    should be confirmed against the repository copy.
    """

    def _check_regs(self, sim, expected):
        """Assert that the integer registers of *sim* match *expected*.

        *expected* is indexed by register number; entry ``n`` is compared,
        as a 64-bit ``SelectableInt``, against ``sim.gpr(n)``.
        """
        # NOTE(review): no loop header binding the index was visible in
        # the reviewed extract; iterating over `expected` checks exactly
        # the registers the caller supplied.
        for regnum, value in enumerate(expected):
            self.assertEqual(sim.gpr(regnum), SelectableInt(value, 64))

    def _check_fpregs(self, sim, expected):
        """Assert that the FP registers of *sim* match *expected*.

        *expected* is indexed by register number; entry ``n`` is compared,
        as a 64-bit ``SelectableInt``, against ``sim.fpr(n)``.
        """
        # NOTE(review): loop header not visible in the reviewed extract;
        # see _check_regs.
        for regnum, value in enumerate(expected):
            self.assertEqual(sim.fpr(regnum), SelectableInt(value, 64))

    def test_sv_load_store_elementstride(self):
        """Element-strided vector store followed by a vector load.

        The program under test ends with::

            sv.stw/els 5.v, 24(1)
            sv.lwz/els 9.v, 24(1)

        note: element stride mode is only enabled when RA is a scalar
        and when the immediate is non-zero

        element stride is computed as:

            EA = (RA|0) + EXTS(D) * i
        """
        # NOTE(review): the instructions that initialise r5/r6 were not
        # visible in the reviewed extract; the values used here are
        # inferred from the assertions at the end of this test.
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 5, 0, 0x1234",
                        "addi 6, 0, 0x1235",
                        "sv.stw/els 5.v, 24(1)",   # EA = r1 + 24*i
                        "sv.lwz/els 9.v, 24(1)"])  # EA = r1 + 24*i
        # NOTE(review): presumably Program wants a plain list and SVP64Asm
        # is iterable -- this conversion was not visible in the extract.
        lst = list(lst)

        # SVSTATE (in this case, VL=2)
        svstate = SVP64State()
        svstate.vl[0:7] = 2     # VL
        svstate.maxvl[0:7] = 2  # MAXVL
        print("SVSTATE", bin(svstate.spr.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)

            # contents of memory expected at:
            #    element 0:   r1=0x10, D=24, => EA = 0x10+24*0 = 16 (0x10)
            #    element 1:   r1=0x10, D=24, => EA = 0x10+24*1 = 40 (0x28)
            # therefore, at address 0x10 ==> 0x1234
            # therefore, at address 0x28 ==> 0x1235
            # NOTE(review): the second entry was not visible in the
            # extract; it is taken from the comment lines above.
            expected_mem = [(16, 0x1234),
                            (40, 0x1235)]
            self.assertEqual(mem, expected_mem)

            # the load must bring back exactly what the store wrote
            self.assertEqual(sim.gpr(9), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(10), SelectableInt(0x1235, 64))

    def test_sv_load_store_unitstride(self):
        """Unit-strided vector store followed by a vector load.

        The program under test ends with::

            sv.stw 5.v, 8(1)
            sv.lwz 9.v, 8(1)

        note: unit stride mode is only enabled when RA is a scalar.

        unit stride is computed as:

            EA = (RA|0) + EXTS(D) + LDSTsize * i

        where for stw and lwz, LDSTsize is 4 because it is 32-bit words
        """
        # NOTE(review): the instructions that initialise r5/r6 were not
        # visible in the reviewed extract; the values used here are
        # inferred from the assertions at the end of this test.
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 5, 0, 0x1234",
                        "addi 6, 0, 0x1235",
                        "sv.stw 5.v, 8(1)",   # EA = r1 + 8 + wordlen*i
                        "sv.lwz 9.v, 8(1)"])  # EA = r1 + 8 + wordlen*i
        # NOTE(review): conversion not visible in the extract; see
        # test_sv_load_store_elementstride.
        lst = list(lst)

        # SVSTATE (in this case, VL=2)
        svstate = SVP64State()
        svstate.vl[0:7] = 2     # VL
        svstate.maxvl[0:7] = 2  # MAXVL
        print("SVSTATE", bin(svstate.spr.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)

            # contents of memory expected at:
            #    element 0: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*0 = 0x18
            #    element 1: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*1 = 0x1c
            # therefore, at address 0x18 ==> 0x1234
            # therefore, at address 0x1c ==> 0x1235
            # both words sit in the single 64-bit dump entry at address
            # 24 (0x18): 0x1234 in the low half, 0x1235 in the high half.
            self.assertEqual(mem, [(24, 0x123500001234)])

            # the load must bring back exactly what the store wrote
            self.assertEqual(sim.gpr(9), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(10), SelectableInt(0x1235, 64))

    def test_sv_load_store_bitreverse(self):
        """Unit-strided integer store followed by a bit-reversed load.

        The program under test ends with::

            sv.stw 5.v, 0(1)
            sv.lwzbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        bitreverse LD is computed as:

            EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans
        """
        # NOTE(review): the instructions that initialise r5..r8 were not
        # visible in the reviewed extract; the values used here are
        # inferred from the gpr(5)..gpr(8) assertions below.  r2 (RC) is
        # left at the zero supplied by run_tst_program's default
        # initial_regs.
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 5, 0, 0x101",
                        "addi 6, 0, 0x202",
                        "addi 7, 0, 0x303",
                        "addi 8, 0, 0x404",
                        "sv.stw 5.v, 0(1)",  # EA = r1 + 0 + wordlen*i
                        "sv.lwzbr 12.v, 4(1), 2"])  # bit-reversed
        # NOTE(review): conversion not visible in the extract; see
        # test_sv_load_store_elementstride.
        lst = list(lst)

        # SVSTATE (in this case, VL=4)
        # NOTE(review): the stride tests above set VL/MAXVL by slice
        # assignment and print svstate.spr; this test assigns directly and
        # prints svstate itself -- confirm SVP64State supports both forms.
        svstate = SVP64State()
        # NOTE(review): the VL assignment was not visible in the extract;
        # inferred from the "VL=4" comment and the four-element results.
        svstate.vl = 4     # VL
        svstate.maxvl = 4  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)

            # the unit-strided store packs two 32-bit words per dump entry
            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303)])

            # the source registers must be untouched
            self.assertEqual(sim.gpr(5), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(6), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(7), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(8), SelectableInt(0x404, 64))
            # r1=0x10, RC=0, offs=4: contents of memory expected at:
            #    element 0: EA = r1 + bitrev(0b00)*4 => 0x10 + 0b00*4 => 0x10
            #    element 1: EA = r1 + bitrev(0b01)*4 => 0x10 + 0b10*4 => 0x18
            #    element 2: EA = r1 + bitrev(0b10)*4 => 0x10 + 0b01*4 => 0x14
            #    element 3: EA = r1 + bitrev(0b11)*4 => 0x10 + 0b11*4 => 0x1c
            # therefore loaded from (bit-reversed indexing):
            #    r12 => mem[0x10] which was stored from r5
            #    r13 => mem[0x18] which was stored from r7
            #    r14 => mem[0x14] which was stored from r6
            #    r15 => mem[0x1c] which was stored from r8
            self.assertEqual(sim.gpr(12), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(13), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(14), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(15), SelectableInt(0x404, 64))

    # This method used to be a second definition of
    # test_sv_load_store_bitreverse.  A repeated name in a class body
    # rebinds the attribute, so the integer test above was silently
    # discarded and never ran; the FP variant now has its own name.
    def test_sv_load_store_bitreverse_fp(self):
        """Unit-strided FP store followed by a bit-reversed FP load.

        The program under test ends with::

            sv.stfs 4.v, 0(1)
            sv.lfsbr 12.v, 4(1), 2

        note: bitreverse mode is... odd.  it's the butterfly generator
        from Cooley-Tukey FFT:
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        bitreverse LD is computed as:

            EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.
        the RC (shift) is to be able to offset the LDs by Radix-2 spans
        """
        # NOTE(review): one instruction between the first addi and the
        # store was not visible in the reviewed extract and is not
        # reproduced here -- confirm against the repository copy.
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "sv.stfs 4.v, 0(1)",  # EA = r1 + 0 + wordlen*i
                        "sv.lfsbr 12.v, 4(1), 2"])  # bit-reversed
        # NOTE(review): conversion not visible in the extract; see
        # test_sv_load_store_elementstride.
        lst = list(lst)

        # SVSTATE (in this case, VL=4)
        svstate = SVP64State()
        # NOTE(review): the VL assignment was not visible in the extract;
        # inferred from the "VL=4" comment and the four-element results.
        svstate.vl = 4     # VL
        svstate.maxvl = 4  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        # initial FP register file: f4..f7 hold the values to be stored
        # NOTE(review): the creation of `fprs` was not visible in the
        # extract; 32 zeroed entries mirrors run_tst_program's default.
        fprs = [0] * 32
        fprs[4] = fp64toselectable(1.0)
        fprs[5] = fp64toselectable(2.0)
        fprs[6] = fp64toselectable(3.0)
        fprs[7] = fp64toselectable(4.0)

        # expected results, remember that bit-reversed load has been done
        expected_fprs = deepcopy(fprs)
        expected_fprs[12] = fprs[4]  # 0b00 -> 0b00
        expected_fprs[13] = fprs[6]  # 0b01 -> 0b10
        expected_fprs[14] = fprs[5]  # 0b10 -> 0b01
        expected_fprs[15] = fprs[7]  # 0b11 -> 0b11

        with Program(lst, bigendian=False) as program:
            # NOTE(review): the final argument of this call was not
            # visible in the extract; passing the FP registers built above
            # is the only use `fprs` can have as simulator input.
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            mem = sim.mem.dump(printout=False)

            # TODO: the memory check below was already disabled in the
            # original; kept for reference until the expected dump for
            # the FP store is established.
            #self.assertEqual(mem, [(16, 0x020200000101),
            #                       (24, 0x040400000303)])
            self._check_fpregs(sim, expected_fprs)

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None, initial_fprs=None):
        """Run *prog* through ``run_tst`` and return the simulator.

        Parameters:
            prog: the program object to execute.
            initial_regs: initial integer register values; defaults to
                32 zeros when None.
            svstate: forwarded unchanged to ``run_tst``.
            initial_fprs: initial FP register values; defaults to
                32 zeros when None.

        Returns:
            Whatever ``run_tst`` returns; the tests in this class read
            ``.mem``, ``.gpr()`` and ``.fpr()`` from it.
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        if initial_fprs is None:
            initial_fprs = [0] * 32
        simulator = run_tst(prog, initial_regs, svstate=svstate,
                            initial_fprs=initial_fprs)
        # NOTE(review): the return statement was not visible in the
        # reviewed extract; every caller in this class uses the result.
        return simulator
261 if __name__
== "__main__":