647e793
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_ldst.py
1 import unittest
2 from copy import deepcopy
3
4 from nmutil.formaltest import FHDLTestCase
5 from openpower.decoder.helpers import fp64toselectable
6 from openpower.decoder.isa.caller import SVP64State
7 from openpower.decoder.isa.remap_dct_yield import halfrev2, reverse_bits
8 from openpower.decoder.isa.test_caller import run_tst
9 from openpower.decoder.selectable_int import SelectableInt
10 from openpower.simulator.program import Program
11 from openpower.sv.trans.svp64 import SVP64Asm
12
13
def write_byte(mem, addr, val):
    """Write the single byte *val* into *mem* at byte-address *addr*.

    *mem* is a dict modelling memory as 64-bit little-endian doublewords
    keyed by 8-byte-aligned addresses (missing entries read as zero).
    The byte lane selected by *addr* is cleared and *val* inserted.
    """
    # split addr into the aligned doubleword address and a bit offset
    dw_addr, offs = (addr // 8) * 8, (addr % 8) * 8
    mask = 0xff << offs
    value = mem.get(dw_addr, 0) & ~mask
    # mask val to one byte so an oversized value cannot corrupt the
    # neighbouring byte lanes of the doubleword
    value = value | ((val & 0xff) << offs)
    mem[dw_addr] = value & 0xffff_ffff_ffff_ffff
20
21
class DecoderTestCase(FHDLTestCase):
    """Simulator tests for SVP64 LD/ST modes (post-increment,
    element-stride, unit-stride, and REMAPped loads/stores)."""

    def _check_regs(self, sim, expected):
        """Assert every one of the 32 GPRs in *sim* equals the
        corresponding entry of *expected*, as a 64-bit value."""
        for regnum in range(32):
            expected_val = SelectableInt(expected[regnum], 64)
            self.assertEqual(sim.gpr(regnum), expected_val)

    def _check_fpregs(self, sim, expected):
        """Assert every one of the 32 FPRs in *sim* equals the
        corresponding entry of *expected*, as a 64-bit value."""
        for regnum in range(32):
            expected_val = SelectableInt(expected[regnum], 64)
            self.assertEqual(sim.fpr(regnum), expected_val)
31
    def test_sv_load_store_strncpy(self):
        """strncpy, implemented with post-increment ld/st, sv.bc, and
        data-dependent fail-first (ffirst) which truncates VL at the
        terminating zero byte.  a copy loop runs until the zero is seen,
        then a second loop zero-fills the remainder of the n bytes.
        """
        maxvl = 4
        lst = SVP64Asm(
            [
                "mtspr 9, 3",  # move r3 to CTR
                "addi 0,0,0",  # initialise r0 to zero
                # chr-copy loop starts here:
                #   for (i = 0; i < n && src[i] != '\0'; i++)
                #       dest[i] = src[i];
                # VL (and r1) = MIN(CTR,MAXVL=4)
                "setvl 1,0,%d,0,1,1" % maxvl,
                # load VL bytes (update r10 addr)
                "sv.lbzu/pi *16, 1(10)",
                "sv.cmpi/ff=eq/vli *0,1,*16,0",  # compare against zero, truncate VL
                # store VL bytes (update r12 addr)
                "sv.stbu/pi *16, 1(12)",
                "sv.bc/all 0, *2, -0x1c",  # test CTR, stop if cmpi failed
                # zeroing loop starts here:
                #   for ( ; i < n; i++)
                #       dest[i] = '\0';
                # VL (and r1) = MIN(CTR,MAXVL=4)
                "setvl 1,0,%d,0,1,1" % maxvl,
                # store VL zeros (update r12 addr)
                "sv.stbu/pi 0, 1(12)",
                "sv.bc 16, *0, -0xc",  # decrement CTR by VL, stop at zero
            ]
        )
        lst = list(lst)

        tst_string = "hello\x00bye\x00"
        initial_regs = [0] * 32
        initial_regs[3] = len(tst_string)  # including the zero
        initial_regs[10] = 16  # load address
        initial_regs[12] = 40  # store address

        # some memory with identifying garbage in it
        initial_mem = {16: 0xf0f1_f2f3_f4f5_f6f7,
                       24: 0x4041_4243_4445_4647,
                       40: 0x8081_8283_8485_8687,
                       48: 0x9091_9293_9495_9697,
                       }

        for i, c in enumerate(tst_string):
            write_byte(initial_mem, 16+i, ord(c))

        # now get the expected results: copy the string to the other address,
        # but terminate at first zero (strncpy, duh)
        expected_mem = deepcopy(initial_mem)
        copyzeros = False
        strlen = 0
        for i, c in enumerate(tst_string):
            c = ord(c)
            if not copyzeros:
                write_byte(expected_mem, 40+i, c)
                strlen = i+1
            else:
                write_byte(expected_mem, 40+i, 0)
            if c == 0:
                copyzeros = True

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, initial_mem=initial_mem,
                                       initial_regs=initial_regs)
            mem = sim.mem.dump(printout=True, asciidump=True)
            #print (mem)
            # the copied string "hello\0" is expected at bytes 40..45,
            # with zero-fill continuing up to byte 40+n-1, exactly as
            # computed into expected_mem above
            for (k, val) in expected_mem.items():
                print("mem, val", k, hex(val))
            self.assertEqual(mem, list(expected_mem.items()))
            print(sim.gpr(1))
            # reg 10 (the LD EA) is expected to be nearest
            # 16 + strlen, rounded up
            rounded = ((strlen+maxvl-1) // maxvl) * maxvl
            self.assertEqual(sim.gpr(10), SelectableInt(16+rounded, 64))
            # whereas reg 12 (the ST EA) advanced over all n stored bytes
            # (copy plus zero-fill), i.e. 40 + len(tst_string)
            self.assertEqual(sim.gpr(12), SelectableInt(
                40+len(tst_string), 64))
118
    def test_sv_load_store_postinc(self):
        """>>> lst = ["addi 20, 0, 0x0010",
                      "addi 22, 0, 0x0010",
                      "addi 3, 0, 0x0008",
                      "addi 4, 0, 0x1234",
                      "addi 5, 0, 0x1235",
                      "sv.stwu/pi *4, 24(22)",
                      "sv.lwzu/pi *8, 24(20)"]

        element stride is computed as:
        for i in range(VL):
            EA = (RA|0) + EXTS(D) * i

        load-update with post-increment will do this however:
        for i in range(VL):
            *vector = MEM(RA)
            EA = (RA|0) + EXTS(D)
            RA = EA # update RA *after*

        whereas without post-increment it would be:
        for i in range(VL):
            EA = (RA|0) + EXTS(D) # EA calculated (and used) *BEFORE* load
            *vector = MEM(EA)
            RA = EA # still updated after but it's used before
        """
        lst = SVP64Asm(["addi 20, 0, 0x0010",
                        "addi 22, 0, 0x0010",
                        "addi 3, 0, 0x0008",
                        "addi 4, 0, 0x1234",
                        "addi 5, 0, 0x1235",
                        "sv.stwu/pi *4, 24(22)",  # scalar r22 += 24 on update
                        "sv.lwzu/pi *8, 24(20)"   # scalar r20 += 24 on update
                        ])
        lst = list(lst)

        # SVSTATE (in this case, VL=2)
        svstate = SVP64State()
        svstate.vl = 2  # VL
        svstate.maxvl = 2  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print(mem)
            # post-increment: EA is used *before* the +24 update, so:
            # element 0: RA=0x10, D=24 => EA = 0x10, then RA becomes 0x28
            # element 1: RA=0x28, D=24 => EA = 0x28, then RA becomes 0x40
            # therefore, at address 0x10 ==> 0x1234
            # therefore, at address 0x28 ==> 0x1235
            expected_mem = [(16, 0x1234),
                            (40, 0x1235)]
            self.assertEqual(mem, expected_mem)
            print(sim.gpr(1))
            self.assertEqual(sim.gpr(8), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(9), SelectableInt(0x1235, 64))
            # reg 20 (the LD EA) is expected to be the initial 16,
            # plus 2x24 (2 lots of immediates). 16+2*24=64
            self.assertEqual(sim.gpr(20), SelectableInt(64, 64))
            # likewise, reg 22 - for the store - also 16+2*24.
            self.assertEqual(sim.gpr(22), SelectableInt(64, 64))
179
    def test_sv_load_store_elementstride(self):
        """>>> lst = ["addi 2, 0, 0x0010",
                      "addi 3, 0, 0x0008",
                      "addi 4, 0, 0x1234",
                      "addi 5, 0, 0x1235",
                      "sv.stw/els *4, 24(2)",
                      "sv.lwz/els *8, 24(2)"]

        note: element stride mode is only enabled when RA is a scalar
        and when the immediate is non-zero

        element stride is computed as:
        for i in range(VL):
            EA = (RA|0) + EXTS(D) * i
        """
        lst = SVP64Asm(["addi 2, 0, 0x0010",
                        "addi 3, 0, 0x0008",
                        "addi 4, 0, 0x1234",
                        "addi 5, 0, 0x1235",
                        "sv.stw/els *4, 24(2)",   # scalar r2 (0x10) + 24*offs
                        "sv.lwz/els *8, 24(2)"])  # scalar r2 (0x10) + 24*offs
        lst = list(lst)

        # SVSTATE (in this case, VL=2)
        svstate = SVP64State()
        svstate.vl = 2  # VL
        svstate.maxvl = 2  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print(mem)
            # contents of memory expected at:
            # element 0: r2=0x10, D=24, => EA = 0x10+24*0 = 16 (0x10)
            # element 1: r2=0x10, D=24, => EA = 0x10+24*1 = 40 (0x28)
            # therefore, at address 0x10 ==> 0x1234
            # therefore, at address 0x28 ==> 0x1235
            expected_mem = [(16, 0x1234),
                            (40, 0x1235)]
            self.assertEqual(mem, expected_mem)
            print(sim.gpr(1))
            self.assertEqual(sim.gpr(8), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(9), SelectableInt(0x1235, 64))
224
    def test_sv_load_store_unitstride(self):
        """>>> lst = ["addi 1, 0, 0x0010",
                      "addi 2, 0, 0x0008",
                      "addi 8, 0, 0x1234",
                      "addi 9, 0, 0x1235",
                      "sv.stw *8, 8(1)",
                      "sv.lwz *12, 8(1)"]

        note: unit stride mode is only enabled when RA is a scalar.

        unit stride is computed as:
        for i in range(VL):
            EA = (RA|0) + EXTS(D) + LDSTsize * i
        where for stw and lwz, LDSTsize is 4 because it is 32-bit words
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0008",
                        "addi 8, 0, 0x1234",
                        "addi 9, 0, 0x1235",
                        "sv.stw *8, 8(1)",    # scalar r1 + 8 + wordlen*offs
                        "sv.lwz *12, 8(1)"])  # scalar r1 + 8 + wordlen*offs
        lst = list(lst)

        # SVSTATE (in this case, VL=2)
        svstate = SVP64State()
        svstate.vl = 2  # VL
        svstate.maxvl = 2  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print("Mem")
            print(mem)
            # contents of memory expected at:
            # element 0: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*0 = 0x18 (24)
            # element 1: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*1 = 0x1c (28)
            # therefore, at address 0x18 ==> 0x1234
            # therefore, at address 0x1c ==> 0x1235
            # both words land in the same aligned doubleword, keyed at 24
            self.assertEqual(mem, [(24, 0x123500001234)])
            print(sim.gpr(1))
            self.assertEqual(sim.gpr(12), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(13), SelectableInt(0x1235, 64))
268
    @unittest.skip("deprecated, needs Scalar LDST-shifted")
    def test_sv_load_store_shifted(self):
        """>>> lst = ["addi 1, 0, 0x0010",
                      "addi 2, 0, 0x0004",
                      "addi 3, 0, 0x0002",
                      "addi 4, 0, 0x101",
                      "addi 5, 0, 0x202",
                      "addi 6, 0, 0x303",
                      "addi 7, 0, 0x404",
                      "sv.stw *4, 0(1)",
                      "sv.lwzsh *12, 4(1), 2"]

        shifted LD is computed as:
        for i in range(VL):
            EA = (RA|0) + (EXTS(D) * LDSTsize * i) << RC

        NOTE(review): this test is skipped (deprecated); the comments,
        docstring and expected values below are mutually inconsistent
        and would need reconciling before un-skipping.
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 4, 0, 0x101",
                        "addi 5, 0, 0x202",
                        "addi 6, 0, 0x303",
                        "addi 7, 0, 0x404",
                        "sv.stw *4, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "sv.lwzsh *12, 4(1), 2"])  # shifted load
        lst = list(lst)

        # SVSTATE (in this case, VL=4)
        svstate = SVP64State()
        svstate.vl = 4  # VL
        svstate.maxvl = 4  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print(mem)

            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303)])
            print(sim.gpr(1))
            # from STs
            self.assertEqual(sim.gpr(4), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(5), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(6), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(7), SelectableInt(0x404, 64))
            # NOTE(review): the original comments here referred to r9-r12
            # while the assertions check r12-r15; the assertions expect a
            # straight in-order copy back:
            # r12 => mem[0x10] which was stored from r4
            # r13 => mem[0x14] which was stored from r5
            # r14 => mem[0x18] which was stored from r6
            # r15 => mem[0x1c] which was stored from r7
            self.assertEqual(sim.gpr(12), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(13), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(14), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(15), SelectableInt(0x404, 64))
328
    @unittest.skip("deprecated, needs Scalar LDST-shifted")
    def test_sv_load_store_shifted_fp(self):
        """>>> lst = ["addi 1, 0, 0x0010",
                      "addi 2, 0, 0x0004",
                      "addi 3, 0, 0x0002",
                      "addi 4, 0, 0x101",
                      "addi 5, 0, 0x202",
                      "addi 6, 0, 0x303",
                      "addi 7, 0, 0x404",
                      "sv.std *4, 0(1)",
                      "sv.lfdsh *12, 8(1), 2"]

        shifted LD is computed as:
        for i in range(VL):
            EA = (RA|0) + (EXTS(D) * LDSTsize * i) << RC

        NOTE(review): this test is skipped (deprecated); the comments
        below are mutually inconsistent with the assertions and would
        need reconciling before un-skipping.
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 4, 0, 0x101",
                        "addi 5, 0, 0x202",
                        "addi 6, 0, 0x303",
                        "addi 7, 0, 0x404",
                        "sv.std *4, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "sv.lfdsh *12, 8(1), 2"])  # shifted
        lst = list(lst)

        # SVSTATE (in this case, VL=4)
        svstate = SVP64State()
        svstate.vl = 4  # VL
        svstate.maxvl = 4  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        fprs = [0] * 32

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            mem = sim.mem.dump(printout=False)
            print(mem)

            # doublewords stored sequentially from r4-r7
            self.assertEqual(mem, [(16, 0x101),
                                   (24, 0x202),
                                   (32, 0x303),
                                   (40, 0x404),
                                   ])
            print(sim.gpr(1))
            # from STs
            self.assertEqual(sim.gpr(4), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(5), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(6), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(7), SelectableInt(0x404, 64))
            # NOTE(review): original comments referred to r9-r12 and
            # bit-reversed EAs, but the assertions check f12-f15 as a
            # straight in-order copy back:
            # f12 => mem[0x10] stored from r4, f13 => mem[0x18] from r5,
            # f14 => mem[0x20] from r6, f15 => mem[0x28] from r7
            self.assertEqual(sim.fpr(12), SelectableInt(0x101, 64))
            self.assertEqual(sim.fpr(13), SelectableInt(0x202, 64))
            self.assertEqual(sim.fpr(14), SelectableInt(0x303, 64))
            self.assertEqual(sim.fpr(15), SelectableInt(0x404, 64))
394
    @unittest.skip("deprecated, needs Scalar LDST-shifted")
    def test_sv_load_store_shifted2(self):
        """>>> lst = ["addi 1, 0, 0x0010",
                      "addi 2, 0, 0x0000",
                      "sv.stfs *4, 0(1)",
                      "sv.lfssh *12, 4(1), 2"]

        shifted LD is computed as:
        for i in range(VL):
            EA = (RA|0) + (EXTS(D) * LDSTsize * i) << RC

        NOTE(review): this test is skipped (deprecated); the expected
        result below is a straight f4-f7 -> f12-f15 copy.
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "sv.stfs *4, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "sv.lfssh *12, 4(1), 2"])  # shifted (by zero, but hey)
        lst = list(lst)

        # SVSTATE (in this case, VL=4)
        svstate = SVP64State()
        svstate.vl = 4  # VL
        svstate.maxvl = 4  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        fprs = [0] * 32
        scalar_a = 1.3
        scalar_b = -2.0
        fprs[4] = fp64toselectable(1.0)
        fprs[5] = fp64toselectable(2.0)
        fprs[6] = fp64toselectable(3.0)
        fprs[7] = fp64toselectable(4.0)

        # expected results: identity mapping f4-f7 -> f12-f15
        # (NOTE(review): original "0b10 -> 0b01" style comments described
        # a bit-reversal that the expected values below do not apply)
        expected_fprs = deepcopy(fprs)
        expected_fprs[12] = fprs[4]
        expected_fprs[13] = fprs[5]
        expected_fprs[14] = fprs[6]
        expected_fprs[15] = fprs[7]

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            mem = sim.mem.dump(printout=False)
            print("mem dump")
            print(mem)

            print("FPRs")
            sim.fpr.dump()

            # self.assertEqual(mem, [(16, 0x020200000101),
            #                       (24, 0x040400000303)])
            self._check_fpregs(sim, expected_fprs)
448
    def test_sv_load_store_remap_matrix(self):
        """>>> lst = ["addi 1, 0, 0x0010",
                      "addi 2, 0, 0x0000",
                      # ... addi r4-r18 with values 0x101..0xf0f ...
                      "sv.stw *4, 0(1)",
                      "svshape 3, 3, 4, 0, 0",
                      "svremap 1, 1, 2, 0, 0, 0, 0",
                      "sv.lwz *20, 0(1)"]

        REMAPed a LD operation via a Matrix Multiply Schedule,
        which is set up as 3x4 result
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 4, 0, 0x101",
                        "addi 5, 0, 0x202",
                        "addi 6, 0, 0x303",
                        "addi 7, 0, 0x404",
                        "addi 8, 0, 0x505",
                        "addi 9, 0, 0x606",
                        "addi 10, 0, 0x707",
                        "addi 11, 0, 0x808",
                        "addi 12, 0, 0x909",
                        "addi 13, 0, 0xa0a",
                        "addi 14, 0, 0xb0b",
                        "addi 15, 0, 0xc0c",
                        "addi 16, 0, 0xd0d",
                        "addi 17, 0, 0xe0e",
                        "addi 18, 0, 0xf0f",
                        "sv.stw *4, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "svshape 3, 3, 4, 0, 0",
                        "svremap 1, 1, 2, 0, 0, 0, 0",
                        "sv.lwz *20, 0(1)",
                        ])
        lst = list(lst)

        # SVSTATE (in this case, VL=12)
        svstate = SVP64State()
        svstate.vl = 12  # VL
        svstate.maxvl = 12  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        regs = [0] * 64

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_regs=regs)
            mem = sim.mem.dump(printout=False)
            print("Mem")
            print(mem)

            # the first 12 words stored sequentially, two per doubleword
            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303),
                                   (32, 0x060600000505),
                                   (40, 0x080800000707),
                                   (48, 0x0a0a00000909),
                                   (56, 0x0c0c00000b0b)])
            print(sim.gpr(1))
            # from STs
            self.assertEqual(sim.gpr(4), SelectableInt(0x101, 64))
            self.assertEqual(sim.gpr(5), SelectableInt(0x202, 64))
            self.assertEqual(sim.gpr(6), SelectableInt(0x303, 64))
            self.assertEqual(sim.gpr(7), SelectableInt(0x404, 64))
            self.assertEqual(sim.gpr(8), SelectableInt(0x505, 64))
            self.assertEqual(sim.gpr(9), SelectableInt(0x606, 64))
            self.assertEqual(sim.gpr(10), SelectableInt(0x707, 64))
            self.assertEqual(sim.gpr(11), SelectableInt(0x808, 64))
            # the LD is re-ordered by the 3x4 Matrix REMAP schedule;
            # expected pattern below taken from the schedule's element
            # ordering (NOTE(review): derived from the assertions, not
            # independently recomputed here)
            for i in range(3):
                self.assertEqual(sim.gpr(20+i), SelectableInt(0x101, 64))
                self.assertEqual(sim.gpr(23+i), SelectableInt(0x505, 64))
                self.assertEqual(sim.gpr(26+i), SelectableInt(0x909, 64))
                self.assertEqual(sim.gpr(29+i), SelectableInt(0x202, 64))
528
    def test_sv_load_store_bitreverse_remap_halfswap(self):
        """>>> lst = ["addi 1, 0, 0x0010",
                      "addi 2, 0, 0x0000",
                      # ... addi r4-r11 with values 0x001..0x708 ...
                      "sv.stw *4, 0(1)",
                      "svshape 8, 1, 1, 6, 0",
                      "svremap 1, 0, 0, 0, 0, 0, 0",
                      "sv.lwz/els *12, 4(1)"]

        element-strided LD is computed as:
        for i in range(VL):
            EA = (RA|0) + EXTS(D) * i

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.

        on top of the bit-reversal is a REMAP for half-swaps for DCT
        in-place.  the expected load ordering (av) is computed below
        with the same halfrev2/reverse_bits helpers used by the model.
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 4, 0, 0x001",
                        "addi 5, 0, 0x102",
                        "addi 6, 0, 0x203",
                        "addi 7, 0, 0x304",
                        "addi 8, 0, 0x405",
                        "addi 9, 0, 0x506",
                        "addi 10, 0, 0x607",
                        "addi 11, 0, 0x708",
                        "sv.stw *4, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "svshape 8, 1, 1, 6, 0",
                        "svremap 1, 0, 0, 0, 0, 0, 0",
                        #"setvl 0, 0, 8, 0, 1, 1",
                        "sv.lwz/els *12, 4(1)",
                        #"sv.lwz *12, 0(1)"
                        ])
        lst = list(lst)

        # SVSTATE (in this case, VL=8)
        svstate = SVP64State()
        svstate.vl = 8  # VL
        svstate.maxvl = 8  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        regs = [0] * 64

        # expected load order: DCT half-swap (halfrev2) then bit-reversal
        avi = [0x001, 0x102, 0x203, 0x304, 0x405, 0x506, 0x607, 0x708]
        n = len(avi)
        levels = n.bit_length() - 1
        ri = list(range(n))
        ri = [ri[reverse_bits(i, levels)] for i in range(n)]
        av = halfrev2(avi, False)
        av = [av[ri[i]] for i in range(n)]

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_regs=regs)
            mem = sim.mem.dump(printout=False)
            print("Mem")
            print(mem)

            # words stored sequentially from r4-r11, two per doubleword
            self.assertEqual(mem, [(16, 0x010200000001),
                                   (24, 0x030400000203),
                                   (32, 0x050600000405),
                                   (40, 0x070800000607)])
            # from STs
            for i in range(len(avi)):
                print("st gpr", i, sim.gpr(i+4), hex(avi[i]))
            for i in range(len(avi)):
                self.assertEqual(sim.gpr(i+4), avi[i])
            # combination of bit-reversed load with a DCT half-swap REMAP
            # schedule
            for i in range(len(avi)):
                print("ld gpr", i, sim.gpr(i+12), hex(av[i]))
            for i in range(len(avi)):
                self.assertEqual(sim.gpr(i+12), av[i])
615
    def test_sv_load_store_bitreverse_remap_halfswap_idct(self):
        """>>> lst = ["addi 1, 0, 0x0010",
                      "addi 2, 0, 0x0000",
                      # ... addi r4-r11 with values 0x001..0x708 ...
                      "sv.stw *4, 0(1)",
                      "svshape 8, 1, 1, 14, 0",
                      "svremap 16, 0, 0, 0, 0, 0, 0",
                      "sv.lwz/els *12, 4(1)"]

        element-strided LD is computed as:
        for i in range(VL):
            EA = (RA|0) + EXTS(D) * i

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.

        on top of the bit-reversal is a REMAP for half-swaps, here in
        the *inverse* (iDCT) direction: bit-reversal first, then
        halfrev2 - the mirror of the forward (DCT) test above.
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 4, 0, 0x001",
                        "addi 5, 0, 0x102",
                        "addi 6, 0, 0x203",
                        "addi 7, 0, 0x304",
                        "addi 8, 0, 0x405",
                        "addi 9, 0, 0x506",
                        "addi 10, 0, 0x607",
                        "addi 11, 0, 0x708",
                        "sv.stw *4, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "svshape 8, 1, 1, 14, 0",
                        "svremap 16, 0, 0, 0, 0, 0, 0",
                        #"setvl 0, 0, 8, 0, 1, 1",
                        "sv.lwz/els *12, 4(1)",
                        #"sv.lwz *12, 0(1)"
                        ])
        lst = list(lst)

        # SVSTATE (in this case, VL=8)
        svstate = SVP64State()
        svstate.vl = 8  # VL
        svstate.maxvl = 8  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        regs = [0] * 64

        # expected load order: bit-reversal then inverse half-swap
        avi = [0x001, 0x102, 0x203, 0x304, 0x405, 0x506, 0x607, 0x708]
        n = len(avi)
        levels = n.bit_length() - 1
        ri = list(range(n))
        ri = [ri[reverse_bits(i, levels)] for i in range(n)]
        av = [avi[ri[i]] for i in range(n)]
        av = halfrev2(av, True)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_regs=regs)
            mem = sim.mem.dump(printout=False)
            print("Mem")
            print(mem)

            # words stored sequentially from r4-r11, two per doubleword
            self.assertEqual(mem, [(16, 0x010200000001),
                                   (24, 0x030400000203),
                                   (32, 0x050600000405),
                                   (40, 0x070800000607)])
            # from STs
            for i in range(len(avi)):
                print("st gpr", i, sim.gpr(i+4), hex(avi[i]))
            for i in range(len(avi)):
                self.assertEqual(sim.gpr(i+4), avi[i])
            # combination of bit-reversed load with a DCT half-swap REMAP
            # schedule
            for i in range(len(avi)):
                print("ld gpr", i, sim.gpr(i+12), hex(av[i]))
            for i in range(len(avi)):
                self.assertEqual(sim.gpr(i+12), av[i])
702
703 def run_tst_program(self, prog, initial_regs=None,
704 svstate=None, initial_fprs=None,
705 initial_mem=None):
706 if initial_regs is None:
707 initial_regs = [0] * 32
708 if initial_fprs is None:
709 initial_fprs = [0] * 32
710 simulator = run_tst(prog, initial_regs, svstate=svstate,
711 initial_fprs=initial_fprs,
712 mem=initial_mem)
713 print("GPRs")
714 simulator.gpr.dump()
715 print("FPRs")
716 simulator.fpr.dump()
717 return simulator
718
719
720 if __name__ == "__main__":
721 unittest.main()