76138dc483a45d98a5a572ddbfaedad8b3b8f3b8
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_ldst.py
1 import unittest
2 from copy import deepcopy
3
4 from nmutil.formaltest import FHDLTestCase
5 from openpower.decoder.helpers import fp64toselectable
6 from openpower.decoder.isa.caller import SVP64State
7 from openpower.decoder.isa.remap_dct_yield import halfrev2, reverse_bits
8 from openpower.decoder.isa.test_caller import run_tst
9 from openpower.decoder.selectable_int import SelectableInt
10 from openpower.simulator.program import Program
11 from openpower.insndb.asm import SVP64Asm
12
13
def write_byte(mem, addr, val):
    """Store one byte into a sparse memory dictionary of 64-bit words.

    mem maps 8-byte-aligned addresses to 64-bit integers; a word that is
    not yet present is treated as zero.  Within a word, byte 0 is the
    least-significant byte (i.e. little-endian layout).

    mem  -- dict of {aligned address: 64-bit int}, modified in place
    addr -- byte address to write to
    val  -- value to store; only the low 8 bits are used, so that an
            out-of-range (or negative) value cannot clobber the
            neighbouring bytes of the same word
    """
    word_addr = (addr // 8) * 8   # address of the containing 64-bit word
    shift = (addr % 8) * 8        # bit position of the byte in that word
    cleared = mem.get(word_addr, 0) & ~(0xff << shift)
    updated = cleared | ((val & 0xff) << shift)
    mem[word_addr] = updated & 0xffff_ffff_ffff_ffff
20
21
class DecoderTestCase(FHDLTestCase):
    """SVP64 load/store test cases.

    Each test assembles a short program with SVP64Asm, runs it through
    run_tst_program() and then checks the resulting memory dump and/or
    register contents.
    """

    # ---------------------------------------------------------------
    # helpers
    # ---------------------------------------------------------------

    def _check_regs(self, sim, expected):
        """Assert that GPRs 0..31 of sim equal expected[0..31] (64-bit)."""
        for regnum in range(32):
            self.assertEqual(sim.gpr(regnum),
                             SelectableInt(expected[regnum], 64))

    def _check_fpregs(self, sim, expected):
        """Assert that FPRs 0..31 of sim equal expected[0..31] (64-bit)."""
        for regnum in range(32):
            self.assertEqual(sim.fpr(regnum),
                             SelectableInt(expected[regnum], 64))

    def _print_and_check_gprs(self, sim, expected):
        """Print and assert each GPR against the plain integers in expected.

        One GPR is checked per entry of expected, starting at register 0.
        """
        for regnum, want in enumerate(expected):
            print("%i %x %x" % (regnum, sim.gpr(regnum).value, want))
            self.assertEqual(sim.gpr(regnum), want)

    @staticmethod
    def _make_svstate(vl):
        """Return an SVP64State with VL and MAXVL both set to vl."""
        svstate = SVP64State()
        svstate.vl = vl  # VL
        svstate.maxvl = vl  # MAXVL
        print("SVSTATE", bin(svstate.asint()))
        return svstate

    @staticmethod
    def _mem_with_string(text, addr=16):
        """Return memory holding identifying garbage, with text at addr.

        The garbage makes it obvious in a dump which bytes a test did
        (or did not) overwrite.  text is written one byte per character.
        """
        mem = {16: 0xf0f1_f2f3_f4f5_f6f7,
               24: 0x4041_4243_4445_4647,
               40: 0x8081_8283_8485_8687,
               48: 0x9091_9293_9495_9697,
               }
        for offset, char in enumerate(text):
            write_byte(mem, addr+offset, ord(char))
        return mem

    @staticmethod
    def _linked_list_mem():
        """Return memory containing a four-node linked list.

        Each node is a data word followed, 8 bytes later, by the address
        of the next node.  The walk 24 -> 48 -> 8 -> 80 ends with a
        zero next-pointer.
        """
        return {24: 0xfeed0001, 32: 48,  # node @ 24, next ptr @ 24+8 -> 48
                48: 0xfeed0002, 56: 8,   # node @ 48, next ptr @ 48+8 -> 8
                8: 0xfeed0003, 16: 80,   # node @ 8,  next ptr @ 8+8  -> 80
                80: 0xfeed0004, 88: 0,   # node @ 80, next ptr @ 80+8 -> 0
                }

    # ---------------------------------------------------------------
    # tests
    # ---------------------------------------------------------------

    def test_sv_load_store_strncpy(self):
        """strncpy using post-increment ld/st, sv.bc, and data-dependent
        ffirst.

        note that /lf (Load-Fault) mode is not set in this example when it
        should be. however implementing Load-Fault in ISACaller is tricky
        (requires implementing multiple hardware models)
        """
        maxvl = 4
        lst = SVP64Asm(
            [
                "mtspr 9, 3",  # move r3 to CTR
                "addi 0,0,0",  # initialise r0 to zero
                # chr-copy loop starts here:
                #   for (i = 0; i < n && src[i] != '\0'; i++)
                #        dest[i] = src[i];
                # VL (and r1) = MIN(CTR,MAXVL=4)
                "setvl 1,0,%d,0,1,1" % maxvl,
                # load VL bytes (update r10 addr)
                "sv.lbzu/pi *16, 1(10)",  # should be /lf here as well
                "sv.cmpi/ff=eq/vli *0,1,*16,0",  # cmp against zero, truncate VL
                # store VL bytes (update r12 addr)
                "sv.stbu/pi *16, 1(12)",
                "sv.bc/all 0, *2, -0x1c",  # test CTR, stop if cmpi failed
                # zeroing loop starts here:
                #   for ( ; i < n; i++)
                #       dest[i] = '\0';
                # VL (and r1) = MIN(CTR,MAXVL=4)
                "setvl 1,0,%d,0,1,1" % maxvl,
                # store VL zeros (update r12 addr)
                "sv.stbu/pi 0, 1(12)",
                "sv.bc 16, *0, -0xc",  # dec CTR by VL, stop at zero
            ]
        )
        lst = list(lst)

        tst_string = "hello\x00bye\x00"
        initial_regs = [0] * 32
        initial_regs[3] = len(tst_string)  # including the zero
        initial_regs[10] = 16  # load address
        initial_regs[12] = 40  # store address

        # source string at address 16, surrounded by identifying garbage
        initial_mem = self._mem_with_string(tst_string)

        # now get the expected results: copy the string to the other
        # address, but zero everything after the first NUL (strncpy)
        expected_mem = deepcopy(initial_mem)
        copyzeros = False
        strlen = 0
        for i, c in enumerate(tst_string):
            c = ord(c)
            if copyzeros:
                write_byte(expected_mem, 40+i, 0)
            else:
                write_byte(expected_mem, 40+i, c)
                strlen = i+1  # length up to and including the first NUL
            if c == 0:
                copyzeros = True

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, initial_mem=initial_mem,
                                       initial_regs=initial_regs)
            mem = sim.mem.dump(printout=True, asciidump=True)
            for (k, val) in expected_mem.items():
                print("mem, val", k, hex(val))
            self.assertEqual(mem, list(expected_mem.items()))
            print(sim.gpr(1))
            # reg 10 (the LD EA) is expected to be 16 + strlen, with
            # strlen rounded up to the next multiple of MAXVL
            rounded = ((strlen+maxvl-1) // maxvl) * maxvl
            self.assertEqual(sim.gpr(10), SelectableInt(16+rounded, 64))
            # whereas reg 12 (the ST EA) is expected to be 40 plus the
            # full length n, because the zeroing loop pads up to n
            self.assertEqual(sim.gpr(12), SelectableInt(
                40+len(tst_string), 64))

    def test_sv_load_store_postinc(self):
        """LD/ST-with-update in post-increment (/pi) mode.

        element stride is computed as:
        for i in range(VL):
            EA = (RA|0) + EXTS(D) * i

        load-update with post-increment will do this however:
        for i in range(VL):
            *vector = MEM(RA)
            EA = (RA|0) + EXTS(D)
            RA = EA # update RA *after*

        whereas without post-increment it would be:
        for i in range(VL):
            EA = (RA|0) + EXTS(D) # EA calculated (and used) *BEFORE* load
            *vector = MEM(EA)
            RA = EA # still updated after but it's used before
        """
        lst = SVP64Asm(["addi 20, 0, 0x0010",
                        "addi 22, 0, 0x0010",
                        "addi 3, 0, 0x0008",
                        "addi 4, 0, 0x1234",
                        "addi 5, 0, 0x1235",
                        "sv.stwu/pi *4, 24(22)",  # scalar r22 += 24 on update
                        "sv.lwzu/pi *8, 24(20)"   # scalar r20 += 24 on update
                        ])
        lst = list(lst)

        svstate = self._make_svstate(2)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print(mem)
            # contents of memory expected at:
            #    element 0: r22=0x10 used as-is      => EA = 16 (0x10)
            #    element 1: r22=0x10+24 after update => EA = 40 (0x28)
            # therefore, at address 0x10 ==> 0x1234
            # therefore, at address 0x28 ==> 0x1235
            expected_mem = [(16, 0x1234),
                            (40, 0x1235)]
            self.assertEqual(mem, expected_mem)
            print(sim.gpr(1))
            self.assertEqual(sim.gpr(8), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(9), SelectableInt(0x1235, 64))
            # reg 20 (the LD EA) is expected to be the initial 16,
            # plus 2x24 (2 lots of immediates). 16+2*24=64
            self.assertEqual(sim.gpr(20), SelectableInt(64, 64))
            # likewise, reg 22 - for the store - also 16+2*24.
            self.assertEqual(sim.gpr(22), SelectableInt(64, 64))

    def test_sv_load_store_elementstride(self):
        """LD/ST in element-stride (/els) mode.

        note: element stride mode is only enabled when RA is a scalar
        and when the immediate is non-zero

        element stride is computed as:
        for i in range(VL):
            EA = (RA|0) + EXTS(D) * i
        """
        lst = SVP64Asm(["addi 2, 0, 0x0010",
                        "addi 3, 0, 0x0008",
                        "addi 4, 0, 0x1234",
                        "addi 5, 0, 0x1235",
                        "sv.stw/els *4, 24(2)",  # scalar r2 + 24*offs
                        "sv.lwz/els *8, 24(2)"])  # scalar r2 + 24*offs
        lst = list(lst)

        svstate = self._make_svstate(2)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print(mem)
            # contents of memory expected at:
            #    element 0: r2=0x10, D=24, => EA = 0x10+24*0 = 16 (0x10)
            #    element 1: r2=0x10, D=24, => EA = 0x10+24*1 = 40 (0x28)
            # therefore, at address 0x10 ==> 0x1234
            # therefore, at address 0x28 ==> 0x1235
            expected_mem = [(16, 0x1234),
                            (40, 0x1235)]
            self.assertEqual(mem, expected_mem)
            print(sim.gpr(1))
            self.assertEqual(sim.gpr(8), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(9), SelectableInt(0x1235, 64))

    def test_sv_load_store_unitstride(self):
        """LD/ST in unit-stride mode.

        note: unit stride mode is only enabled when RA is a scalar.

        unit stride is computed as:
        for i in range(VL):
            EA = (RA|0) + EXTS(D) + LDSTsize * i
        where for stw and lwz, LDSTsize is 4 because it is 32-bit words
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0008",
                        "addi 8, 0, 0x1234",
                        "addi 9, 0, 0x1235",
                        "sv.stw *8, 8(1)",  # scalar r1 + 8 + wordlen*offs
                        "sv.lwz *12, 8(1)"])  # scalar r1 + 8 + wordlen*offs
        lst = list(lst)

        svstate = self._make_svstate(2)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print("Mem")
            print(mem)
            # contents of memory expected at:
            #    element 0: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*0 = 0x18
            #    element 1: r1=0x10, D=8, wordlen=4 => EA = 0x10+8+4*1 = 0x1c
            # therefore, at address 0x18 ==> 0x1234
            # therefore, at address 0x1c ==> 0x1235
            # both words share the 64-bit memory word at address 24 (0x18)
            self.assertEqual(mem, [(24, 0x123500001234)])
            print(sim.gpr(1))
            self.assertEqual(sim.gpr(12), SelectableInt(0x1234, 64))
            self.assertEqual(sim.gpr(13), SelectableInt(0x1235, 64))

    @unittest.skip("deprecated, needs Scalar LDST-shifted")
    def test_sv_load_store_shifted(self):
        """Shifted LD of 32-bit words (currently skipped).

        shifted LD is computed as:
        for i in range(VL):
            EA = (RA|0) + (EXTS(D) * LDSTsize * i) << RC
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 4, 0, 0x101",
                        "addi 5, 0, 0x202",
                        "addi 6, 0, 0x303",
                        "addi 7, 0, 0x404",
                        "sv.stw *4, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "sv.lwzsh *12, 4(1), 2"])  # shifted (r2 is zero)
        lst = list(lst)

        svstate = self._make_svstate(4)
        values = (0x101, 0x202, 0x303, 0x404)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate)
            mem = sim.mem.dump(printout=False)
            print(mem)

            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303)])
            print(sim.gpr(1))
            # from STs: the source registers r4-r7 must be unchanged
            for i, val in enumerate(values):
                self.assertEqual(sim.gpr(4+i), SelectableInt(val, 64))
            # from LDs: r12-r15 are expected to receive the stored words
            # back in the same order
            for i, val in enumerate(values):
                self.assertEqual(sim.gpr(12+i), SelectableInt(val, 64))

    @unittest.skip("deprecated, needs Scalar LDST-shifted")
    def test_sv_load_store_shifted_fp(self):
        """Shifted LD of 64-bit values into FPRs (currently skipped).

        shifted LD is computed as:
        for i in range(VL):
            EA = (RA|0) + (EXTS(D) * LDSTsize * i) << RC
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 4, 0, 0x101",
                        "addi 5, 0, 0x202",
                        "addi 6, 0, 0x303",
                        "addi 7, 0, 0x404",
                        "sv.std *4, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "sv.lfdsh *12, 8(1), 2"])  # shifted
        lst = list(lst)

        svstate = self._make_svstate(4)
        values = (0x101, 0x202, 0x303, 0x404)

        fprs = [0] * 32

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            mem = sim.mem.dump(printout=False)
            print(mem)

            # one 64-bit doubleword per stored register
            self.assertEqual(mem, [(16, 0x101),
                                   (24, 0x202),
                                   (32, 0x303),
                                   (40, 0x404),
                                   ])
            print(sim.gpr(1))
            # from STs: the source registers r4-r7 must be unchanged
            for i, val in enumerate(values):
                self.assertEqual(sim.gpr(4+i), SelectableInt(val, 64))
            # from LDs: f12-f15 are expected to receive the stored
            # doublewords back in the same order
            for i, val in enumerate(values):
                self.assertEqual(sim.fpr(12+i), SelectableInt(val, 64))

    @unittest.skip("deprecated, needs Scalar LDST-shifted")
    def test_sv_load_store_shifted2(self):
        """Shifted LD of single-precision FP values (currently skipped).

        shifted LD is computed as:
        for i in range(VL):
            EA = (RA|0) + (EXTS(D) * LDSTsize * i) << RC
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "sv.stfs *4, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "sv.lfssh *12, 4(1), 2"])  # shifted (by zero, but hey)
        lst = list(lst)

        svstate = self._make_svstate(4)

        fprs = [0] * 32
        fprs[4] = fp64toselectable(1.0)
        fprs[5] = fp64toselectable(2.0)
        fprs[6] = fp64toselectable(3.0)
        fprs[7] = fp64toselectable(4.0)

        # expected results: f12-f15 receive f4-f7 in the same order
        expected_fprs = deepcopy(fprs)
        for i in range(4):
            expected_fprs[12+i] = fprs[4+i]

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            mem = sim.mem.dump(printout=False)
            print("mem dump")
            print(mem)

            print("FPRs")
            sim.fpr.dump()

            self._check_fpregs(sim, expected_fprs)

    def test_sv_load_store_remap_matrix(self):
        """REMAPed a LD operation via a Matrix Multiply Schedule,
        which is set up as 3x4 result
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 4, 0, 0x101",
                        "addi 5, 0, 0x202",
                        "addi 6, 0, 0x303",
                        "addi 7, 0, 0x404",
                        "addi 8, 0, 0x505",
                        "addi 9, 0, 0x606",
                        "addi 10, 0, 0x707",
                        "addi 11, 0, 0x808",
                        "addi 12, 0, 0x909",
                        "addi 13, 0, 0xa0a",
                        "addi 14, 0, 0xb0b",
                        "addi 15, 0, 0xc0c",
                        "addi 16, 0, 0xd0d",
                        "addi 17, 0, 0xe0e",
                        "addi 18, 0, 0xf0f",
                        "sv.stw *4, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "svshape 3, 3, 4, 0, 0",
                        "svremap 1, 1, 2, 0, 0, 0, 0",
                        "sv.lwz *20, 0(1)",
                        ])
        lst = list(lst)

        # VL=12, so only r4-r15 (twelve words) are stored
        svstate = self._make_svstate(12)

        regs = [0] * 64

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_regs=regs)
            mem = sim.mem.dump(printout=False)
            print("Mem")
            print(mem)

            self.assertEqual(mem, [(16, 0x020200000101),
                                   (24, 0x040400000303),
                                   (32, 0x060600000505),
                                   (40, 0x080800000707),
                                   (48, 0x0a0a00000909),
                                   (56, 0x0c0c00000b0b)])
            print(sim.gpr(1))
            # from STs: the source registers must be unchanged
            for i, val in enumerate((0x101, 0x202, 0x303, 0x404,
                                     0x505, 0x606, 0x707, 0x808)):
                self.assertEqual(sim.gpr(4+i), SelectableInt(val, 64))
            # LD through the Matrix REMAP schedule: each group of three
            # consecutive destination registers receives the same word
            for i in range(3):
                self.assertEqual(sim.gpr(20+i), SelectableInt(0x101, 64))
                self.assertEqual(sim.gpr(23+i), SelectableInt(0x505, 64))
                self.assertEqual(sim.gpr(26+i), SelectableInt(0x909, 64))
                self.assertEqual(sim.gpr(29+i), SelectableInt(0x202, 64))

    def test_sv_load_store_bitreverse_remap_halfswap(self):
        """Element-strided LD through a DCT half-swap REMAP schedule.

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.

        on top of the bit-reversal is a REMAP for half-swaps for DCT
        in-place.
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 4, 0, 0x001",
                        "addi 5, 0, 0x102",
                        "addi 6, 0, 0x203",
                        "addi 7, 0, 0x304",
                        "addi 8, 0, 0x405",
                        "addi 9, 0, 0x506",
                        "addi 10, 0, 0x607",
                        "addi 11, 0, 0x708",
                        "sv.stw *4, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "svshape 8, 1, 1, 6, 0",
                        "svremap 1, 0, 0, 0, 0, 0, 0",
                        "sv.lwz/els *12, 4(1)",
                        ])
        lst = list(lst)

        svstate = self._make_svstate(8)

        regs = [0] * 64

        # expected LD result: half-swap the stored vector, then pick
        # its elements in bit-reversed index order
        avi = [0x001, 0x102, 0x203, 0x304, 0x405, 0x506, 0x607, 0x708]
        n = len(avi)
        levels = n.bit_length() - 1
        ri = [reverse_bits(i, levels) for i in range(n)]
        swapped = halfrev2(avi, False)
        av = [swapped[idx] for idx in ri]

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_regs=regs)
            mem = sim.mem.dump(printout=False)
            print("Mem")
            print(mem)

            self.assertEqual(mem, [(16, 0x010200000001),
                                   (24, 0x030400000203),
                                   (32, 0x050600000405),
                                   (40, 0x070800000607)])
            # from STs
            for i in range(len(avi)):
                print("st gpr", i, sim.gpr(i+4), hex(avi[i]))
            for i in range(len(avi)):
                self.assertEqual(sim.gpr(i+4), avi[i])
            # combination of bit-reversed load with a DCT half-swap REMAP
            # schedule
            for i in range(len(avi)):
                print("ld gpr", i, sim.gpr(i+12), hex(av[i]))
            for i in range(len(avi)):
                self.assertEqual(sim.gpr(i+12), av[i])

    def test_sv_load_store_bitreverse_remap_halfswap_idct(self):
        """Element-strided LD through the inverse-DCT half-swap schedule.

        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11

        and thus creates the butterfly needed for one iteration of FFT.

        on top of the bit-reversal is a REMAP for half-swaps for DCT
        in-place.
        """
        lst = SVP64Asm(["addi 1, 0, 0x0010",
                        "addi 2, 0, 0x0000",
                        "addi 4, 0, 0x001",
                        "addi 5, 0, 0x102",
                        "addi 6, 0, 0x203",
                        "addi 7, 0, 0x304",
                        "addi 8, 0, 0x405",
                        "addi 9, 0, 0x506",
                        "addi 10, 0, 0x607",
                        "addi 11, 0, 0x708",
                        "sv.stw *4, 0(1)",  # scalar r1 + 0 + wordlen*offs
                        "svshape 8, 1, 1, 14, 0",
                        "svremap 16, 0, 0, 0, 0, 0, 0",
                        "sv.lwz/els *12, 4(1)",
                        ])
        lst = list(lst)

        svstate = self._make_svstate(8)

        regs = [0] * 64

        # expected LD result: pick the stored elements in bit-reversed
        # index order first, then apply the (inverse) half-swap
        avi = [0x001, 0x102, 0x203, 0x304, 0x405, 0x506, 0x607, 0x708]
        n = len(avi)
        levels = n.bit_length() - 1
        ri = [reverse_bits(i, levels) for i in range(n)]
        reordered = [avi[idx] for idx in ri]
        av = halfrev2(reordered, True)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_regs=regs)
            mem = sim.mem.dump(printout=False)
            print("Mem")
            print(mem)

            self.assertEqual(mem, [(16, 0x010200000001),
                                   (24, 0x030400000203),
                                   (32, 0x050600000405),
                                   (40, 0x070800000607)])
            # from STs
            for i in range(len(avi)):
                print("st gpr", i, sim.gpr(i+4), hex(avi[i]))
            for i in range(len(avi)):
                self.assertEqual(sim.gpr(i+4), avi[i])
            # combination of bit-reversed load with a DCT half-swap REMAP
            # schedule
            for i in range(len(avi)):
                print("ld gpr", i, sim.gpr(i+12), hex(av[i]))
            for i in range(len(avi)):
                self.assertEqual(sim.gpr(i+12), av[i])

    def test_sv_load_dd_ffirst_excl(self):
        """data-dependent fail-first on LD/ST, exclusive (VLi=0)
        """
        lst = SVP64Asm(
            [
                # load VL bytes but test if they are zero and truncate
                "sv.lbz/ff=RC1 *16, 1(10)",  # deliberately offset by 1
            ]
        )
        lst = list(lst)

        svstate = self._make_svstate(8)

        tst_string = "hel\x00e\x00"
        initial_regs = [0] * 32
        initial_regs[3] = len(tst_string)  # including the zero
        initial_regs[10] = 16  # load address
        initial_regs[12] = 40  # store address
        for i in range(8):  # set to garbage
            initial_regs[16+i] = (0xbeef00) + i  # identifying garbage

        # calculate expected regs: bytes up to but *excluding* the NUL
        expected_regs = deepcopy(initial_regs)
        for i, c in enumerate(tst_string[1:]):  # note the offset 1(10)
            c = ord(c)
            if c == 0:
                break  # strcpy stop at NUL
            expected_regs[16+i] = c

        # source string at address 16, surrounded by identifying garbage
        initial_mem = self._mem_with_string(tst_string)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_mem=initial_mem,
                                       initial_regs=initial_regs)
            mem = sim.mem.dump(printout=True, asciidump=True)
            print(mem)
            # VL is truncated to the two bytes before the NUL
            self.assertEqual(sim.svstate.vl, 2)
            self._print_and_check_gprs(sim, expected_regs)

    def test_sv_load_dd_ffirst_incl(self):
        """data-dependent fail-first on LD/ST, inclusive (/vli)
        """
        lst = SVP64Asm(
            [
                # load VL bytes but test if they are zero and truncate
                "sv.lbz/ff=RC1/vli *16, 1(10)",  # deliberately offset by 1
            ]
        )
        lst = list(lst)

        svstate = self._make_svstate(8)

        tst_string = "hel\x00e\x00"
        initial_regs = [0] * 32
        initial_regs[3] = len(tst_string)  # including the zero
        initial_regs[10] = 16  # load address
        initial_regs[12] = 40  # store address
        for i in range(8):  # set to garbage
            initial_regs[16+i] = (0xbeef00) + i  # identifying garbage

        # calculate expected regs: bytes up to and *including* the NUL
        expected_regs = deepcopy(initial_regs)
        for i, c in enumerate(tst_string[1:]):  # note the offset 1(10)
            c = ord(c)
            expected_regs[16+i] = c
            if c == 0:
                break  # strcpy stop at NUL *including* NUL

        # source string at address 16, surrounded by identifying garbage
        initial_mem = self._mem_with_string(tst_string)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_mem=initial_mem,
                                       initial_regs=initial_regs)
            mem = sim.mem.dump(printout=True, asciidump=True)
            print(mem)
            # VL covers the two bytes before the NUL plus the NUL itself
            self.assertEqual(sim.svstate.vl, 3)
            self._print_and_check_gprs(sim, expected_regs)

    # NOTE: this test used to be called test_sv_load_dd_ffirst_incl as
    # well, which silently replaced the byte-string test of that name
    # (a later "def" in a class body rebinds the earlier one), so only
    # one of the two was ever run.
    def test_sv_load_dd_ffirst_incl_linked_list(self):
        """data-dependent fail-first on LD/ST, inclusive (/vli)
        performs linked-list walking
        """
        lst = SVP64Asm(
            [
                # load the next-pointer of each node, using the pointer
                # loaded by the previous element as the base address;
                # truncate VL when a zero (end-of-list) pointer is loaded
                "sv.ld/ff=RC1/vli *17, 8(*16)",  # offset 8 to next addr
            ]
        )
        lst = list(lst)

        svstate = self._make_svstate(8)

        initial_regs = [0] * 32
        for i in range(8):  # set to garbage
            initial_regs[16+i] = (0xbeef00) + i  # identifying garbage
        initial_regs[16] = 24  # data starting point

        # some memory with addresses to get from. all next-pointers are
        # at offset 8 from the start of their node
        initial_mem = self._linked_list_mem()

        # calculate expected regs: r16 onwards hold the successive node
        # addresses, finishing with the terminating zero pointer
        expected_regs = deepcopy(initial_regs)
        ptr_addr = 24
        i = 0
        while True:  # VLI needs break at end
            expected_regs[16+i] = ptr_addr
            print("expected regs", 16+i, hex(expected_regs[16+i]))
            i += 1
            if ptr_addr == 0:
                break
            print("ptr_addr", ptr_addr)
            ptr_addr = initial_mem[ptr_addr+8]  # linked-list walk, offset 8

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_mem=initial_mem,
                                       initial_regs=initial_regs)
            mem = sim.mem.dump(printout=True, asciidump=True)
            print(mem)
            # four loads: three real next-pointers plus the zero one
            self.assertEqual(sim.svstate.vl, 4)
            self._print_and_check_gprs(sim, expected_regs)

    def test_sv_load_update_dd_ffirst_incl(self):
        """data-dependent fail-first on LD/ST-with-update, inclusive (/vli)
        performs linked-list walking, and stores the Effective Address
        *behind* where it is picked up (on the next element-iteration).
        """
        lst = SVP64Asm(
            [
                # load the next-pointer of each node (base address is the
                # pointer loaded by the previous element), write the EA
                # back into the base register, and truncate VL when a
                # zero (end-of-list) pointer is loaded
                "sv.ldu/ff=RC1/vli *17, 8(*16)",  # offset 8 to next addr
            ]
        )
        lst = list(lst)

        svstate = self._make_svstate(8)

        initial_regs = [0] * 32
        for i in range(8):  # set to garbage
            initial_regs[16+i] = (0xbeef00) + i  # identifying garbage
        initial_regs[16] = 24  # data starting point

        # some memory with addresses to get from. all next-pointers are
        # at offset 8 from the start of their node
        initial_mem = self._linked_list_mem()

        # calculate expected regs: each element reads its base from
        # r(16+i), puts the loaded pointer in r(17+i) for the next
        # element to use, then overwrites r(16+i) with the EA
        expected_regs = deepcopy(initial_regs)
        i = 0
        while True:  # VLI needs break at end
            ptr_addr = expected_regs[16+i]
            newptr_addr = initial_mem[ptr_addr+8]  # linked-list walk
            expected_regs[17+i] = newptr_addr
            expected_regs[16+i] = ptr_addr+8  # update: RA = EA
            print("expected regs", 16+i, hex(expected_regs[16+i]))
            i += 1
            print("ptr_addr", ptr_addr)
            if newptr_addr == 0:
                break  # VLI stop at end

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_mem=initial_mem,
                                       initial_regs=initial_regs)
            mem = sim.mem.dump(printout=True, asciidump=True)
            print(mem)
            # four loads: three real next-pointers plus the zero one
            self.assertEqual(sim.svstate.vl, 4)
            self._print_and_check_gprs(sim, expected_regs)

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None, initial_fprs=None,
                        initial_mem=None):
        """Run prog through run_tst and return the resulting simulator.

        prog         -- the assembled Program to execute
        initial_regs -- initial GPR values (default: 32 zeros)
        svstate      -- initial SVP64State, passed through unchanged
        initial_fprs -- initial FPR values (default: 32 zeros)
        initial_mem  -- initial memory contents, passed to run_tst as mem

        The GPRs and FPRs are dumped after the run as a debugging aid.
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        if initial_fprs is None:
            initial_fprs = [0] * 32
        simulator = run_tst(prog, initial_regs, svstate=svstate,
                            initial_fprs=initial_fprs,
                            mem=initial_mem)
        print("GPRs")
        simulator.gpr.dump()
        print("FPRs")
        simulator.fpr.dump()
        return simulator
935
936
# run every test in this module when it is executed directly as a script
if __name__ == "__main__":
    unittest.main()