0a0feac1e
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_chacha20.py
1 """SVP64 unit test for svindex
2 svindex SVG,rmm,SVd,ew,yx,mm,sk
3 """
4 import unittest
5 from copy import deepcopy
6
7 from nmutil.formaltest import FHDLTestCase
8 from openpower.decoder.isa.caller import SVP64State, set_masked_reg
9 from openpower.decoder.isa.test_caller import run_tst
10 from openpower.decoder.selectable_int import SelectableInt
11 from openpower.simulator.program import Program
12 from openpower.insndb.asm import SVP64Asm
13
14
15 # originally from https://github.com/pts/chacha20
def quarter_round_schedule(x, a, b, c, d):
    """Record the four steps of one chacha20 quarter-round in list *x*.

    Nothing is computed here: each appended tuple ``(p, q, r, rot)``
    merely describes one add-xor-rotate step of the form::

        x[p] = (x[p] + x[q]) & 0xffffffff
        x[r] = x[r] ^ x[p]
        x[r] = rotate(x[r], rot)

    so that the index columns can later be used as reg-offsets with
    svindex/svremap.  *a*, *b*, *c* and *d* are the four state-word
    indices of the quarter-round; *x* is extended in place and nothing
    is returned.
    """
    steps = (
        (a, b, d, 16),
        (c, d, b, 12),
        (a, b, d, 8),
        (c, d, b, 7),
    )
    for step in steps:
        x.append(step)
38
39
def rotl32(v, c):
    """Rotate the 32-bit value *v* left by *c* bits and return the result.

    Only the low 5 bits of *c* are used as the rotate amount.  The
    operation (result, input, masked amount) is printed for debugging.
    """
    amount = c & 0x1f
    # bits shifted up, truncated to 32 bits
    upper = (v << amount) & 0xffffffff
    # bits that wrapped round from the top into the bottom
    wrapped = v >> (32 - amount)
    res = upper | wrapped
    print("op rotl32", hex(res), hex(v), hex(amount))
    return res
45
46
def add(a, b):
    """Return ``a + b`` truncated to 32 bits, printing the operation."""
    res = (a + b) & 0xffffffff
    # same output as printing hex(res), hex(a), hex(b) individually
    print("op add", *map(hex, (res, a, b)))
    return res
51
52
def xor(a, b):
    """Return the bitwise exclusive-or of *a* and *b*, printing the operation."""
    res = a ^ b
    # same output as printing hex(res), hex(a), hex(b) individually
    print("op xor", *map(hex, (res, a, b)))
    return res
57
58
def sthth_round(x, a, b, d, rot):
    """Perform one add-xor-rotate step on the state list *x*, in place.

    x[a] becomes the 32-bit sum of x[a] and x[b]; x[d] is then xor-ed
    with the new x[a] and rotated left by *rot* bits.
    """
    x[a] = add(x[a], x[b])
    mixed = xor(x[d], x[a])
    x[d] = rotl32(mixed, rot)
63
64
def quarter_round(x, a, b, c, d):
    """Apply one chacha20 quarter-round to the state list *x*, in place.

    Runs four add-xor-rotate steps (see sthth_round) on the words at
    indices *a*, *b*, *c*, *d*, with rotate amounts 16, 12, 8 and 7.
    """
    steps = (
        (a, b, d, 16),
        (c, d, b, 12),
        (a, b, d, 8),
        (c, d, b, 7),
    )
    for first, second, target, rot in steps:
        sthth_round(x, first, second, target, rot)
72
73
def chacha_idx_schedule(x, fn=quarter_round_schedule):
    """Call *fn* on *x* for each of the eight quarter-rounds of a double-round.

    *fn* is invoked as ``fn(x, a, b, c, d)`` with the four word indices
    of each quarter-round, in the fixed order listed below.  With the
    default *fn* this collects the index schedule into *x*; any other
    callable with the same signature may be passed instead.
    """
    index_sets = (
        (0, 4, 8, 12),
        (1, 5, 9, 13),
        (2, 6, 10, 14),
        (3, 7, 11, 15),
        (0, 5, 10, 15),
        (1, 6, 11, 12),
        (2, 7, 8, 13),
        (3, 4, 9, 14),
    )
    for a, b, c, d in index_sets:
        fn(x, a, b, c, d)
83
84
class SVSTATETestCase(FHDLTestCase):
    """Runs an SVP64 assembler implementation of the chacha20 main rounds
    through run_tst and compares the resulting GPRs against a pure-python
    reference computed with chacha_idx_schedule/quarter_round.
    """

    def _check_regs(self, sim, expected):
        """Dump the GPRs of *sim*, then assert that GPRs 0-31 each equal
        the corresponding entry of *expected* (compared as 64-bit
        SelectableInt values).
        """
        print("GPR")
        sim.gpr.dump()
        for i in range(32):
            self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64),
                             "GPR %d %x expected %x" % \
                             (i, sim.gpr(i).value, expected[i]))

    def test_1_sv_chacha20_main_rounds(self):
        """chacha20 main rounds

        RA, RB, RS and RT are set up via Indexing to perform the *individual*
        add/xor/rotl32 operations (with elwidth=32)

        the inner loop uses "svstep." which detects src/dst-step reaching
        the end of the loop, setting CR0.eq=1. no need for an additional
        counter-register-with-a-decrement. this has the side-effect of
        freeing up CTR for use as a straight decrement-counter.

        both loops are 100% deterministic meaning that there should be
        *ZERO* branch-prediction misses, obviating a need for loop-unrolling.
        """

        nrounds = 2  # should be 10 for full algorithm

        block = 24  # register for block of 16
        # NOTE(review): vl is only referenced by commented-out code below
        vl = 22  # copy of VL placed in here
        # GPR numbers at which the three index tables are stored
        SHAPE0 = 8
        SHAPE1 = 12
        SHAPE2 = 16
        shifts = 20  # registers for 4 32-bit shift amounts
        ctr = 7  # register for CTR

        # NOTE(review): SHAPEn is halved when passed to svindex; presumably
        # the SVG operand is expressed in units of two GPRs - confirm
        # against the svindex specification.
        # NOTE(review): the hard-coded branch offsets below are consistent
        # with each sv.* instruction occupying 8 bytes and every other
        # instruction 4 bytes (-0x28 lands on the first svremap, -0x30 on
        # the second setvl) - re-check them if the listing is changed.
        isa = SVP64Asm([
            # set up VL=32 vertical-first, and SVSHAPEs 0-2
            # vertical-first, set MAXVL (and r17)
            'setvl 0, 0, 32, 1, 1, 1',  # vertical-first, set VL
            'svindex %d, 0, 1, 3, 0, 1, 0' % (SHAPE0//2),  # SVSHAPE0, a
            'svindex %d, 1, 1, 3, 0, 1, 0' % (SHAPE1//2),  # SVSHAPE1, b
            'svindex %d, 2, 1, 3, 0, 1, 0' % (SHAPE2//2),  # SVSHAPE2, c
            'svshape2 0, 0, 3, 4, 0, 1',  # SVSHAPE3, shift amount, mod 4
            # establish CTR for outer round count
            'addi %d, 0, %d' % (ctr, nrounds),  # set number of rounds
            'mtspr 9, %d' % ctr,  # set CTR to number of rounds
            # outer loop begins here (standard CTR loop)
            'setvl 0, 0, 32, 1, 1, 1',  # vertical-first, set VL
            # inner loop begins here. add-xor-rotl32 with remap, step, branch
            'svremap 31, 1, 0, 0, 0, 0, 0',  # RA=1, RB=0, RT=0 (0b01011)
            'sv.add/w=32 *%d, *%d, *%d' % (block, block, block),
            'svremap 31, 2, 0, 2, 2, 0, 0',  # RA=2, RB=0, RS=2 (0b00111)
            'sv.xor/w=32 *%d, *%d, *%d' % (block, block, block),
            'svremap 31, 0, 3, 2, 2, 0, 0',  # RA=2, RB=3, RS=2 (0b01110)
            'sv.rldcl/w=32 *%d, *%d, *%d, 0' % (block, block, shifts),
            'svstep. %d, 1, 0' % ctr,  # step to next in-regs element
            'bc 6, 3, -0x28',  # svstep. Rc=1 loop-end-condition?
            # inner-loop done: outer loop standard CTR-decrement to setvl again
            'bc 16, 0, -0x30',
        ])
        lst = list(isa)
        print("listing", lst)

        # collect the index schedule: chacha_idx_schedule calls
        # quarter_round_schedule 8 times, each appending 4 tuples, giving
        # 32 entries (the same number as the VL of 32 requested by setvl)
        schedule = []
        chacha_idx_schedule(schedule, fn=quarter_round_schedule)

        # initial values in GPR regfile
        initial_regs = [0] * 128

        # offsets for a b c
        # each schedule entry is the 4-tuple appended by
        # quarter_round_schedule: three word indices then a rotate amount.
        # the fourth field (named d here) is only printed: the rotate
        # amounts are stored separately via shiftvals, below.
        for i, (a, b, c, d) in enumerate(schedule):
            print ("chacha20 schedule", i, hex(a), hex(b), hex(c), hex(d))
            set_masked_reg(initial_regs, SHAPE0, i, ew_bits=8, value=a)
            set_masked_reg(initial_regs, SHAPE1, i, ew_bits=8, value=b)
            set_masked_reg(initial_regs, SHAPE2, i, ew_bits=8, value=c)

        # offsets for d (modulo 4 shift amount)
        shiftvals = [16, 12, 8, 7]  # chacha20 shifts
        for i in range(4):
            set_masked_reg(initial_regs, shifts, i, ew_bits=32,
                           value=shiftvals[i])

        # set up input test vector then pack it into regs
        x = [0] * 16
        x[0] = 0x61707865
        x[1] = 0x3320646e
        x[2] = 0x79622d32
        x[3] = 0x6b206574
        x[4] = 0x6d8bc55e
        x[5] = 0xa5e04f51
        x[6] = 0xea0d1e6f
        x[7] = 0x5a09dc7b
        x[8] = 0x18b6f510
        x[9] = 0x26f2b6bd
        x[10] = 0x7b59cc2f
        x[11] = 0xefb330b2
        x[12] = 0xcff545a3
        x[13] = 0x7c512380
        x[14] = 0x75f0fcc0
        x[15] = 0x5f868c74

        # 16 elements at ew_bits=32, starting at GPR "block"
        for i in range(16):
            set_masked_reg(initial_regs, block, i, ew_bits=32, value=x[i])

        # SVSTATE vl=32
        # (vl/maxvl are left at their defaults here: the explicit
        # assignments are commented out and the program itself runs setvl)
        svstate = SVP64State()
        #svstate.vl = 32 # VL
        #svstate.maxvl = 32 # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        # copy before running, compute expected results
        expected_regs = deepcopy(initial_regs)
        expected_regs[ctr] = 0  # reaches zero
        #expected_regs[vl] = 32 # gets set to MAXVL
        # reference result: run the python quarter_round over a copy of
        # the input vector, once per round, then pack it the same way as x
        expected = deepcopy(x)
        for i in range(nrounds):
            chacha_idx_schedule(expected, fn=quarter_round)
        for i in range(16):
            set_masked_reg(expected_regs, block, i, ew_bits=32,
                           value=expected[i])

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, initial_regs, svstate=svstate)

            # print out expected: 16 values @ 32-bit ea -> QTY8 64-bit regs
            for i in range(8):
                RS = sim.gpr(i+block).value
                print("expected", i+block, hex(RS), hex(expected_regs[i+block]))

            # debug output of the SPRs, SVSTATE fields and SVSHAPE0 fields
            print(sim.spr)
            SVSHAPE0 = sim.spr['SVSHAPE0']
            # NOTE(review): SVSHAPE1 is only used by a commented-out assert
            SVSHAPE1 = sim.spr['SVSHAPE1']
            print("SVSTATE after", bin(sim.svstate.asint()))
            print("        vl", bin(sim.svstate.vl))
            print("        mvl", bin(sim.svstate.maxvl))
            print("    srcstep", bin(sim.svstate.srcstep))
            print("    dststep", bin(sim.svstate.dststep))
            print("     RMpst", bin(sim.svstate.RMpst))
            print("       SVme", bin(sim.svstate.SVme))
            print("        mo0", bin(sim.svstate.mo0))
            print("        mo1", bin(sim.svstate.mo1))
            print("        mi0", bin(sim.svstate.mi0))
            print("        mi1", bin(sim.svstate.mi1))
            print("        mi2", bin(sim.svstate.mi2))
            print("STATE0svgpr", hex(SVSHAPE0.svgpr))
            print("STATE0 xdim", SVSHAPE0.xdimsz)
            print("STATE0 ydim", SVSHAPE0.ydimsz)
            print("STATE0 skip", bin(SVSHAPE0.skip))
            print("STATE0  inv", SVSHAPE0.invxyz)
            print("STATE0order", SVSHAPE0.order)
            print(sim.gpr.dump())
            # the full 32-entry GPR comparison against the reference
            self._check_regs(sim, expected_regs)
            # NOTE(review): the values below appear to mirror the operands
            # of the last svremap in the listing (31, 0, 3, 2, 2, 0, 0)
            self.assertEqual(sim.svstate.RMpst, 0)
            self.assertEqual(sim.svstate.SVme, 0b11111)
            self.assertEqual(sim.svstate.mi0, 0)
            self.assertEqual(sim.svstate.mi1, 3)
            self.assertEqual(sim.svstate.mi2, 2)
            self.assertEqual(sim.svstate.mo0, 2)
            self.assertEqual(sim.svstate.mo1, 0)
            #self.assertEqual(SVSHAPE0.svgpr, 22)
            #self.assertEqual(SVSHAPE1.svgpr, 30)

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None):
        """Run *prog* via run_tst, dump the GPRs and return the simulator.

        *initial_regs* defaults to a list of 32 zeros when not given;
        *svstate* is passed straight through to run_tst.
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        simulator = run_tst(prog, initial_regs, svstate=svstate)
        simulator.gpr.dump()
        return simulator
254
255
# run all test cases in this module when executed directly as a script
if __name__ == "__main__":
    unittest.main()