0a0feac1e
1 """SVP64 unit test for svindex
2 svindex SVG,rmm,SVd,ew,yx,mm,sk
5 from copy
import deepcopy
7 from nmutil
.formaltest
import FHDLTestCase
8 from openpower
.decoder
.isa
.caller
import SVP64State
, set_masked_reg
9 from openpower
.decoder
.isa
.test_caller
import run_tst
10 from openpower
.decoder
.selectable_int
import SelectableInt
11 from openpower
.simulator
.program
import Program
12 from openpower
.insndb
.asm
import SVP64Asm
15 # originally from https://github.com/pts/chacha20
def quarter_round_schedule(x, a, b, c, d):
    """Record, rather than execute, the four steps of one quarter-round.

    Each step is appended to ``x`` as a tuple
    ``(sum_idx, addend_idx, target_idx, rotation)``, i.e. the element
    offsets and rotate amount that the step would use:

        x[sum_idx] = (x[sum_idx] + x[addend_idx]) & 0xffffffff
        x[target_idx] = rotate(x[target_idx], rotation)

    The collected offsets are intended for use with svindex/svremap.
    ``x`` is modified in place; nothing is returned.
    """
    # same order and rotate amounts as the arithmetic quarter-round:
    # (a, b, d) <<< 16, (c, d, b) <<< 12, (a, b, d) <<< 8, (c, d, b) <<< 7
    steps = (
        (a, b, d, 16),
        (c, d, b, 12),
        (a, b, d, 8),
        (c, d, b, 7),
    )
    for step in steps:
        x.append(step)
42 res
= ((v
<< c
) & 0xffffffff) | v
>> (32 - c
)
43 print("op rotl32", hex(res
), hex(v
), hex(c
))
48 res
= (a
+ b
) & 0xffffffff
49 print("op add", hex(res
), hex(a
), hex(b
))
55 print("op xor", hex(res
), hex(a
), hex(b
))
def sthth_round(x, a, b, d, rot):
    """Perform one add / xor / rotate step on ``x`` in place.

    ``x[a]`` is replaced by ``add(x[a], x[b])``; ``x[d]`` is then xor'd
    with the updated ``x[a]`` and the result passed through ``rotl32``
    with amount ``rot``.  Nothing is returned.
    """
    x[a] = add(x[a], x[b])
    # x[a] is read back after the store so the xor sees the new sum
    x[d] = rotl32(xor(x[d], x[a]), rot)
def quarter_round(x, a, b, c, d):
    """Apply one quarter-round to ``x`` in place.

    Runs four ``sthth_round`` steps on the elements of ``x`` selected by
    the offsets ``a``, ``b``, ``c`` and ``d``, with rotate amounts
    16, 12, 8 and 7 in that order.  Nothing is returned.
    """
    steps = (
        (a, b, d, 16),
        (c, d, b, 12),
        (a, b, d, 8),
        (c, d, b, 7),
    )
    for sum_idx, addend_idx, target_idx, rot in steps:
        sthth_round(x, sum_idx, addend_idx, target_idx, rot)
74 def chacha_idx_schedule(x
, fn
=quarter_round_schedule
):
85 class SVSTATETestCase(FHDLTestCase
):
87 def _check_regs(self
, sim
, expected
):
91 self
.assertEqual(sim
.gpr(i
), SelectableInt(expected
[i
], 64),
92 "GPR %d %x expected %x" % \
93 (i
, sim
.gpr(i
).value
, expected
[i
]))
95 def test_1_sv_chacha20_main_rounds(self
):
96 """chacha20 main rounds
98 RA, RB, RS and RT are set up via Indexing to perform the *individual*
99 add/xor/rotl32 operations (with elwidth=32)
101 the inner loop uses "svstep." which detects src/dst-step reaching
102 the end of the loop, setting CR0.eq=1. no need for an additional
103 counter-register-with-a-decrement. this has the side-effect of
104 freeing up CTR for use as a straight decrement-counter.
106 both loops are 100% deterministic meaning that there should be
107 *ZERO* branch-prediction misses, obviating a need for loop-unrolling.
110 nrounds
= 2 # should be 10 for full algorithm
112 block
= 24 # register for block of 16
113 vl
= 22 # copy of VL placed in here
117 shifts
= 20 # registers for 4 32-bit shift amounts
118 ctr
= 7 # register for CTR
121 # set up VL=32 vertical-first, and SVSHAPEs 0-2
122 # vertical-first, set MAXVL (and r17)
123 'setvl 0, 0, 32, 1, 1, 1', # vertical-first, set VL
124 'svindex %d, 0, 1, 3, 0, 1, 0' % (SHAPE0
//2), # SVSHAPE0, a
125 'svindex %d, 1, 1, 3, 0, 1, 0' % (SHAPE1
//2), # SVSHAPE1, b
126 'svindex %d, 2, 1, 3, 0, 1, 0' % (SHAPE2
//2), # SVSHAPE2, c
127 'svshape2 0, 0, 3, 4, 0, 1', # SVSHAPE3, shift amount, mod 4
128 # establish CTR for outer round count
129 'addi %d, 0, %d' % (ctr
, nrounds
), # set number of rounds
130 'mtspr 9, %d' % ctr
, # set CTR to number of rounds
131 # outer loop begins here (standard CTR loop)
132 'setvl 0, 0, 32, 1, 1, 1', # vertical-first, set VL
133 # inner loop begins here. add-xor-rotl32 with remap, step, branch
134 'svremap 31, 1, 0, 0, 0, 0, 0', # RA=1, RB=0, RT=0 (0b01011)
135 'sv.add/w=32 *%d, *%d, *%d' % (block
, block
, block
),
136 'svremap 31, 2, 0, 2, 2, 0, 0', # RA=2, RB=0, RS=2 (0b00111)
137 'sv.xor/w=32 *%d, *%d, *%d' % (block
, block
, block
),
138 'svremap 31, 0, 3, 2, 2, 0, 0', # RA=2, RB=3, RS=2 (0b01110)
139 'sv.rldcl/w=32 *%d, *%d, *%d, 0' % (block
, block
, shifts
),
140 'svstep. %d, 1, 0' % ctr
, # step to next in-regs element
141 'bc 6, 3, -0x28', # svstep. Rc=1 loop-end-condition?
142 # inner-loop done: outer loop standard CTR-decrement to setvl again
146 print("listing", lst
)
149 chacha_idx_schedule(schedule
, fn
=quarter_round_schedule
)
151 # initial values in GPR regfile
152 initial_regs
= [0] * 128
155 for i
, (a
, b
, c
, d
) in enumerate(schedule
):
156 print ("chacha20 schedule", i
, hex(a
), hex(b
), hex(c
), hex(d
))
157 set_masked_reg(initial_regs
, SHAPE0
, i
, ew_bits
=8, value
=a
)
158 set_masked_reg(initial_regs
, SHAPE1
, i
, ew_bits
=8, value
=b
)
159 set_masked_reg(initial_regs
, SHAPE2
, i
, ew_bits
=8, value
=c
)
161 # offsets for d (modulo 4 shift amount)
162 shiftvals
= [16, 12, 8, 7] # chacha20 shifts
164 set_masked_reg(initial_regs
, shifts
, i
, ew_bits
=32,
167 # set up input test vector then pack it into regs
187 set_masked_reg(initial_regs
, block
, i
, ew_bits
=32, value
=x
[i
])
190 svstate
= SVP64State()
191 #svstate.vl = 32 # VL
192 #svstate.maxvl = 32 # MAXVL
193 print("SVSTATE", bin(svstate
.asint()))
195 # copy before running, compute expected results
196 expected_regs
= deepcopy(initial_regs
)
197 expected_regs
[ctr
] = 0 # reaches zero
198 #expected_regs[vl] = 32 # gets set to MAXVL
199 expected
= deepcopy(x
)
200 for i
in range(nrounds
):
201 chacha_idx_schedule(expected
, fn
=quarter_round
)
203 set_masked_reg(expected_regs
, block
, i
, ew_bits
=32,
206 with
Program(lst
, bigendian
=False) as program
:
207 sim
= self
.run_tst_program(program
, initial_regs
, svstate
=svstate
)
209 # print out expected: 16 values @ 32-bit ea -> QTY8 64-bit regs
211 RS
= sim
.gpr(i
+block
).value
212 print("expected", i
+block
, hex(RS
), hex(expected_regs
[i
+block
]))
215 SVSHAPE0
= sim
.spr
['SVSHAPE0']
216 SVSHAPE1
= sim
.spr
['SVSHAPE1']
217 print("SVSTATE after", bin(sim
.svstate
.asint()))
218 print(" vl", bin(sim
.svstate
.vl
))
219 print(" mvl", bin(sim
.svstate
.maxvl
))
220 print(" srcstep", bin(sim
.svstate
.srcstep
))
221 print(" dststep", bin(sim
.svstate
.dststep
))
222 print(" RMpst", bin(sim
.svstate
.RMpst
))
223 print(" SVme", bin(sim
.svstate
.SVme
))
224 print(" mo0", bin(sim
.svstate
.mo0
))
225 print(" mo1", bin(sim
.svstate
.mo1
))
226 print(" mi0", bin(sim
.svstate
.mi0
))
227 print(" mi1", bin(sim
.svstate
.mi1
))
228 print(" mi2", bin(sim
.svstate
.mi2
))
229 print("STATE0svgpr", hex(SVSHAPE0
.svgpr
))
230 print("STATE0 xdim", SVSHAPE0
.xdimsz
)
231 print("STATE0 ydim", SVSHAPE0
.ydimsz
)
232 print("STATE0 skip", bin(SVSHAPE0
.skip
))
233 print("STATE0 inv", SVSHAPE0
.invxyz
)
234 print("STATE0order", SVSHAPE0
.order
)
235 print(sim
.gpr
.dump())
236 self
._check
_regs
(sim
, expected_regs
)
237 self
.assertEqual(sim
.svstate
.RMpst
, 0)
238 self
.assertEqual(sim
.svstate
.SVme
, 0b11111)
239 self
.assertEqual(sim
.svstate
.mi0
, 0)
240 self
.assertEqual(sim
.svstate
.mi1
, 3)
241 self
.assertEqual(sim
.svstate
.mi2
, 2)
242 self
.assertEqual(sim
.svstate
.mo0
, 2)
243 self
.assertEqual(sim
.svstate
.mo1
, 0)
244 #self.assertEqual(SVSHAPE0.svgpr, 22)
245 #self.assertEqual(SVSHAPE1.svgpr, 30)
247 def run_tst_program(self
, prog
, initial_regs
=None,
249 if initial_regs
is None:
250 initial_regs
= [0] * 32
251 simulator
= run_tst(prog
, initial_regs
, svstate
=svstate
)
256 if __name__
== "__main__":