From 2d4dd0547d929632878a57fd0f6dbf47d7f23bbf Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton
Date: Fri, 21 Oct 2022 12:47:41 +0100
Subject: [PATCH] move chacha20 to separate test, set/get masked regs to ISACaller

---
 src/openpower/decoder/isa/caller.py           |  32 +++
 .../decoder/isa/test_caller_svindex.py        | 215 +----------------
 .../decoder/isa/test_caller_svp64_chacha20.py | 225 ++++++++++++++++++
 3 files changed, 258 insertions(+), 214 deletions(-)
 create mode 100644 src/openpower/decoder/isa/test_caller_svp64_chacha20.py

diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py
index fc89afea..23e5bc03 100644
--- a/src/openpower/decoder/isa/caller.py
+++ b/src/openpower/decoder/isa/caller.py
@@ -105,6 +105,38 @@ REG_SORT_ORDER = {
 fregs = ['FRA', 'FRB', 'FRC', 'FRS', 'FRT']
 
+
+def get_masked_reg(regs, base, offs, ew_bits):
+    # rrrright. start by breaking down into row/col, based on elwidth
+    gpr_offs = offs // (64//ew_bits)
+    gpr_col = offs % (64//ew_bits)
+    # compute the mask based on ew_bits
+    mask = (1 << ew_bits) - 1
+    # now select the 64-bit register, mask out the requested element...
+    val = regs[base+gpr_offs] & (mask << (gpr_col*ew_bits))
+    # ...and shift it back down to the LSB before returning it
+    return val >> (gpr_col*ew_bits)
+
+
+def set_masked_reg(regs, base, offs, ew_bits, value):
+    # rrrright. start by breaking down into row/col, based on elwidth
+    gpr_offs = offs // (64//ew_bits)
+    gpr_col = offs % (64//ew_bits)
+    # compute the mask based on ew_bits
+    mask = (1 << ew_bits) - 1
+    # now select the 64-bit register and clear the element's slot
+    val = regs[base+gpr_offs] & ~(mask << (gpr_col*ew_bits))
+    # insert the new value (masked to the element width) into the slot
+    val |= (value & mask) << (gpr_col*ew_bits)
+    # finally put the updated 64-bit value back into the regfile
+    regs[base+gpr_offs] = val
+
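
A quick reference sketch for the two helpers above (illustrative only, not
part of the patch: it assumes the patch is applied and openpower-isa is on
the PYTHONPATH). With ew_bits=16 there are 64//16 = 4 elements per 64-bit
register, element 0 at the LSB; element 4 spills into the next register:

    from openpower.decoder.isa.caller import get_masked_reg, set_masked_reg

    regs = [0] * 8   # stand-in for a flat 64-bit GPR file
    for i, v in enumerate([0x1111, 0x2222, 0x3333, 0x4444, 0x5555]):
        set_masked_reg(regs, base=0, offs=i, ew_bits=16, value=v)
    assert regs[0] == 0x4444333322221111   # four elements packed LSB-first
    assert regs[1] == 0x5555               # fifth element lands in regs[1]
    assert get_masked_reg(regs, base=0, offs=2, ew_bits=16) == 0x3333
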
diff --git a/src/openpower/decoder/isa/test_caller_svindex.py b/src/openpower/decoder/isa/test_caller_svindex.py
--- a/src/openpower/decoder/isa/test_caller_svindex.py
+++ b/src/openpower/decoder/isa/test_caller_svindex.py
-def rotl32(v, c):
-    res = ((v << c) | (v >> (32 - c))) & 0xffffffff
-    print ("op rotl32", hex(res), hex(v), hex(c))
-    return res
-
-
-def add(a, b):
-    res = (a + b) & 0xffffffff
-    print ("op add", hex(res), hex(a), hex(b))
-    return res
-
-
-def xor(a, b):
-    res = a ^ b
-    print ("op xor", hex(res), hex(a), hex(b))
-    return res
-
-
-def sthth_round(x, a, b, d, rot):
-    x[a] = add(x[a], x[b])
-    x[d] = xor(x[d], x[a])
-    x[d] = rotl32(x[d], rot)
-
-def quarter_round(x, a, b, c, d):
-    """performs one full chacha20 quarter-round on the state x
-    """
-    sthth_round(x, a, b, d, 16)
-    sthth_round(x, c, d, b, 12)
-    sthth_round(x, a, b, d, 8)
-    sthth_round(x, c, d, b, 7)
-
-
-def chacha_idx_schedule(x, fn=quarter_round_schedule):
-    fn(x, 0, 4, 8, 12)
-    fn(x, 1, 5, 9, 13)
-    fn(x, 2, 6, 10, 14)
-    fn(x, 3, 7, 11, 15)
-    fn(x, 0, 5, 10, 15)
-    fn(x, 1, 6, 11, 12)
-    fn(x, 2, 7, 8, 13)
-    fn(x, 3, 4, 9, 14)
-
-
-def get_masked_reg(regs, base, offs, ew_bits):
-    # rrrright. start by breaking down into row/col, based on elwidth
-    gpr_offs = offs // (64//ew_bits)
-    gpr_col = offs % (64//ew_bits)
-    # compute the mask based on ew_bits
-    mask = (1 << ew_bits) - 1
-    # now select the 64-bit register, mask out the requested element...
-    val = regs[base+gpr_offs] & (mask << (gpr_col*ew_bits))
-    # ...and shift it back down to the LSB before returning it
-    return val >> (gpr_col*ew_bits)
-
-
-def set_masked_reg(regs, base, offs, ew_bits, value):
-    # rrrright. start by breaking down into row/col, based on elwidth
-    gpr_offs = offs // (64//ew_bits)
-    gpr_col = offs % (64//ew_bits)
-    # compute the mask based on ew_bits
-    mask = (1 << ew_bits) - 1
-    # now select the 64-bit register and clear the element's slot
-    val = regs[base+gpr_offs] & ~(mask << (gpr_col*ew_bits))
-    # insert the new value (masked to the element width) into the slot
-    val |= (value & mask) << (gpr_col*ew_bits)
-    # finally put the updated 64-bit value back into the regfile
-    regs[base+gpr_offs] = val
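
The pure-Python chacha20 model being moved (deleted above, re-added in the
new file below) can be sanity-checked against the quarter-round test vector
in RFC 8439 section 2.1.1. A self-contained sketch, prints omitted (the
constants are the RFC's; the functions mirror the ones in this patch):

    def rotl32(v, c):
        return ((v << c) | (v >> (32 - c))) & 0xffffffff

    def sthth_round(x, a, b, d, rot):
        x[a] = (x[a] + x[b]) & 0xffffffff
        x[d] = rotl32(x[d] ^ x[a], rot)

    def quarter_round(x, a, b, c, d):
        sthth_round(x, a, b, d, 16)
        sthth_round(x, c, d, b, 12)
        sthth_round(x, a, b, d, 8)
        sthth_round(x, c, d, b, 7)

    x = [0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567]
    quarter_round(x, 0, 1, 2, 3)
    assert x == [0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb]
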
diff --git a/src/openpower/decoder/isa/test_caller_svp64_chacha20.py b/src/openpower/decoder/isa/test_caller_svp64_chacha20.py
new file mode 100644
--- /dev/null
+++ b/src/openpower/decoder/isa/test_caller_svp64_chacha20.py
@@ -0,0 +1,225 @@
+"""SVP64 chacha20 round unit test: add-xor-rotl32 with svindex/svremap
+"""
+import unittest
+from copy import deepcopy
+
+from nmutil.formaltest import FHDLTestCase
+from openpower.decoder.isa.caller import SVP64State, set_masked_reg
+from openpower.decoder.isa.test_caller import run_tst
+from openpower.decoder.selectable_int import SelectableInt
+from openpower.simulator.program import Program
+from openpower.sv.trans.svp64 import SVP64Asm
+
+
+def rotl32(v, c):
+    res = ((v << c) | (v >> (32 - c))) & 0xffffffff
+    print ("op rotl32", hex(res), hex(v), hex(c))
+    return res
+
+
+def add(a, b):
+    res = (a + b) & 0xffffffff
+    print ("op add", hex(res), hex(a), hex(b))
+    return res
+
+
+def xor(a, b):
+    res = a ^ b
+    print ("op xor", hex(res), hex(a), hex(b))
+    return res
+
+
+def sthth_round(x, a, b, d, rot):
+    x[a] = add(x[a], x[b])
+    x[d] = xor(x[d], x[a])
+    x[d] = rotl32(x[d], rot)
+
+def quarter_round(x, a, b, c, d):
+    """performs one full chacha20 quarter-round on the state x
+    """
+    sthth_round(x, a, b, d, 16)
+    sthth_round(x, c, d, b, 12)
+    sthth_round(x, a, b, d, 8)
+    sthth_round(x, c, d, b, 7)
+
+
+def quarter_round_schedule(x, a, b, c, d):
+    """collate list of reg-offsets for use with svindex/svremap
+    """
+    x.append((a, b, d, 16))
+    x.append((c, d, b, 12))
+    x.append((a, b, d, 8))
+    x.append((c, d, b, 7))
+
+
+def chacha_idx_schedule(x, fn=quarter_round_schedule):
+    fn(x, 0, 4, 8, 12)
+    fn(x, 1, 5, 9, 13)
+    fn(x, 2, 6, 10, 14)
+    fn(x, 3, 7, 11, 15)
+    fn(x, 0, 5, 10, 15)
+    fn(x, 1, 6, 11, 12)
+    fn(x, 2, 7, 8, 13)
+    fn(x, 3, 4, 9, 14)
+
+
+class SVSTATETestCase(FHDLTestCase):
+
+    def _check_regs(self, sim, expected):
+        print ("GPR")
+        sim.gpr.dump()
+        for i in range(32):
+            self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64),
+                             "GPR %d %x expected %x" %
+                             (i, sim.gpr(i).value, expected[i]))
+
+    def test_1_sv_index_rot32(self):
+        """sets VL=32 (via SVSTATE) then does modulo 4 svindex, and a rotate.
+        RA is re-mapped via Indexing,
+        RB is re-mapped via different Indexing,
+
+        svremap RT=0,RA=1,RB=0
+        add r0, r1, r0              RT, RA, RB
+        svremap RS=2,RA=2,RB=0      # RB stays = 0
+        xor r2, r2, r0              RA, RS, RB
+        svremap RS=2,RA=2,RB=3      # RA stays = 2
+        rlwnm r2, r2, r3, 0, 31     rlwnm RA,RS,RB,MB,ME (Rc=0)
+        """
+        isa = SVP64Asm([
+            # set up VL=32 vertical-first, and SVSHAPEs 0-2
+            'setvl 0, 0, 32, 1, 0, 1',       # vertical-first, set MAXVL
+            'svindex 11, 0, 1, 3, 0, 1, 0',  # SVSHAPE0, a
+            'svindex 15, 1, 1, 3, 0, 1, 0',  # SVSHAPE1, b
+            'svindex 19, 2, 1, 3, 0, 1, 0',  # SVSHAPE2, c
+            'svindex 21, 3, 4, 3, 0, 1, 0',  # SVSHAPE3, shift amount, mod 4
+            'setvl 17, 17, 32, 1, 1, 0',     # vertical-first, set VL from r17
+            # inner loop begins here. add-xor-rotl32 with remap, step, branch
+            'svremap 31, 1, 0, 0, 0, 0, 0',  # RA=1, RB=0, RT=0 (0b01011)
+            'sv.add/w=32 *0, *0, *0',
+            'svremap 31, 2, 0, 2, 2, 0, 0',  # RA=2, RB=0, RS=2 (0b00111)
+            'sv.xor/w=32 *0, *0, *0',
+            'svremap 31, 0, 3, 2, 2, 0, 0',  # RA=2, RB=3, RS=2 (0b01110)
+            'sv.rldcl/w=32 *0, *0, *18, 0',
+            'svstep. 17, 1, 0',              # step to next
+            'bc 6, 3, -0x28',                # VF loop
+        ])
+        lst = list(isa)
+        print ("listing", lst)
+
+        schedule = []
+        chacha_idx_schedule(schedule, fn=quarter_round_schedule)
+
+        # initial values in GPR regfile
+        initial_regs = [0] * 128
+
+        # offsets for a, b, c
+        for i, (a, b, c, d) in enumerate(schedule):
+            set_masked_reg(initial_regs, 22, i, ew_bits=8, value=a)
+            set_masked_reg(initial_regs, 30, i, ew_bits=8, value=b)
+            set_masked_reg(initial_regs, 38, i, ew_bits=8, value=c)
+
+        # offsets for d (modulo 4 shift amount)
+        shifts = [16, 12, 8, 7]  # chacha20 shifts
+        idxs2 = [0, 1, 2, 3]     # cycle order (for fun)
+        for i in range(4):
+            set_masked_reg(initial_regs, 42, i, ew_bits=8, value=idxs2[i])
+            set_masked_reg(initial_regs, 18, i, ew_bits=32, value=shifts[i])
+
+        initial_regs[17] = 32  # VL=32
+        x = [0] * 16
+        for i in range(16):
+            x[i] = i << 1
+        for i in range(16):
+            set_masked_reg(initial_regs, 0, i, ew_bits=32, value=x[i])
+
+        # SVSTATE vl=32
+        svstate = SVP64State()
+        svstate.vl = 32      # VL
+        svstate.maxvl = 32   # MAXVL
+        print ("SVSTATE", bin(svstate.asint()))
+
+        # copy before running, compute expected results
+        expected_regs = deepcopy(initial_regs)
+        expected_regs[17] = 0  # reaches zero
+        expected = deepcopy(x)
+        chacha_idx_schedule(expected, fn=quarter_round)
+        for i in range(16):
+            set_masked_reg(expected_regs, 0, i, ew_bits=32, value=expected[i])
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, initial_regs, svstate=svstate)
+
+            # print out actual vs expected regs
+            for i in range(8):
+                RS = sim.gpr(i).value
+                print ("expected", i, hex(RS), hex(expected_regs[i]))
+
+            print (sim.spr)
+            SVSHAPE0 = sim.spr['SVSHAPE0']
+            SVSHAPE1 = sim.spr['SVSHAPE1']
+            print ("SVSTATE after", bin(sim.svstate.asint()))
+            print ("        vl", bin(sim.svstate.vl))
+            print ("       mvl", bin(sim.svstate.maxvl))
+            print ("   srcstep", bin(sim.svstate.srcstep))
+            print ("   dststep", bin(sim.svstate.dststep))
+            print ("     RMpst", bin(sim.svstate.RMpst))
+            print ("      SVme", bin(sim.svstate.SVme))
+            print ("       mo0", bin(sim.svstate.mo0))
+            print ("       mo1", bin(sim.svstate.mo1))
+            print ("       mi0", bin(sim.svstate.mi0))
+            print ("       mi1", bin(sim.svstate.mi1))
+            print ("       mi2", bin(sim.svstate.mi2))
+            print ("STATE0svgpr", hex(SVSHAPE0.svgpr))
+            print ("STATE0 xdim", SVSHAPE0.xdimsz)
+            print ("STATE0 ydim", SVSHAPE0.ydimsz)
+            print ("STATE0 skip", bin(SVSHAPE0.skip))
+            print ("STATE0  inv", SVSHAPE0.invxyz)
+            print ("STATE0order", SVSHAPE0.order)
+            sim.gpr.dump()
+            self._check_regs(sim, expected_regs)
+            self.assertEqual(sim.svstate.RMpst, 0)
+            self.assertEqual(sim.svstate.SVme, 0b11111)
+            self.assertEqual(sim.svstate.mi0, 0)
+            self.assertEqual(sim.svstate.mi1, 3)
+            self.assertEqual(sim.svstate.mi2, 2)
+            self.assertEqual(sim.svstate.mo0, 2)
+            self.assertEqual(sim.svstate.mo1, 0)
+            self.assertEqual(SVSHAPE0.svgpr, 22)
+            self.assertEqual(SVSHAPE1.svgpr, 30)
+
+    def run_tst_program(self, prog, initial_regs=None,
+                        svstate=None):
+        if initial_regs is None:
+            initial_regs = [0] * 32
+        simulator = run_tst(prog, initial_regs, svstate=svstate)
+        simulator.gpr.dump()
+        return simulator
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
2.30.2
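
A note on the shift-table design in the new test: flattening the eight
quarter-rounds into 32 sthth_round steps makes the rotate amount repeat
[16, 12, 8, 7] every four steps, which is why r18 only needs four 32-bit
entries and SVSHAPE3 indexes them modulo 4. A standalone sketch (all names
are local to this example) demonstrating that equivalence:

    def rotl32(v, c):
        return ((v << c) | (v >> (32 - c))) & 0xffffffff

    def step(x, a, b, d, rot):
        x[a] = (x[a] + x[b]) & 0xffffffff
        x[d] = rotl32(x[d] ^ x[a], rot)

    QR = [(0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15),
          (0, 5, 10, 15), (1, 6, 11, 12), (2, 7, 8, 13), (3, 4, 9, 14)]
    SHIFTS = [16, 12, 8, 7]

    # flatten: each quarter-round contributes four (add-tgt, src, xor-tgt)
    triples = []
    for (a, b, c, d) in QR:
        triples += [(a, b, d), (c, d, b), (a, b, d), (c, d, b)]

    x = [i << 1 for i in range(16)]      # same initial state as the test
    for i, (a, b, d) in enumerate(triples):
        step(x, a, b, d, SHIFTS[i % 4])  # rotate amount cycles mod 4

    # cross-check against the nested quarter-round form
    y = [i << 1 for i in range(16)]
    for (a, b, c, d) in QR:
        step(y, a, b, d, 16); step(y, c, d, b, 12)
        step(y, a, b, d, 8);  step(y, c, d, b, 7)
    assert x == y
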