From f86e4a8e7134687196889c183b956dd815596ee7 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 20 Oct 2022 17:27:59 +0100 Subject: [PATCH] add first chacha20 round test --- openpower/isa/fixedshift.mdwn | 2 +- .../decoder/isa/test_caller_svindex.py | 179 ++++++++++++++---- 2 files changed, 143 insertions(+), 38 deletions(-) diff --git a/openpower/isa/fixedshift.mdwn b/openpower/isa/fixedshift.mdwn index fd88c1a3..9d90da3c 100644 --- a/openpower/isa/fixedshift.mdwn +++ b/openpower/isa/fixedshift.mdwn @@ -38,7 +38,7 @@ M-Form Pseudo-code: n <- (RB)[XLEN-5:XLEN-1] - r <- ROTL32((RS)[32:63], n) + r <- ROTL32((RS)[XLEN-32:XLEN-1], n) m <- MASK32(MB, ME) RA <- r & m diff --git a/src/openpower/decoder/isa/test_caller_svindex.py b/src/openpower/decoder/isa/test_caller_svindex.py index 787aa990..f4d15884 100644 --- a/src/openpower/decoder/isa/test_caller_svindex.py +++ b/src/openpower/decoder/isa/test_caller_svindex.py @@ -19,6 +19,89 @@ from openpower.consts import SVP64CROffs from copy import deepcopy +# originally from https://github.com/pts/chacha20 +def quarter_round_schedule(x, a, b, c, d): + """collate list of reg-offsets for use with svindex/svremap + """ + #x[a] = (x[a] + x[b]) & 0xffffffff + #x[d] = x[d] ^ x[a] + #x[d] = rotate(x[d], 16) + x.append((a, b, d, 16)) + + #x[c] = (x[c] + x[d]) & 0xffffffff + #x[b] = x[b] ^ x[c] + #x[b] = rotate(x[b], 12) + x.append((c, d, b, 12)) + + #x[a] = (x[a] + x[b]) & 0xffffffff + #x[d] = x[d] ^ x[a] + #x[d] = rotate(x[d], 8) + x.append((a, b, d, 8)) + + #x[c] = (x[c] + x[d]) & 0xffffffff + #x[b] = x[b] ^ x[c] + #x[b] = rotate(x[b], 7) + x.append((c, d, b, 7)) + + +def rotl32(v, c): + c = c & 0x1f + res = ((v << c) & 0xffffffff) | v >> (32 - c) + print ("op rotl32", hex(res), hex(v), hex(c)) + return res + + +def add(a, b): + res = (a + b) & 0xffffffff + print ("op add", hex(res), hex(a), hex(b)) + return res + + +def xor(a, b): + res = a ^ b + print ("op xor", hex(res), hex(a), hex(b)) + return res + + +def 
sthth_round(x, a, b, d, rot): + x[a] = add(x[a], x[b]) + x[d] = xor(x[d], x[a]) + x[d] = rotl32(x[d], rot) + +def quarter_round(x, a, b, c, d): + """apply one chacha20 quarter-round to x in-place (four add/xor/rotl32 steps) + """ + sthth_round(x, a, b, d, 16) + sthth_round(x, c, d, b, 12) + sthth_round(x, a, b, d, 8) + sthth_round(x, c, d, b, 7) + + +def chacha_idx_schedule(x, fn=quarter_round_schedule): + fn(x, 0, 4, 8, 12) + fn(x, 1, 5, 9, 13) + fn(x, 2, 6, 10, 14) + fn(x, 3, 7, 11, 15) + fn(x, 0, 5, 10, 15) + fn(x, 1, 6, 11, 12) + fn(x, 2, 7, 8, 13) + fn(x, 3, 4, 9, 14) + + +def get_masked_reg(regs, base, offs, ew_bits): + # rrrright. start by breaking down into row/col, based on elwidth + gpr_offs = offs // (64//ew_bits) + gpr_col = offs % (64//ew_bits) + # compute the mask based on ew_bits + mask = (1<<ew_bits)-1 + # now select the 64-bit register, but get its value (easier) + val = regs[base+gpr_offs] + # now mask out the bits we don't want + val = val & (mask << (gpr_col*ew_bits)) + # then return the bits we want, shifted down + return val >> (gpr_col*ew_bits) + + def set_masked_reg(regs, base, offs, ew_bits, value): # rrrright. start by breaking down into row/col, based on elwidth gpr_offs = offs // (64//ew_bits) @@ -35,10 +118,6 @@ def set_masked_reg(regs, base, offs, ew_bits, value): val |= value << (gpr_col*ew_bits) regs[base+gpr_offs] = val -def rotl32(v, c): - c = c & 0x1f - return ((v << c) & 0xffffffff) | v >> (32 - c) - class SVSTATETestCase(FHDLTestCase): @@ -456,51 +535,81 @@ class SVSTATETestCase(FHDLTestCase): def test_1_sv_index_rot32(self): """sets VL=8 (via SVSTATE) then does modulo 4 svindex, and a rotate.
- RA is re-mapped via Indexing, RB is re-mapped via different Indexing, + + svremap RT=0,RA=1,RB=0 + add r0, r1, r0 RT, RA, RB + svremap RS=2,RA=2,RB=0 # RB stays = 0 + xor r2, r2, r0 RA, RS, RB + svremap RS=2,RA=2,RB=3 # RA stays = 2 + rlwnm r2, r2, r3, 0, 31 rlwnm RA,RS,RB,MB,ME (Rc=0) """ isa = SVP64Asm([ - 'svindex 14, 5, 4, 3, 0, 1, 0', # SVSHAPE1, RB, mod 4 - 'svindex 12, 8, 1, 3, 0, 1, 0', # SVSHAPE0, RS - 'sv.rlwnm *8, *0, *16, 0, 31', + 'setvl 17, 17, 32, 1, 1, 1', # vertical-first + 'svindex 11, 0, 1, 3, 0, 1, 0', # SVSHAPE0, a + 'svindex 15, 1, 1, 3, 0, 1, 0', # SVSHAPE1, b + 'svindex 19, 2, 1, 3, 0, 1, 0', # SVSHAPE2, c + 'svindex 21, 3, 4, 3, 0, 1, 0', # SVSHAPE3, shift amount, mod 4 + 'svremap 31, 1, 0, 0, 0, 0, 0', # RA=1, RB=0, RT=0 (0b01011) + 'sv.add/w=32 *0, *0, *0', + 'svremap 31, 2, 0, 2, 2, 0, 0', # RA=2, RB=0, RS=2 (0b00111) + 'sv.xor/w=32 *0, *0, *0', + 'svremap 31, 0, 3, 2, 2, 0, 0', # RA=2, RB=3, RS=2 (0b01110) + 'sv.rlwnm/w=32 *0, *0, *18, 0, 31', + 'svstep. 17, 1, 0', # step to next + 'bc 6, 3, -0x28', # VF loop ]) lst = list(isa) print ("listing", lst) + schedule = [] + chacha_idx_schedule(schedule, fn=quarter_round_schedule) + # initial values in GPR regfile initial_regs = [0] * 128 - idxs = [1, 0, 5, 2, 4, 3, 7, 6] # random enough - for i in range(8): - set_masked_reg(initial_regs, 24, i, ew_bits=8, value=idxs[i]) - initial_regs[i] = i + + # offsets for a b c + for i, (a,b,c,d) in enumerate(schedule): + set_masked_reg(initial_regs, 22, i, ew_bits=8, value=a) + set_masked_reg(initial_regs, 30, i, ew_bits=8, value=b) + set_masked_reg(initial_regs, 38, i, ew_bits=8, value=c) + + # offsets for d (modulo 4 shift amount) shifts = [16, 12, 8, 7] # chacha20 shifts - idxs2 = [1, 2, 3, 0] # cycle order (for fun) + idxs2 = [0, 1, 2, 3] # cycle order (for fun) for i in range(4): - set_masked_reg(initial_regs, 28, i, ew_bits=8, value=idxs2[i]) - initial_regs[16+i] = shifts[i] + set_masked_reg(initial_regs, 42, i, ew_bits=8, value=idxs2[i]) + 
set_masked_reg(initial_regs, 18, i, ew_bits=32, value=shifts[i]) + + initial_regs[17] = 32 # VL=32 + x = [0] * 16 + for i in range(16): + x[i] = i<<1 + for i in range(16): + set_masked_reg(initial_regs, 0, i, ew_bits=32, value=x[i]) # SVSTATE vl=10 svstate = SVP64State() - svstate.vl = 8 # VL - svstate.maxvl = 8 # MAXVL + svstate.vl = 32 # VL + svstate.maxvl = 32 # MAXVL print ("SVSTATE", bin(svstate.asint())) # copy before running, compute expected results expected_regs = deepcopy(initial_regs) - for i in range(8): - RS = initial_regs[0+idxs[i%8]] - RB = initial_regs[16+idxs2[i%4]] - expected_regs[i+8] = rotl32(RS, RB) + expected_regs[17] = 0 # reaches zero + expected = deepcopy(x) + chacha_idx_schedule(expected, fn=quarter_round) + for i in range(16): + set_masked_reg(expected_regs, 0, i, ew_bits=32, value=expected[i]) with Program(lst, bigendian=False) as program: sim = self.run_tst_program(program, initial_regs, svstate=svstate) # print out expected regs for i in range(8): - RS = initial_regs[0+idxs[i%8]] - RB = initial_regs[16+idxs2[i%4]] - print ("expected", i, hex(RS), hex(RB), hex(expected_regs[i+8])) + RS = sim.gpr(i).value + print ("expected", i, hex(RS), hex(expected_regs[i])) print (sim.spr) SVSHAPE0 = sim.spr['SVSHAPE0'] @@ -525,19 +634,15 @@ class SVSTATETestCase(FHDLTestCase): print ("STATE0order", SVSHAPE0.order) print (sim.gpr.dump()) self._check_regs(sim, expected_regs) - self.assertEqual(sim.svstate.RMpst, 1) # mm=1 so persist=1 - self.assertEqual(sim.svstate.SVme, 0b00110) # RS and RB active - # rmm is 0b00001 which means mi0=0 and all others inactive (0) - self.assertEqual(sim.svstate.mi0, 0) # RS - self.assertEqual(sim.svstate.mi1, 1) # RB - self.assertEqual(sim.svstate.mi2, 0) # ignore - self.assertEqual(sim.svstate.mo0, 0) # ignore - self.assertEqual(sim.svstate.mo1, 0) # ignore - self.assertEqual(SVSHAPE0.svgpr, 24) # SVG is shifted up by 1 - self.assertEqual(SVSHAPE1.svgpr, 28) # SVG is shifted up by 1 - for i in range(2,4): - shape = 
sim.spr['SVSHAPE%d' % i] - self.assertEqual(shape.svgpr, 0) + self.assertEqual(sim.svstate.RMpst, 0) + self.assertEqual(sim.svstate.SVme, 0b11111) + self.assertEqual(sim.svstate.mi0, 0) + self.assertEqual(sim.svstate.mi1, 3) + self.assertEqual(sim.svstate.mi2, 2) + self.assertEqual(sim.svstate.mo0, 2) + self.assertEqual(sim.svstate.mo1, 0) + self.assertEqual(SVSHAPE0.svgpr, 22) + self.assertEqual(SVSHAPE1.svgpr, 30) def run_tst_program(self, prog, initial_regs=None, svstate=None): -- 2.30.2