From ad0d1638806fefcf3856332d4b4acfff287e4cc3 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 12 Mar 2023 14:52:26 +0000 Subject: [PATCH] parameterise svstep RT (set to 16 in chacha20 test) --- crypto/chacha20/src/xchacha20_svp64.s | 3 +++ .../decoder/isa/test_caller_svp64_chacha20.py | 10 ++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/crypto/chacha20/src/xchacha20_svp64.s b/crypto/chacha20/src/xchacha20_svp64.s index 2d60bb78..14bb4fac 100644 --- a/crypto/chacha20/src/xchacha20_svp64.s +++ b/crypto/chacha20/src/xchacha20_svp64.s @@ -104,6 +104,9 @@ xchacha_hchacha20_svp64_real: sv.xor/w=32 *x+24, *x+24, *x+24 svremap 31, 0, 3, 2, 2, 0, 0 # RA=2, RB=3, RS=2 (0b01110) sv.rldcl/w=32 *x+24, *x+24, *SHIFTS, 0 + # Register 16 is the destination (RT) receiving the result of svstep. + # NOTE: it overlaps with SHAPE2, which is also 16, so the first 8 + # indices will get corrupted. svstep. 16, 1, 0 # step to next in-regs element bc 6, 3, .inner # svstep. Rc=1 loop-end-condition? 
# inner-loop done: outer loop standard CTR-decrement to setvl again diff --git a/src/openpower/decoder/isa/test_caller_svp64_chacha20.py b/src/openpower/decoder/isa/test_caller_svp64_chacha20.py index cfe4e2a6..5d7a95fa 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_chacha20.py +++ b/src/openpower/decoder/isa/test_caller_svp64_chacha20.py @@ -112,7 +112,9 @@ class SVSTATETestCase(FHDLTestCase): SHAPE0 = 22 SHAPE1 = 30 SHAPE2 = 38 - shifts = 18 + shifts = 18 # registers for 4 32-bit shift amounts + ctr = 9 # GPR holding the round count, copied into CTR by mtspr + temp = 16 # temporary register for result from svstep isa = SVP64Asm([ # set up VL=32 vertical-first, and SVSHAPEs 0-2 @@ -123,8 +125,8 @@ class SVSTATETestCase(FHDLTestCase): 'svindex %d, 2, 1, 3, 0, 1, 0' % (SHAPE2//2), # SVSHAPE2, c 'svshape2 0, 0, 3, 4, 0, 1', # SVSHAPE3, shift amount, mod 4 # establish CTR for outer round count - 'addi 16, 0, %d' % nrounds, # set number of rounds - 'mtspr 9, 16', # set CTR to number of rounds + 'addi %d, 0, %d' % (ctr, nrounds), # set number of rounds + 'mtspr 9, %d' % ctr, # set CTR to number of rounds # outer loop begins here (standard CTR loop) 'setvl %d, %d, 32, 1, 1, 0' % (vl, vl), # vertical-first, set VL # inner loop begins here. add-xor-rotl32 with remap, step, branch @@ -134,7 +136,7 @@ class SVSTATETestCase(FHDLTestCase): 'sv.xor/w=32 *%d, *%d, *%d' % (block, block, block), 'svremap 31, 0, 3, 2, 2, 0, 0', # RA=2, RB=3, RS=2 (0b01110) 'sv.rldcl/w=32 *%d, *%d, *%d, 0' % (block, block, shifts), - 'svstep. 16, 1, 0', # step to next in-regs element + 'svstep. %d, 1, 0' % temp, # step to next in-regs element 'bc 6, 3, -0x28', # svstep. Rc=1 loop-end-condition? # inner-loop done: outer loop standard CTR-decrement to setvl again 'bc 16, 0, -0x30', -- 2.30.2