From 80f5784d02e8312ea4372b653a7855a91f8b5814 Mon Sep 17 00:00:00 2001
From: Konstantinos Margaritis
Date: Fri, 17 Mar 2023 09:39:18 +0000
Subject: [PATCH] Add xchacha_encrypt_bytes_svp64

---
 crypto/chacha20/Makefile                      |   6 +-
 .../src/xchacha_encrypt_bytes_svp64.s         | 111 ++++++++++++++++++
 2 files changed, 114 insertions(+), 3 deletions(-)
 create mode 100644 crypto/chacha20/src/xchacha_encrypt_bytes_svp64.s

diff --git a/crypto/chacha20/Makefile b/crypto/chacha20/Makefile
index 6978e8f1..6fa39d11 100644
--- a/crypto/chacha20/Makefile
+++ b/crypto/chacha20/Makefile
@@ -7,10 +7,10 @@ CC = $(CROSS)gcc
 LD = $(CROSS)ld

 #compiler flags here
-CFLAGS = -O -Wall -Wextra -mno-vsx -mno-altivec -DDUMP -I../../media/pypowersim_wrapper -I/usr/include/python3.7m
+CFLAGS = -g3 -O -Wall -Wextra -mno-vsx -mno-altivec -I../../media/pypowersim_wrapper -I/usr/include/python3.7m

 # assembler flags here
-ASFLAGS= -mlibresoc -mregnames
+ASFLAGS= -mlibresoc -mregnames -Isrc

 #linker flags here
 LDFLAGS = -Wall -pthread -lpython3.7m
@@ -18,7 +18,7 @@ LDFLAGS = -Wall -pthread -lpython3.7m
 SRCDIR = src

 CFILES := $(SRCDIR)/xchacha20.c $(SRCDIR)/test.c $(SRCDIR)/xchacha20_wrapper.c
-ASFILES := $(SRCDIR)/xchacha20_svp64.s
+ASFILES := $(SRCDIR)/xchacha_hchacha20_svp64.s $(SRCDIR)/xchacha_encrypt_bytes_svp64.s

 INCLUDES := $(wildcard $(SRCDIR)/*.h))
 OBJECTS := $(CFILES:$(SRCDIR)/%.c=$(SRCDIR)/%.o) $(ASFILES:$(SRCDIR)/%.s=$(SRCDIR)/%.o)
diff --git a/crypto/chacha20/src/xchacha_encrypt_bytes_svp64.s b/crypto/chacha20/src/xchacha_encrypt_bytes_svp64.s
new file mode 100644
index 00000000..0dcbf671
--- /dev/null
+++ b/crypto/chacha20/src/xchacha_encrypt_bytes_svp64.s
@@ -0,0 +1,111 @@
+	.machine libresoc
+	.file "xchacha_encrypt_bytes_svp64.s"
+	.abiversion 2
+	.section ".text"
+	.align 2
+
+	.include "xchacha_svp64_macros.s"
+
+	# GPR allocation.  Every name below must map to a distinct register
+	# (or register group): the SHAPE*/SHIFTS registers are set up once
+	# before the block loop and have to survive every iteration.
+	.set ctx_ptr, 3
+	.set m_ptr, 4
+	.set c_ptr, 5
+	.set bytes, 6
+	.set ctr, 7		# scratch: octet count, then round count
+	.set SHAPE0, 8
+	.set SHAPE1, 12
+	.set SHAPE2, 16
+	.set SHIFTS, 20
+	.set VL, 22
+	.set j, 24		# 8 GPRs: copy of ctx->input
+	.set m, 32		# 8 GPRs: current message block
+	.set x, 40		# 8 GPRs: working state / output block
+
+	# xchacha_encrypt_bytes_svp64_real
+	#   ctx_ptr (r3): context; the 64 bytes at offset 0 are loaded into j[]
+	#   m_ptr   (r4): message to read
+	#   c_ptr   (r5): output buffer
+	#   bytes   (r6): number of message bytes; 0 returns immediately
+	# Handles one 64-byte block per .loop iteration, then stores the
+	# updated block counter (j12/j13) back to offset 48 of the context.
+	# NOTE(review): every iteration stores a full 64 bytes to c_ptr and
+	# loads m in whole octets, so a short final block still touches up to
+	# 64 bytes of c and up to 7 bytes past the end of m - confirm that the
+	# callers' buffers allow for this.
+	.globl xchacha_encrypt_bytes_svp64_real
+	.type xchacha_encrypt_bytes_svp64_real, @function
+xchacha_encrypt_bytes_svp64_real:
+	.cfi_startproc
+
+	# if bytes == 0, return
+	cmplwi bytes, 0
+	beqlr
+
+	# Load 16 x 32-bit values from ctx->input
+	setvl 0,0,8,0,1,1		# Set VL to 8 elements
+	sv.ld *j, 0(ctx_ptr)
+
+	# Set up quarterround constants, SHAPE0, SHAPE1, SHAPE2, SHIFTS
+	quarterround_const SHAPE0, SHAPE1, SHAPE2, SHIFTS
+.loop:
+	# Copy j[] to x[], 16 x 32-bit elements
+	setvl 0,0,16,0,1,1
+	sv.ori/w=32 *x, *j, 0
+
+	# Octets to load from m: min(ceil(bytes / 8), 8).  Rounding up keeps a
+	# trailing partial octet; ctr is free here until the round count below.
+	addi ctr, bytes, 7
+	srdi ctr, ctr, 3
+	cmplwi ctr, 8
+	ble .l1
+	li ctr, 8
+
+.l1:
+	# Load up to 64 bytes from m_ptr, ctr x 64-bit elements, set MAXVL=8
+	setvl 0,ctr,8,0,1,1
+	sv.ld *m, 0(m_ptr)
+
+	# establish CTR for outer round count
+	li ctr, 10
+	# Call QuarterRound macro for CTR loops on x[]
+	quarterround x, ctr, VL, SHAPE0, SHAPE1, SHAPE2, SHIFTS
+
+	# Add j[] to x[], 16 x 32-bit elements
+	setvl 0,0,16,0,1,1
+	sv.add/w=32 *x, *x, *j
+
+	# XOR x[] elements with m[], 16 x 32-bit elements
+	sv.xor/w=32 *x, *x, *m
+
+	# j12++; if (!j12) j13++;
+	# j12 (low word) and j13 (high word) share one 64-bit register, so a
+	# single 64-bit add already carries a j12 wrap-around into j13.
+	addi j+6, j+6, 1
+
+	# Store 8 x 64-bit from x[] to c_ptr
+	setvl 0,0,8,0,1,1
+	sv.std *x, 0(c_ptr)
+
+	# More than 64 bytes were pending before this block: advance and loop
+	cmplwi bytes, 64
+	bgt .l5
+
+	# Last block: write j12/j13 back to ctx->input and return
+	std j+6, 48(ctx_ptr)
+	blr
+.l5:
+	subi bytes, bytes, 64
+	addi c_ptr, c_ptr, 64
+	addi m_ptr, m_ptr, 64
+	b .loop
+
+	.long 0
+	.byte 0,0,0,0,0,3,0,0
+	.cfi_endproc
+
+.LFE0:
+	.size xchacha_encrypt_bytes_svp64_real,.-xchacha_encrypt_bytes_svp64_real
+	.ident "GCC: (Debian 8.3.0-6) 8.3.0"
+	.section .note.GNU-stack,"",@progbits
-- 
2.30.2