From 9a5db16b1ffd024c17d39ffd0026dbe7b6accb18 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 1 Jan 2023 23:32:29 +0000 Subject: [PATCH] gaah a mess (but working) as there is no elf-loading in pypowersim, .data .rodata .eh_frame and other information is missing and/or in the wrong place. had to get things working exactly as in media/mp3_0.sh, which is to call just the one function with the input data pre-loaded into specific memory locations --- crypto/chacha20/.gitignore | 1 + crypto/chacha20/Makefile | 2 +- crypto/chacha20/Makefile.cross | 6 +- crypto/chacha20/calling-conv | 287 ++++++++++++++++++++++++++++++++ crypto/chacha20/chacha20.cipher | Bin 0 -> 40 bytes crypto/chacha20/chacha20.gpr | 10 +- crypto/chacha20/chacha20.iv | 1 + crypto/chacha20/chacha20.key | 1 + crypto/chacha20/chacha20.plain | Bin 0 -> 40 bytes crypto/chacha20/chacha20.sh | 8 +- crypto/chacha20/src/svp64test.c | 100 +++++++---- crypto/chacha20/src/test.c | 16 +- crypto/chacha20/src/xchacha20.c | 93 ++++++++++- 13 files changed, 463 insertions(+), 62 deletions(-) create mode 100644 crypto/chacha20/calling-conv create mode 100644 crypto/chacha20/chacha20.cipher create mode 100644 crypto/chacha20/chacha20.iv create mode 100644 crypto/chacha20/chacha20.key create mode 100644 crypto/chacha20/chacha20.plain diff --git a/crypto/chacha20/.gitignore b/crypto/chacha20/.gitignore index 7cf238fb..1941729c 100644 --- a/crypto/chacha20/.gitignore +++ b/crypto/chacha20/.gitignore @@ -1,3 +1,4 @@ *.o *.elf *.bin +*.out diff --git a/crypto/chacha20/Makefile b/crypto/chacha20/Makefile index deb5f4b9..e356fa63 100644 --- a/crypto/chacha20/Makefile +++ b/crypto/chacha20/Makefile @@ -7,7 +7,7 @@ CC = $(CROSS)gcc LD = $(CROSS)ld #compiler flags here -CFLAGS = -O3 -Wall -Wextra -static -mno-vsx -mno-altivec +CFLAGS = -O3 -Wall -Wextra -static -mno-vsx -mno-altivec -DDUMP #linker flags here LDFLAGS = -Wall -static diff --git a/crypto/chacha20/Makefile.cross b/crypto/chacha20/Makefile.cross index 178cbeab..25b51f86 100755 --- a/crypto/chacha20/Makefile.cross +++ b/crypto/chacha20/Makefile.cross @@ -5,7 +5,7 @@ AS = $(CROSS)as CC = $(CROSS)gcc LD = $(CROSS)ld OBJCOPY = $(CROSS)objcopy -CFLAGS = -O3 -Wall -Wextra -mno-vsx -mno-altivec +CFLAGS = -O3 -Wall -Wextra -mno-vsx -mno-altivec -static -DMEMDUMP AFLAGS ?= -mpwr9 @@ -28,8 +28,8 @@ $(OBJECTS): $(SRCDIR)/%.o : $(SRCDIR)/%.c #pysvp64asm $< $<.sv #$(AS) $(AFLAGS) -c $<.sv -le -o $<.o -chacha20test.bin: $(OBJECTS) - $(LD) $(OBJECTS) -EL -o $<.elf -T memmap +chacha20test.bin: $(OBJECTS) memmap + $(LD) $(OBJECTS) -static -EL -o $<.elf -T memmap $(OBJCOPY) $<.elf -I elf64-little -O binary $@ tests: $(OBJ) diff --git a/crypto/chacha20/calling-conv b/crypto/chacha20/calling-conv new file mode 100644 index 00000000..c3cab5d5 --- /dev/null +++ b/crypto/chacha20/calling-conv @@ -0,0 +1,287 @@ +tl;dr r1 stack, r2 TOC if used, r3-r10 args + + + Table 2.19. Register Roles + + +--------------------------------------------------------------------------------------------+ + | Register | Preservation Rules | Purpose | + |-----------------+------------------------+-------------------------------------------------| + | | | Optional use in function linkage. | + | r0 | Volatile | | + | | | Used in function prologues. | + |-----------------+------------------------+-------------------------------------------------| + | r1 | Nonvolatile | Stack frame pointer. | + |-----------------+------------------------+-------------------------------------------------| + | r2 | Nonvolatile[a] | TOC pointer. | + |-----------------+------------------------+-------------------------------------------------| + | r3-r10 | Volatile | Parameter and return values. | + |-----------------+------------------------+-------------------------------------------------| + | | | Optional use in function linkage. | + | r11 | Volatile | | + | | | Used as an environment pointer in languages | + | | | that require environment pointers. | + |-----------------+------------------------+-------------------------------------------------| + | | | Optional use in function linkage. | + | r12 | Volatile | | + | | | Function entry address at the global entry | + | | | point. | + |-----------------+------------------------+-------------------------------------------------| + | r13 | Reserved | Thread pointer (see Section 3.7, "Thread Local | + | | | Storage ABI"). | + |-----------------+------------------------+-------------------------------------------------| + | r14-r31[b] | Nonvolatile | Local variables. | + |-----------------+------------------------+-------------------------------------------------| + | LR | Volatile | Link register. | + |-----------------+------------------------+-------------------------------------------------| + | CTR | Volatile | Loop count register. | + |-----------------+------------------------+-------------------------------------------------| + | TAR | Reserved | Reserved for system use. This register should | + | | | not be read or written by application software. | + |-----------------+------------------------+-------------------------------------------------| + | XER | Volatile | Fixed-point exception register. | + |-----------------+------------------------+-------------------------------------------------| + | CR0-CR1 | Volatile | Condition register fields. | + |-----------------+------------------------+-------------------------------------------------| + | CR2-CR4 | Nonvolatile | Condition register fields. | + |-----------------+------------------------+-------------------------------------------------| + | CR5-CR7 | Volatile | Condition register fields. | + |-----------------+------------------------+-------------------------------------------------| + | DSCR | Limited Access | Data stream prefetch control. | + |-----------------+------------------------+-------------------------------------------------| + | VRSAVE | Reserved | Reserved for system use. This register should | + | | | not be read or written by application software. | + |--------------------------------------------------------------------------------------------| + | [a] Register r2 is nonvolatile with respect to calls between functions in the same | + | compilation unit. It is saved and restored by code inserted by the linker resolving a call | + | to an external function. For more information, see Section 2.2.1.1, "TOC Pointer Usage". | + | | + | [b] If a function needs a frame pointer, assigning r31 to the role of the frame pointer is | + | recommended. | + +--------------------------------------------------------------------------------------------+ + + + + TOC Pointer Usage + + As described in Section 3.4, "Symbol Table", the TOC pointer, r2, is commonly initialized by + the global function entry point when a function is called through the global entry point. It + may be called from a module other than the current function's module or from an unknown call + point, such as through a function pointer. (For more information, see Section 2.3.2.1, + "Function Prologue".) + + In those instances, it is the caller's responsibility to store the TOC pointer, r2, in the TOC + pointer doubleword of the caller's stack frame. For references external to the compilation + unit, this code is inserted by the static linker if a function is to be resolved by the + dynamic linker. For references through function pointers, it is the compiler's or assembler + programmer's responsibility to insert appropriate TOC save and restore code. If the function + is called from the same module as the callee, the callee must preserve the value of r2. (See + Section 3.6.1, "Function Call" for a description of function entry conventions.) + + When a function calls another function, the TOC pointer must have a legal value pointing to + the TOC base, which may be initialized as described in Section 4.2.3, "Global Offset Table". + + When global data is accessed, the TOC pointer must be available for dereference at the point + of all uses of values derived from the TOC pointer in conjunction with the @l operator. This + property is used by the linker to optimize TOC pointer accesses. In addition, all reaching + definitions for a TOC-pointer-derived access must compute the same definition for code to be + ABI compliant. (See the Section 3.6.3.1, "TOC Pointer Usage".) + + In some implementations, non ABI-compliant code may be processed by providing additional + linker options; for example, linker options disabling linker optimization. However, this + behavior in support of non-ABI compliant code is not guaranteed to be portable and supported + in all systems. For examples of compliant and noncompliant code, see Section 3.6.3.1, "TOC + Pointer Usage". + + + + Optional Function Linkage + + Except as follows, a function cannot depend on the values of those registers that are optional + in the function linkage (r0, r11, and r12) because they may be altered by interlibrary calls: + + o When a function is entered in a way to initialize its environment pointer, register r11 + contains the environment pointer. It is used to support languages with access to + additional environment context; for example, for languages that support lexical nesting to + access its lexically nested outer context. + + o When a function is entered through its global entry point, register r12 contains the + entry-point address. For more information, see the description of dual entry points in + Section 2.3.2.1, "Function Prologue" and Section 2.3.2.2, "Function Epilogue". + + + + Stack Frame Pointer + + The stack pointer always points to the lowest allocated valid stack frame. It must maintain + quadword alignment and grow toward the lower addresses. The contents of the word at that + address point to the previously allocated stack frame when the code has been compiled to + maintain back chains. A called function is permitted to decrement it if required. For more + information, see Section 2.3.8, "Dynamic Stack Space Allocation". + + + + Link Register + + The link register contains the address that a called function normally returns to. It is + volatile across function calls. + + + + Condition Register Fields + + In the condition register, the bit fields CR2, CR3, and CR4 are nonvolatile. The value on + entry must be restored on exit. The other bit fields are volatile. + + This ABI requires OpenPOWER-compliant processors to implement mfocr instructions in a manner + that initializes undefined bits of the RT result register of mfocr instructions to one of the + following values: + + o 0, in accordance with OpenPOWER-compliant processor implementation practice + + o The architected value of the corresponding CR field in the mfocr instruction + + [Note] Note + When executing an mfocr instruction, the POWER8 processor does not implement the + behavior described in the "Fixed-Point Invalid Forms and Undefined Conditions" section + of POWER8 Processor User's Manual for the Single-Chip Module. Instead, it replicates + the selected condition register field within the byte that contains it rather than + initializing to 0 the bits corresponding to the nonselected bits of the byte that + contains it. When generating code to save two condition register fields that are stored + in the same byte, the compiler must mask the value received from mfocr to avoid + corruption of the resulting (partial) condition register word. + + This erratum does not apply to the POWER9 processor. + + For more information, see Power ISA, version 3.0 and "Fixed-Point Invalid Forms and Undefined + Conditions" in POWER9 Processor User's Manual. + + Floating-Point Registers + + In OpenPOWER-compliant processors, floating-point and vector functions are implemented using + a unified vector-scalar model. As shown in Figure 2.16, "Floating-Point Registers as Part of + VSRs" and Figure 2.17, "Vector Registers as Part of VSRs", there are 64 vector-scalar + registers; each is 128 bits wide. + + The vector-scalar registers can be addressed with vector-scalar instructions, for vector and + scalar processing of all 64 registers, or with the "classic" Power floating-point instructions + to refer to a 32-register subset of 64 bits per register. They can also be addressed with VMX + instructions to refer to a 32-register subset of 128-bit wide registers. + + + + Figure 2.16. Floating-Point Registers as Part of VSRs + + + + Figure 2.17. Vector Registers as Part of VSRs + + The classic floating-point repertoire consists of 32 floating-point registers, each 64 bits + wide, and an associated special-purpose register to provide floating-point status and control. + Throughout this document, the symbol fN is used, where N is a register number, to refer to + floating-point register N. + + For the purpose of function calls, the right half of VSX registers, corresponding to the + classic floating-point registers (that is, vsr0-vsr31), is volatile. + + + + Table 2.20. Floating-Point Register Roles for Binary Floating-Point Types + + +--------------------------------------------------------------------------------------------+ + | Register | Preservation Rules | Purpose | + |----------+--------------------+------------------------------------------------------------| + | f0 | Volatile | Local variables. | + |----------+--------------------+------------------------------------------------------------| + | f1-f13 | Volatile | Used for parameter passing and return values of binary | + | | | float types. | + |----------+--------------------+------------------------------------------------------------| + | f14-f31 | Nonvolatile | Local variables. | + |----------+--------------------+------------------------------------------------------------| + | | | Floating-Point Status and Control Register limited-access | + | FPSCR | Limited-access | bits. Preservation rules governing the limited-access bits | + | | | for the bit fields [VE], [OE], [UE], [ZE], [XE], and [RN] | + | | | are presented in Section 2.2.1.2, "Limited-Access Bits". | + +--------------------------------------------------------------------------------------------+ + + + + DFP Support + + The OpenPOWER ABI supports the decimal floating-point (DFP) format and DFP language + extensions. The default implementation of DFP types shall be an implementation of the IEEE DFP + standard (IEEE Standard 754-2008). The default may be either a hardware or a software + implementation. + + The Power ISA decimal floating-point category extends the Power Architecture by adding a + decimal floating-point unit. It uses the existing 64-bit floating-point registers and extends + the FPSCR register to 64 bits, where it defines a decimal rounding-control field in the + extended space. + + Single-precision, double-precision, and quad-precision decimal floating-point parameters shall + be passed in the floating-point registers. Single-precision decimal floating-point shall + occupy the lower half of a floating-point register. Quad-precision floating-point values shall + occupy an even/odd register pair. When passing quad-precision decimal floating-point + parameters in accordance with this ABI, an odd floating-point register may be skipped in + allocation order to align quad-precision parameters and results in an even/odd register pair. + When a floating-point register is skipped during input parameter allocation, words in the + corresponding GPR or memory doubleword in the parameter list are not skipped. + + + + Table 2.21. Floating-Point Register Roles for Decimal Floating-Point Types + + +--------------------------------------------------------------------------------------------+ + | Register | Preservation Rules | Purpose | + |----------+--------------------+------------------------------------------------------------| + | | | Floating-Point Status and Control Register limited-access | + | FPSCR | Limited-access | bits. Preservation rules governing the limited-access bits | + | | | for the bit field [DRN] are presented in Section 2.2.1.2, | + | | | "Limited-Access Bits". | + +--------------------------------------------------------------------------------------------+ + + Vector Registers + + The OpenPOWER vector-category instruction repertoire provides the ability to reference 32 + vector registers, each 128 bits wide, of the vector-scalar register file, and a + special-purpose register VSCR. Throughout this document, the symbol vN is used, where N is a + register number, to refer to vector register N. + + + + Table 2.22. Vector Register Roles + + +--------------------------------------------------------------------------------------------+ + | Register | Preservation Rules | Purpose | + |----------+--------------------+------------------------------------------------------------| + | v0-v1 | Volatile | Local variables. | + |----------+--------------------+------------------------------------------------------------| + | v2-v13 | Volatile | Used for parameter passing and return values. | + |----------+--------------------+------------------------------------------------------------| + | v14-v19 | Volatile | Local variables. | + |----------+--------------------+------------------------------------------------------------| + | v20-v31 | Nonvolatile | Local variables. | + |----------+--------------------+------------------------------------------------------------| + | | | 32-bit Vector Status and Control Register. Preservation | + | VSCR | Limited-access | rules governing the limited-access bits for the bit field | + | | | [NJ] are presented in Section 2.2.1.2, "Limited-Access | + | | | Bits". | + +--------------------------------------------------------------------------------------------+ + + + + IEEE BINARY 128 QUADRUPLE PRECISION + + Parameters and function results in IEEE BINARY 128 QUADRUPLE PRECISION format shall be passed + in a single 128-bit vector register as if they were vector values. + + + + IBM EXTENDED PRECISION + + Parameters and function results in the IBM EXTENDED PRECISION format with a pair of two + double-precision floating-point values shall be passed in two successive floating-point + registers. + + If only one value can be passed in a floating-point register, the second parameter will be + passed in a GPR or in memory in accordance with the parameter passing rules for structure + aggregates. diff --git a/crypto/chacha20/chacha20.cipher b/crypto/chacha20/chacha20.cipher new file mode 100644 index 0000000000000000000000000000000000000000..d1cfe4543d58e548c63ffa1b9a5edc6531ef213b GIT binary patch literal 40 tcmaF2{7|8!Kx*cshv#mHJ>pA}RuykDE6jQ_^V4+mtesQ>@~ literal 0 HcmV?d00001 diff --git a/crypto/chacha20/chacha20.sh b/crypto/chacha20/chacha20.sh index 4e8419af..5fb46581 100755 --- a/crypto/chacha20/chacha20.sh +++ b/crypto/chacha20/chacha20.sh @@ -1,11 +1,13 @@ #!/bin/sh -xe -#-l data/audio/mp3/mp3_0_data/buf${1}:0x600000 \ -#-l data/audio/mp3/mp3_0_data/win0:0x700000 \ pypowersim -g chacha20.gpr \ -s common.spr \ -p 0x20000000 \ - -d ${2}:0x900000:128 \ + -l ./chacha20.key:0x600000 \ + -l ./chacha20.iv:0x700000 \ + -l ./chacha20.cipher:0x800000 \ + -l ./chacha20.plain:0x900000 \ + -d ./chacha20.out:0x500000:128 \ -i chacha20test.bin #cmp ${2} data/audio/mp3/mp3_0_data/samples${1} diff --git a/crypto/chacha20/src/svp64test.c b/crypto/chacha20/src/svp64test.c index d12af4b6..44fedaf0 100644 --- a/crypto/chacha20/src/svp64test.c +++ b/crypto/chacha20/src/svp64test.c @@ -20,6 +20,66 @@ void *memset(void *s, int c, size_t len) return s; } +size_t strlen(const char *s) +{ + unsigned int count = 0; + while(*s!='\0') + { + count++; + s++; + } + return count; +} + +/* implementation of memcmp to stop complaining */ +int memcmp(const void *s1, const void *s2, size_t len) +{ + unsigned const char *p = s1; + unsigned const char *q = s2; + int charCompareStatus = 0; + //If both pointer pointing same memory block + if (s1 == s2) + { + return charCompareStatus; + } + while (len > 0) + { + if (*p != *q) + { //compare the mismatching character + charCompareStatus = (*p >*q)?1:-1; + break; + } + len--; + p++; + q++; + } + return charCompareStatus; +} + +/* Test values from Crypto++ documentation */ +uint8_t key[] = { + 0x5E, 0xC5, 0x8B, 0x6D, 0x51, 0x4F, 0xE0, 0xA5, + 0x6F, 0x1E, 0x0D, 0xEA, 0x7B, 0xDC, 0x09, 0x5A, + 0x10, 0xF5, 0xB6, 0x18, 0xBD, 0xB6, 0xF2, 0x26, + 0x2F, 0xCC, 0x59, 0x7B, 0xB2, 0x30, 0xB3, 0xEF +}; + +uint8_t iv[] = { + 0xA3, 0x45, 0xF5, 0xCF, 0x80, 0x23, 0x51, 0x7C, + 0xC0, 0xFC, 0xF0, 0x75, 0x74, 0x8C, 0x86, 0x5F, + 0x7D, 0xE8, 0xCA, 0x0C, 0x72, 0x36, 0xAB, 0xDA +}; + +uint8_t correct_ciphertext[] = { + 0xEE, 0xA7, 0xC2, 0x71, 0x19, 0x10, 0x65, 0x69, + 0x92, 0xE1, 0xCE, 0xD8, 0x16, 0xE2, 0x0E, 0x62, + 0x1B, 0x25, 0x17, 0x82, 0x36, 0x71, 0x6A, 0xE4, + 0x99, 0xF2, 0x97, 0x37, 0xA7, 0x2A, 0xFC, 0xF8, + 0x6C, 0x72 +}; + +#define LOCATE_FUNC __attribute__((__section__(".fixedaddr"))) + /** Compare our output to the output of a known good XChaCha20 library. * The test vectors used here are from examples given of the Crypto++ * cryptographic library's XChaCha20 examples. These values can be @@ -28,41 +88,14 @@ void *memset(void *s, int c, size_t len) * @returns 0 on success, -1 on failure or error * */ -int check_cpp(void){ +int LOCATE_FUNC check_cpp(uint8_t *key, uint8_t *iv, + uint8_t *correct_ciphertext, + uint8_t *plaintext, uint8_t *buffer){ XChaCha_ctx ctx; - uint8_t buffer[128]; uint8_t counter[8] = {0x1}; - /* Test values from Crypto++ documentation */ - uint8_t key[] = { - 0x5E, 0xC5, 0x8B, 0x6D, 0x51, 0x4F, 0xE0, 0xA5, - 0x6F, 0x1E, 0x0D, 0xEA, 0x7B, 0xDC, 0x09, 0x5A, - 0x10, 0xF5, 0xB6, 0x18, 0xBD, 0xB6, 0xF2, 0x26, - 0x2F, 0xCC, 0x59, 0x7B, 0xB2, 0x30, 0xB3, 0xEF - }; - - uint8_t iv[] = { - 0xA3, 0x45, 0xF5, 0xCF, 0x80, 0x23, 0x51, 0x7C, - 0xC0, 0xFC, 0xF0, 0x75, 0x74, 0x8C, 0x86, 0x5F, - 0x7D, 0xE8, 0xCA, 0x0C, 0x72, 0x36, 0xAB, 0xDA - }; - - uint8_t correct_ciphertext[] = { - 0xEE, 0xA7, 0xC2, 0x71, 0x19, 0x10, 0x65, 0x69, - 0x92, 0xE1, 0xCE, 0xD8, 0x16, 0xE2, 0x0E, 0x62, - 0x1B, 0x25, 0x17, 0x82, 0x36, 0x71, 0x6A, 0xE4, - 0x99, 0xF2, 0x97, 0x37, 0xA7, 0x2A, 0xFC, 0xF8, - 0x6C, 0x72 - }; - - // annoying: this is not word-aligned. - uint8_t plaintext[] = "My Plaintext!! My Dear plaintext!!!"; uint32_t msglen = strlen((char *)plaintext); - /* knock one byte off the end */ - plaintext[msglen-1] = 0; - msglen -= 1; - xchacha_keysetup(&ctx, key, iv); /* Crypto++ initializes their counter to 1 instead of 0 */ @@ -77,9 +110,8 @@ int check_cpp(void){ return(0); } -#define LOCATE_FUNC __attribute__((__section__(".fixedaddr"))) - -int LOCATE_FUNC main(int argc, char **argv[]){ - return check_cpp(); +int main(int argc, char *argv[]){ + uint8_t buffer[128]; + return check_cpp(NULL, NULL, NULL, NULL, buffer); } diff --git a/crypto/chacha20/src/test.c b/crypto/chacha20/src/test.c index 9562fbf3..4c93c708 100644 --- a/crypto/chacha20/src/test.c +++ b/crypto/chacha20/src/test.c @@ -20,7 +20,7 @@ */ int check_cpp(void){ XChaCha_ctx ctx; - uint8_t *buffer; + uint8_t buffer[128]; uint8_t counter[8] = {0x1}; /* Test values from Crypto++ documentation */ @@ -45,19 +45,9 @@ int check_cpp(void){ 0x6C, 0x72 }; - uint8_t plaintext[] = "My Plaintext!! My Dear plaintext!!!"; + uint8_t plaintext[] = "My Plaintext!! My Dear plaintext!!"; uint32_t msglen = strlen((char *)plaintext); - /* knock one byte off the end */ - plaintext[msglen-1] = 0; - msglen -= 1; - - /* Allocate a buffer to hold our calculated ciphertext */ - if((buffer = malloc(50 * sizeof(uint8_t))) == NULL){ - perror("malloc() error"); - return(-1); - } - xchacha_keysetup(&ctx, key, iv); /* Crypto++ initializes their counter to 1 instead of 0 */ @@ -66,11 +56,9 @@ int check_cpp(void){ /* Compare our ciphertext to the correct ciphertext */ if(memcmp(buffer, correct_ciphertext, msglen) != 0){ - free(buffer); return(-1); } - free(buffer); return(0); } diff --git a/crypto/chacha20/src/xchacha20.c b/crypto/chacha20/src/xchacha20.c index 2b29f60b..27fb8f96 100644 --- a/crypto/chacha20/src/xchacha20.c +++ b/crypto/chacha20/src/xchacha20.c @@ -11,6 +11,43 @@ #include #include "xchacha20.h" +#include + +#ifdef MEMDUMP +/* dump to address that pypowersim picks up */ +uint32_t *memdump = (uint32_t*)0x6000; +#endif + +#ifdef DUMP +static void dump_hex(const void* data, size_t size) { + char ascii[17]; + size_t i, j; + ascii[16] = '\0'; + for (i = 0; i < size; ++i) { + printf("%02X ", ((unsigned char*)data)[i]); + if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') { + ascii[i % 16] = ((unsigned char*)data)[i]; + } else { + ascii[i % 16] = '.'; + } + if ((i+1) % 8 == 0 || i+1 == size) { + printf(" "); + if ((i+1) % 16 == 0) { + printf("| %s \n", ascii); + } else if (i+1 == size) { + ascii[(i+1) % 16] = '\0'; + if ((i+1) % 16 <= 8) { + printf(" "); + } + for (j = (i+1) % 16; j < 16; ++j) { + printf(" "); + } + printf("| %s \n", ascii); + } + } + } +} +#endif /** hchacha an intermediary step towards XChaCha20 based on the * construction and security proof used to create XSalsa20. @@ -99,6 +136,12 @@ void xchacha_keysetup(XChaCha_ctx *ctx, const uint8_t *k, uint8_t *iv){ ctx->input[13] = 0; /* Internal counter */ ctx->input[14] = U8TO32_LITTLE(iv + 16); ctx->input[15] = U8TO32_LITTLE(iv + 20); + +#ifdef MEMDUMP + memdump = (uint32_t*)0x6000; + *(memdump+40) = iv; /* 0x60a0 */ + *(memdump+41) = k2; /* 0x60a4 */ +#endif } @@ -132,6 +175,16 @@ void xchacha_encrypt_bytes(XChaCha_ctx *ctx, const uint8_t *m, uint8_t *c, uint3 if (!bytes) return; +#ifdef DUMP + dump_hex(ctx->input, 16*4); +#endif +#ifdef MEMDUMP + memdump = (uint32_t*)0x6000; /* dump to address that pypowersim picks up */ + for (i = 0; i < 16; i++) { + *(memdump+i)= ctx->input[i]; + } +#endif + j0 = ctx->input[0]; j1 = ctx->input[1]; j2 = ctx->input[2]; @@ -175,7 +228,43 @@ void xchacha_encrypt_bytes(XChaCha_ctx *ctx, const uint8_t *m, uint8_t *c, uint3 x15 = j15; /* Do 20 rounds instead of 8 */ - for (i = 20;i > 0;i -= 2) { + for (i = 20; i > 0;i -= 2) { +#ifdef DUMP + dump_hex(&x0, 4); + dump_hex(&x1, 4); + dump_hex(&x2, 4); + dump_hex(&x3, 4); + dump_hex(&x4, 4); + dump_hex(&x5, 4); + dump_hex(&x6, 4); + dump_hex(&x7, 4); + dump_hex(&x8, 4); + dump_hex(&x9, 4); + dump_hex(&x10, 4); + dump_hex(&x11, 4); + dump_hex(&x12, 4); + dump_hex(&x13, 4); + dump_hex(&x14, 4); + dump_hex(&x15, 4); +#endif +#ifdef MEMDUMP + *(memdump+16) = x0; + *(memdump+17) = x1; + *(memdump+18) = x2; + *(memdump+19) = x3; + *(memdump+20) = x4; + *(memdump+21) = x5; + *(memdump+22) = x6; + *(memdump+23) = x7; + *(memdump+24) = x8; + *(memdump+25) = x9; + *(memdump+26) = x10; + *(memdump+27) = x11; + *(memdump+28) = x12; + *(memdump+29) = x13; + *(memdump+30) = x14; + *(memdump+31) = x15; +#endif QUARTERROUND( x0, x4, x8,x12) QUARTERROUND( x1, x5, x9,x13) QUARTERROUND( x2, x6,x10,x14) @@ -184,7 +273,7 @@ void xchacha_encrypt_bytes(XChaCha_ctx *ctx, const uint8_t *m, uint8_t *c, uint3 QUARTERROUND( x1, x6,x11,x12) QUARTERROUND( x2, x7, x8,x13) QUARTERROUND( x3, x4, x9,x14) - } + } x0 = PLUS(x0,j0); x1 = PLUS(x1,j1); x2 = PLUS(x2,j2); -- 2.30.2