gaah a mess (but working)
author: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sun, 1 Jan 2023 23:32:29 +0000 (23:32 +0000)
committer: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Fri, 2 Jun 2023 18:51:16 +0000 (19:51 +0100)
As there is no ELF loading in pypowersim, .data, .rodata, .eh_frame and other
information is missing and/or in the wrong place.

Had to get things working exactly as in media/mp3_0.sh, which is to call
just the one function, with the input data pre-loaded into specific memory
locations.

13 files changed:
crypto/chacha20/.gitignore
crypto/chacha20/Makefile
crypto/chacha20/Makefile.cross
crypto/chacha20/calling-conv [new file with mode: 0644]
crypto/chacha20/chacha20.cipher [new file with mode: 0644]
crypto/chacha20/chacha20.gpr
crypto/chacha20/chacha20.iv [new file with mode: 0644]
crypto/chacha20/chacha20.key [new file with mode: 0644]
crypto/chacha20/chacha20.plain [new file with mode: 0644]
crypto/chacha20/chacha20.sh
crypto/chacha20/src/svp64test.c
crypto/chacha20/src/test.c
crypto/chacha20/src/xchacha20.c

index 7cf238fbc442c19fc729bbe8d393527d5342ca8e..1941729cddea823ce5bb8d548d93d448660903f5 100644 (file)
@@ -1,3 +1,4 @@
 *.o
 *.elf
 *.bin
+*.out
index deb5f4b997df374f46c0e4cb38a66ba08c665c84..e356fa63309baf4d52f48b14d006899f5c1b009e 100644 (file)
@@ -7,7 +7,7 @@ CC = $(CROSS)gcc
 LD = $(CROSS)ld
 
 #compiler flags here
-CFLAGS = -O3 -Wall -Wextra -static -mno-vsx -mno-altivec
+CFLAGS = -O3 -Wall -Wextra -static -mno-vsx -mno-altivec -DDUMP
 
 #linker flags here
 LDFLAGS = -Wall -static
index 178cbeab893ee2475925115c22bf412e29c1c311..25b51f867865e1cb9802672cc06d8be418ce303b 100755 (executable)
@@ -5,7 +5,7 @@ AS = $(CROSS)as
 CC = $(CROSS)gcc
 LD = $(CROSS)ld
 OBJCOPY = $(CROSS)objcopy
-CFLAGS = -O3 -Wall -Wextra -mno-vsx -mno-altivec
+CFLAGS = -O3 -Wall -Wextra -mno-vsx -mno-altivec -static -DMEMDUMP
 
 AFLAGS ?= -mpwr9
 
@@ -28,8 +28,8 @@ $(OBJECTS): $(SRCDIR)/%.o : $(SRCDIR)/%.c
 
 #pysvp64asm $< $<.sv
 #$(AS) $(AFLAGS) -c $<.sv -le -o $<.o
-chacha20test.bin: $(OBJECTS)
-       $(LD) $(OBJECTS) -EL -o $<.elf -T memmap
+chacha20test.bin: $(OBJECTS) memmap
+       $(LD) $(OBJECTS) -static -EL -o $<.elf -T memmap
        $(OBJCOPY) $<.elf -I elf64-little -O binary $@
 
 tests: $(OBJ) 
diff --git a/crypto/chacha20/calling-conv b/crypto/chacha20/calling-conv
new file mode 100644 (file)
index 0000000..c3cab5d
--- /dev/null
@@ -0,0 +1,287 @@
+tl;dr r1 stack, r2 TOC if used, r3-r10 args
+
+
+   Table 2.19. Register Roles
+
+   +--------------------------------------------------------------------------------------------+
+   |    Register     |   Preservation Rules   |                     Purpose                     |
+   |-----------------+------------------------+-------------------------------------------------|
+   |                 |                        | Optional use in function linkage.               |
+   | r0              | Volatile               |                                                 |
+   |                 |                        | Used in function prologues.                     |
+   |-----------------+------------------------+-------------------------------------------------|
+   | r1              | Nonvolatile            | Stack frame pointer.                            |
+   |-----------------+------------------------+-------------------------------------------------|
+   | r2              | Nonvolatile[a]         | TOC pointer.                                    |
+   |-----------------+------------------------+-------------------------------------------------|
+   | r3-r10          | Volatile               | Parameter and return values.                    |
+   |-----------------+------------------------+-------------------------------------------------|
+   |                 |                        | Optional use in function linkage.               |
+   | r11             | Volatile               |                                                 |
+   |                 |                        | Used as an environment pointer in languages     |
+   |                 |                        | that require environment pointers.              |
+   |-----------------+------------------------+-------------------------------------------------|
+   |                 |                        | Optional use in function linkage.               |
+   | r12             | Volatile               |                                                 |
+   |                 |                        | Function entry address at the global entry      |
+   |                 |                        | point.                                          |
+   |-----------------+------------------------+-------------------------------------------------|
+   | r13             | Reserved               | Thread pointer (see Section 3.7, "Thread Local  |
+   |                 |                        | Storage ABI").                                  |
+   |-----------------+------------------------+-------------------------------------------------|
+   | r14-r31[b]      | Nonvolatile            | Local variables.                                |
+   |-----------------+------------------------+-------------------------------------------------|
+   | LR              | Volatile               | Link register.                                  |
+   |-----------------+------------------------+-------------------------------------------------|
+   | CTR             | Volatile               | Loop count register.                            |
+   |-----------------+------------------------+-------------------------------------------------|
+   | TAR             | Reserved               | Reserved for system use. This register should   |
+   |                 |                        | not be read or written by application software. |
+   |-----------------+------------------------+-------------------------------------------------|
+   | XER             | Volatile               | Fixed-point exception register.                 |
+   |-----------------+------------------------+-------------------------------------------------|
+   | CR0-CR1         | Volatile               | Condition register fields.                      |
+   |-----------------+------------------------+-------------------------------------------------|
+   | CR2-CR4         | Nonvolatile            | Condition register fields.                      |
+   |-----------------+------------------------+-------------------------------------------------|
+   | CR5-CR7         | Volatile               | Condition register fields.                      |
+   |-----------------+------------------------+-------------------------------------------------|
+   | DSCR            | Limited Access         | Data stream prefetch control.                   |
+   |-----------------+------------------------+-------------------------------------------------|
+   | VRSAVE          | Reserved               | Reserved for system use. This register should   |
+   |                 |                        | not be read or written by application software. |
+   |--------------------------------------------------------------------------------------------|
+   | [a] Register r2 is nonvolatile with respect to calls between functions in the same         |
+   | compilation unit. It is saved and restored by code inserted by the linker resolving a call |
+   | to an external function. For more information, see Section 2.2.1.1, "TOC Pointer Usage".   |
+   |                                                                                            |
+   | [b] If a function needs a frame pointer, assigning r31 to the role of the frame pointer is |
+   | recommended.                                                                               |
+   +--------------------------------------------------------------------------------------------+
+
+    
+
+       TOC Pointer Usage
+
+   As described in Section 3.4, "Symbol Table", the TOC pointer, r2, is commonly initialized by
+   the global function entry point when a function is called through the global entry point. It
+   may be called from a module other than the current function's module or from an unknown call
+   point, such as through a function pointer. (For more information, see Section 2.3.2.1,
+   "Function Prologue".)
+
+   In those instances, it is the caller's responsibility to store the TOC pointer, r2, in the TOC
+   pointer doubleword of the caller's stack frame. For references external to the compilation
+   unit, this code is inserted by the static linker if a function is to be resolved by the
+   dynamic linker. For references through function pointers, it is the compiler's or assembler
+   programmer's responsibility to insert appropriate TOC save and restore code. If the function
+   is called from the same module as the callee, the callee must preserve the value of r2. (See
+   Section 3.6.1, "Function Call" for a description of function entry conventions.)
+
+   When a function calls another function, the TOC pointer must have a legal value pointing to
+   the TOC base, which may be initialized as described in Section 4.2.3, "Global Offset Table".
+
+   When global data is accessed, the TOC pointer must be available for dereference at the point
+   of all uses of values derived from the TOC pointer in conjunction with the @l operator. This
+   property is used by the linker to optimize TOC pointer accesses. In addition, all reaching
+   definitions for a TOC-pointer-derived access must compute the same definition for code to be
+   ABI compliant. (See the Section 3.6.3.1, "TOC Pointer Usage".)
+
+   In some implementations, non ABI-compliant code may be processed by providing additional
+   linker options; for example, linker options disabling linker optimization. However, this
+   behavior in support of non-ABI compliant code is not guaranteed to be portable and supported
+   in all systems. For examples of compliant and noncompliant code, see Section 3.6.3.1, "TOC
+   Pointer Usage".
+
+    
+
+       Optional Function Linkage
+
+   Except as follows, a function cannot depend on the values of those registers that are optional
+   in the function linkage (r0, r11, and r12) because they may be altered by interlibrary calls:
+
+     o When a function is entered in a way to initialize its environment pointer, register r11
+       contains the environment pointer. It is used to support languages with access to
+       additional environment context; for example, for languages that support lexical nesting to
+       access its lexically nested outer context.
+
+     o When a function is entered through its global entry point, register r12 contains the
+       entry-point address. For more information, see the description of dual entry points in
+       Section 2.3.2.1, "Function Prologue" and Section 2.3.2.2, "Function Epilogue".
+
+    
+
+       Stack Frame Pointer
+
+   The stack pointer always points to the lowest allocated valid stack frame. It must maintain
+   quadword alignment and grow toward the lower addresses. The contents of the word at that
+   address point to the previously allocated stack frame when the code has been compiled to
+   maintain back chains. A called function is permitted to decrement it if required. For more
+   information, see Section 2.3.8, "Dynamic Stack Space Allocation".
+
+    
+
+       Link Register
+
+   The link register contains the address that a called function normally returns to. It is
+   volatile across function calls.
+
+    
+
+       Condition Register Fields
+
+   In the condition register, the bit fields CR2, CR3, and CR4 are nonvolatile. The value on
+   entry must be restored on exit. The other bit fields are volatile.
+
+   This ABI requires OpenPOWER-compliant processors to implement mfocr instructions in a manner
+   that initializes undefined bits of the RT result register of mfocr instructions to one of the
+   following values:
+
+     o 0, in accordance with OpenPOWER-compliant processor implementation practice
+
+     o The architected value of the corresponding CR field in the mfocr instruction
+
+   [Note] Note                                                                                    
+          When executing an mfocr instruction, the POWER8 processor does not implement the        
+          behavior described in the "Fixed-Point Invalid Forms and Undefined Conditions" section  
+          of POWER8 Processor User's Manual for the Single-Chip Module. Instead, it replicates    
+          the selected condition register field within the byte that contains it rather than      
+          initializing to 0 the bits corresponding to the nonselected bits of the byte that       
+          contains it. When generating code to save two condition register fields that are stored 
+          in the same byte, the compiler must mask the value received from mfocr to avoid         
+          corruption of the resulting (partial) condition register word.                          
+                                                                                                  
+          This erratum does not apply to the POWER9 processor.                                    
+
+    For more information, see Power ISA, version 3.0 and "Fixed-Point Invalid Forms and Undefined
+   Conditions" in POWER9 Processor User's Manual.
+
+       Floating-Point Registers
+
+    In OpenPOWER-compliant processors, floating-point and vector functions are implemented using
+   a unified vector-scalar model. As shown in Figure 2.16, "Floating-Point Registers as Part of
+   VSRs" and Figure 2.17, "Vector Registers as Part of VSRs", there are 64 vector-scalar
+   registers; each is 128 bits wide.
+
+   The vector-scalar registers can be addressed with vector-scalar instructions, for vector and
+   scalar processing of all 64 registers, or with the "classic" Power floating-point instructions
+   to refer to a 32-register subset of 64 bits per register. They can also be addressed with VMX
+   instructions to refer to a 32-register subset of 128-bit wide registers.
+
+    
+
+   Figure 2.16. Floating-Point Registers as Part of VSRs
+
+    
+
+   Figure 2.17. Vector Registers as Part of VSRs
+
+   The classic floating-point repertoire consists of 32 floating-point registers, each 64 bits
+   wide, and an associated special-purpose register to provide floating-point status and control.
+   Throughout this document, the symbol fN is used, where N is a register number, to refer to
+   floating-point register N.
+
+   For the purpose of function calls, the right half of VSX registers, corresponding to the
+   classic floating-point registers (that is, vsr0-vsr31), is volatile.
+
+    
+
+   Table 2.20. Floating-Point Register Roles for Binary Floating-Point Types
+
+   +--------------------------------------------------------------------------------------------+
+   | Register | Preservation Rules |                          Purpose                           |
+   |----------+--------------------+------------------------------------------------------------|
+   | f0       | Volatile           | Local variables.                                           |
+   |----------+--------------------+------------------------------------------------------------|
+   | f1-f13   | Volatile           | Used for parameter passing and return values of binary     |
+   |          |                    | float types.                                               |
+   |----------+--------------------+------------------------------------------------------------|
+   | f14-f31  | Nonvolatile        | Local variables.                                           |
+   |----------+--------------------+------------------------------------------------------------|
+   |          |                    | Floating-Point Status and Control Register limited-access  |
+   | FPSCR    | Limited-access     | bits. Preservation rules governing the limited-access bits |
+   |          |                    | for the bit fields [VE], [OE], [UE], [ZE], [XE], and [RN]  |
+   |          |                    | are presented in Section 2.2.1.2, "Limited-Access Bits".   |
+   +--------------------------------------------------------------------------------------------+
+
+    
+
+       DFP Support
+
+   The OpenPOWER ABI supports the decimal floating-point (DFP) format and DFP language
+   extensions. The default implementation of DFP types shall be an implementation of the IEEE DFP
+   standard (IEEE Standard 754-2008). The default may be either a hardware or a software
+   implementation.
+
+   The Power ISA decimal floating-point category extends the Power Architecture by adding a
+   decimal floating-point unit. It uses the existing 64-bit floating-point registers and extends
+   the FPSCR register to 64 bits, where it defines a decimal rounding-control field in the
+   extended space.
+
+   Single-precision, double-precision, and quad-precision decimal floating-point parameters shall
+   be passed in the floating-point registers. Single-precision decimal floating-point shall
+   occupy the lower half of a floating-point register. Quad-precision floating-point values shall
+   occupy an even/odd register pair. When passing quad-precision decimal floating-point
+   parameters in accordance with this ABI, an odd floating-point register may be skipped in
+   allocation order to align quad-precision parameters and results in an even/odd register pair.
+   When a floating-point register is skipped during input parameter allocation, words in the
+   corresponding GPR or memory doubleword in the parameter list are not skipped.
+
+    
+
+   Table 2.21. Floating-Point Register Roles for Decimal Floating-Point Types
+
+   +--------------------------------------------------------------------------------------------+
+   | Register | Preservation Rules |                          Purpose                           |
+   |----------+--------------------+------------------------------------------------------------|
+   |          |                    | Floating-Point Status and Control Register limited-access  |
+   | FPSCR    | Limited-access     | bits. Preservation rules governing the limited-access bits |
+   |          |                    | for the bit field [DRN] are presented in Section 2.2.1.2,  |
+   |          |                    | "Limited-Access Bits".                                     |
+   +--------------------------------------------------------------------------------------------+
+
+       Vector Registers
+
+   The OpenPOWER vector-category instruction repertoire provides the ability to reference 32
+   vector registers, each 128 bits wide, of the vector-scalar register file, and a
+   special-purpose register VSCR. Throughout this document, the symbol vN is used, where N is a
+   register number, to refer to vector register N.
+
+    
+
+   Table 2.22. Vector Register Roles
+
+   +--------------------------------------------------------------------------------------------+
+   | Register | Preservation Rules |                          Purpose                           |
+   |----------+--------------------+------------------------------------------------------------|
+   | v0-v1    | Volatile           | Local variables.                                           |
+   |----------+--------------------+------------------------------------------------------------|
+   | v2-v13   | Volatile           | Used for parameter passing and return values.              |
+   |----------+--------------------+------------------------------------------------------------|
+   | v14-v19  | Volatile           | Local variables.                                           |
+   |----------+--------------------+------------------------------------------------------------|
+   | v20-v31  | Nonvolatile        | Local variables.                                           |
+   |----------+--------------------+------------------------------------------------------------|
+   |          |                    | 32-bit Vector Status and Control Register. Preservation    |
+   | VSCR     | Limited-access     | rules governing the limited-access bits for the bit field  |
+   |          |                    | [NJ] are presented in Section 2.2.1.2, "Limited-Access     |
+   |          |                    | Bits".                                                     |
+   +--------------------------------------------------------------------------------------------+
+
+    
+
+       IEEE BINARY 128 QUADRUPLE PRECISION
+
+   Parameters and function results in IEEE BINARY 128 QUADRUPLE PRECISION format shall be passed
+   in a single 128-bit vector register as if they were vector values.
+
+    
+
+       IBM EXTENDED PRECISION
+
+   Parameters and function results in the IBM EXTENDED PRECISION format with a pair of two
+   double-precision floating-point values shall be passed in two successive floating-point
+   registers.
+
+   If only one value can be passed in a floating-point register, the second parameter will be
+   passed in a GPR or in memory in accordance with the parameter passing rules for structure
+   aggregates.
diff --git a/crypto/chacha20/chacha20.cipher b/crypto/chacha20/chacha20.cipher
new file mode 100644 (file)
index 0000000..d1cfe45
Binary files /dev/null and b/crypto/chacha20/chacha20.cipher differ
index 7b3a8c42c6ffe98507aba609c07d45db5e350711..1c6663ee928f49806f607c4a685fb5b6c80135e4 100644 (file)
@@ -2,8 +2,8 @@
 #                                  int *dither_state, float *samples,
 #                                  ptrdiff_t incr);
 1: 0x8000        # stack pointer
-3: 0x600000      # param 1: float *sunth_buf     buf
-4: 0x700000      # param 2: float *window        win
-5: 0x800000      # param 3: int *dither_state    &unused
-6: 0x900000      # param 3: float *samples       out
-7: 1             # param 5: ptr_diff_t incr      1
+3: 0x600000      # param 1: uint8_t *key
+4: 0x700000      # param 2: uint8_t *iv
+5: 0x800000      # param 3: uint8_t *correct_ciphertext
+6: 0x900000      # param 4: uint8_t *plaintext
+7: 0x500000      # param 5: uint8_t *buffer
diff --git a/crypto/chacha20/chacha20.iv b/crypto/chacha20/chacha20.iv
new file mode 100644 (file)
index 0000000..e3d1d49
--- /dev/null
@@ -0,0 +1 @@
+£EõÏ\80#Q|Àüðut\8c\86_}èÊ\fr6«Ú
\ No newline at end of file
diff --git a/crypto/chacha20/chacha20.key b/crypto/chacha20/chacha20.key
new file mode 100644 (file)
index 0000000..4b14231
--- /dev/null
@@ -0,0 +1 @@
+^Å\8bmQOà¥o\1e\rê{Ü Z\10õ¶\18½¶ò&/ÌY{²0³ï
\ No newline at end of file
diff --git a/crypto/chacha20/chacha20.plain b/crypto/chacha20/chacha20.plain
new file mode 100644 (file)
index 0000000..327f38d
Binary files /dev/null and b/crypto/chacha20/chacha20.plain differ
index 4e8419afb62491f247dbfd74fc845c0836417c0b..5fb465810304f95fc19635147daa9d886afec97c 100755 (executable)
@@ -1,11 +1,13 @@
 #!/bin/sh -xe
 
-#-l data/audio/mp3/mp3_0_data/buf${1}:0x600000 \
-#-l data/audio/mp3/mp3_0_data/win0:0x700000 \
 
 pypowersim -g chacha20.gpr \
        -s common.spr \
        -p 0x20000000 \
-       -d ${2}:0x900000:128 \
+    -l ./chacha20.key:0x600000 \
+    -l ./chacha20.iv:0x700000 \
+    -l ./chacha20.cipher:0x800000 \
+    -l ./chacha20.plain:0x900000 \
+    -d ./chacha20.out:0x500000:128 \
        -i chacha20test.bin
 #cmp ${2} data/audio/mp3/mp3_0_data/samples${1}
index d12af4b65dfbf1e05e246838977524b1f3b2ad1b..44fedaf001f3d3a94260b205b1f19ea10a7da2eb 100644 (file)
@@ -20,6 +20,66 @@ void *memset(void *s, int c,  size_t len)
     return s;
 }
 
+size_t strlen(const char *s)
+{
+    unsigned int count = 0;
+    while(*s!='\0')
+    {
+        count++;
+        s++;
+    }
+    return count;
+}
+
+/* implementation of memcmp to stop complaining */
+int memcmp(const void *s1, const void *s2, size_t len)
+{
+    unsigned const char *p = s1;
+    unsigned const char *q = s2;
+    int charCompareStatus = 0;
+    // If both pointers refer to the same memory block, they compare equal
+    if (s1 == s2)
+    {
+        return charCompareStatus;
+    }
+    while (len > 0)
+    {
+        if (*p != *q)
+        {  //compare the mismatching character
+            charCompareStatus = (*p >*q)?1:-1;
+            break;
+        }
+        len--;
+        p++;
+        q++;
+    }
+    return charCompareStatus;
+}
+
+/* Test values from Crypto++ documentation */
+uint8_t key[] = {
+        0x5E, 0xC5, 0x8B, 0x6D, 0x51, 0x4F, 0xE0, 0xA5,
+        0x6F, 0x1E, 0x0D, 0xEA, 0x7B, 0xDC, 0x09, 0x5A,
+        0x10, 0xF5, 0xB6, 0x18, 0xBD, 0xB6, 0xF2, 0x26,
+        0x2F, 0xCC, 0x59, 0x7B, 0xB2, 0x30, 0xB3, 0xEF
+};
+
+uint8_t iv[] = {
+        0xA3, 0x45, 0xF5, 0xCF, 0x80, 0x23, 0x51, 0x7C,
+        0xC0, 0xFC, 0xF0, 0x75, 0x74, 0x8C, 0x86, 0x5F,
+        0x7D, 0xE8, 0xCA, 0x0C, 0x72, 0x36, 0xAB, 0xDA
+};
+
+uint8_t correct_ciphertext[] = {
+        0xEE, 0xA7, 0xC2, 0x71, 0x19, 0x10, 0x65, 0x69,
+        0x92, 0xE1, 0xCE, 0xD8, 0x16, 0xE2, 0x0E, 0x62,
+        0x1B, 0x25, 0x17, 0x82, 0x36, 0x71, 0x6A, 0xE4,
+        0x99, 0xF2, 0x97, 0x37, 0xA7, 0x2A, 0xFC, 0xF8,
+        0x6C, 0x72
+};
+
+#define LOCATE_FUNC  __attribute__((__section__(".fixedaddr")))
+
 /** Compare our output to the output of a known good XChaCha20 library.
  * The test vectors used here are from examples given of the Crypto++
  * cryptographic library's XChaCha20 examples. These values can be
@@ -28,41 +88,14 @@ void *memset(void *s, int c,  size_t len)
  * @returns 0 on success, -1 on failure or error
  *
  */
-int check_cpp(void){
+int LOCATE_FUNC check_cpp(uint8_t *key, uint8_t *iv,
+                          uint8_t *correct_ciphertext,
+                          uint8_t *plaintext, uint8_t *buffer){
        XChaCha_ctx ctx;
-       uint8_t buffer[128];
        uint8_t counter[8] = {0x1};
 
-       /* Test values from Crypto++ documentation */
-       uint8_t key[] = {
-                       0x5E, 0xC5, 0x8B, 0x6D, 0x51, 0x4F, 0xE0, 0xA5,
-                       0x6F, 0x1E, 0x0D, 0xEA, 0x7B, 0xDC, 0x09, 0x5A,
-                       0x10, 0xF5, 0xB6, 0x18, 0xBD, 0xB6, 0xF2, 0x26,
-                       0x2F, 0xCC, 0x59, 0x7B, 0xB2, 0x30, 0xB3, 0xEF
-       };
-
-       uint8_t iv[] = {
-                       0xA3, 0x45, 0xF5, 0xCF, 0x80, 0x23, 0x51, 0x7C,
-                       0xC0, 0xFC, 0xF0, 0x75, 0x74, 0x8C, 0x86, 0x5F,
-                       0x7D, 0xE8, 0xCA, 0x0C, 0x72, 0x36, 0xAB, 0xDA
-       };
-
-       uint8_t correct_ciphertext[] = {
-                       0xEE, 0xA7, 0xC2, 0x71, 0x19, 0x10, 0x65, 0x69,
-                       0x92, 0xE1, 0xCE, 0xD8, 0x16, 0xE2, 0x0E, 0x62,
-                       0x1B, 0x25, 0x17, 0x82, 0x36, 0x71, 0x6A, 0xE4,
-                       0x99, 0xF2, 0x97, 0x37, 0xA7, 0x2A, 0xFC, 0xF8,
-                       0x6C, 0x72
-       };
-
-    // annoying: this is not word-aligned.
-       uint8_t plaintext[] = "My Plaintext!! My Dear plaintext!!!";
        uint32_t msglen = strlen((char *)plaintext);
 
-    /* knock one byte off the end */
-    plaintext[msglen-1] = 0;
-    msglen -= 1;
-
        xchacha_keysetup(&ctx, key, iv);
 
        /* Crypto++ initializes their counter to 1 instead of 0 */
@@ -77,9 +110,8 @@ int check_cpp(void){
        return(0);
 }
 
-#define LOCATE_FUNC  __attribute__((__section__(".fixedaddr")))
-
-int LOCATE_FUNC main(int argc, char **argv[]){
-       return check_cpp();
+int main(int argc, char *argv[]){
+       uint8_t buffer[128];
+       return check_cpp(NULL, NULL, NULL, NULL, buffer);
 }
 
index 9562fbf301e48f0f4335812c8b710672cb3fa7f7..4c93c70867d923888c8bf57e00628942e7bb4a94 100644 (file)
@@ -20,7 +20,7 @@
  */
 int check_cpp(void){
        XChaCha_ctx ctx;
-       uint8_t *buffer;
+       uint8_t buffer[128];
        uint8_t counter[8] = {0x1};
 
        /* Test values from Crypto++ documentation */
@@ -45,19 +45,9 @@ int check_cpp(void){
                        0x6C, 0x72
        };
 
-       uint8_t plaintext[] = "My Plaintext!! My Dear plaintext!!!";
+       uint8_t plaintext[] = "My Plaintext!! My Dear plaintext!!";
        uint32_t msglen = strlen((char *)plaintext);
 
-    /* knock one byte off the end */
-    plaintext[msglen-1] = 0;
-    msglen -= 1;
-
-       /* Allocate a buffer to hold our calculated ciphertext */
-       if((buffer = malloc(50 * sizeof(uint8_t))) == NULL){
-               perror("malloc() error");
-               return(-1);
-       }
-
        xchacha_keysetup(&ctx, key, iv);
 
        /* Crypto++ initializes their counter to 1 instead of 0 */
@@ -66,11 +56,9 @@ int check_cpp(void){
 
        /* Compare our ciphertext to the correct ciphertext */
        if(memcmp(buffer, correct_ciphertext, msglen) != 0){
-               free(buffer);
                return(-1);
        }
 
-       free(buffer);
        return(0);
 }
 
index 2b29f60b431ac82d12a4c29e8e1fb6ca9ed57a6c..27fb8f964dfef745d342a389866aa81ecb931b6d 100644 (file)
 #include <stdint.h>
 #include "xchacha20.h"
 
+#include <stdio.h>
+
+#ifdef MEMDUMP
+/* dump to address that pypowersim picks up */
+uint32_t *memdump = (uint32_t*)0x6000;
+#endif
+
+#ifdef DUMP
+static void dump_hex(const void* data, size_t size) {
+       char ascii[17];
+       size_t i, j;
+       ascii[16] = '\0';
+       for (i = 0; i < size; ++i) {
+               printf("%02X ", ((unsigned char*)data)[i]);
+               if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') {
+                       ascii[i % 16] = ((unsigned char*)data)[i];
+               } else {
+                       ascii[i % 16] = '.';
+               }
+               if ((i+1) % 8 == 0 || i+1 == size) {
+                       printf(" ");
+                       if ((i+1) % 16 == 0) {
+                               printf("|  %s \n", ascii);
+                       } else if (i+1 == size) {
+                               ascii[(i+1) % 16] = '\0';
+                               if ((i+1) % 16 <= 8) {
+                                       printf(" ");
+                               }
+                               for (j = (i+1) % 16; j < 16; ++j) {
+                                       printf("   ");
+                               }
+                               printf("|  %s \n", ascii);
+                       }
+               }
+       }
+}
+#endif
 
 /** hchacha an intermediary step towards XChaCha20 based on the
  * construction and security proof used to create XSalsa20.
@@ -99,6 +136,12 @@ void xchacha_keysetup(XChaCha_ctx *ctx, const uint8_t *k, uint8_t *iv){
        ctx->input[13] = 0;         /* Internal counter */
        ctx->input[14] = U8TO32_LITTLE(iv + 16);
        ctx->input[15] = U8TO32_LITTLE(iv + 20);
+
+#ifdef MEMDUMP
+    memdump = (uint32_t*)0x6000;
+    *(memdump+40) = iv; /* 0x60a0 */
+    *(memdump+41) = k2; /* 0x60a4 */
+#endif
 }
 
 
@@ -132,6 +175,16 @@ void xchacha_encrypt_bytes(XChaCha_ctx *ctx, const uint8_t *m, uint8_t *c, uint3
 
        if (!bytes) return;
 
+#ifdef DUMP
+    dump_hex(ctx->input, 16*4);
+#endif
+#ifdef MEMDUMP
+    memdump = (uint32_t*)0x6000; /* dump to address that pypowersim picks up */
+    for (i = 0; i < 16; i++) {
+        *(memdump+i)= ctx->input[i];
+    }
+#endif
+
        j0 = ctx->input[0];
        j1 = ctx->input[1];
        j2 = ctx->input[2];
@@ -175,7 +228,43 @@ void xchacha_encrypt_bytes(XChaCha_ctx *ctx, const uint8_t *m, uint8_t *c, uint3
                x15 = j15;
 
                /* Do 20 rounds instead of 8 */
-               for (i = 20;i > 0;i -= 2) {
+               for (i = 20; i > 0;i -= 2) {
+#ifdef DUMP
+            dump_hex(&x0, 4);
+            dump_hex(&x1, 4);
+            dump_hex(&x2, 4);
+            dump_hex(&x3, 4);
+            dump_hex(&x4, 4);
+            dump_hex(&x5, 4);
+            dump_hex(&x6, 4);
+            dump_hex(&x7, 4);
+            dump_hex(&x8, 4);
+            dump_hex(&x9, 4);
+            dump_hex(&x10, 4);
+            dump_hex(&x11, 4);
+            dump_hex(&x12, 4);
+            dump_hex(&x13, 4);
+            dump_hex(&x14, 4);
+            dump_hex(&x15, 4);
+#endif
+#ifdef MEMDUMP
+            *(memdump+16) = x0;
+            *(memdump+17) = x1;
+            *(memdump+18) = x2;
+            *(memdump+19) = x3;
+            *(memdump+20) = x4;
+            *(memdump+21) = x5;
+            *(memdump+22) = x6;
+            *(memdump+23) = x7;
+            *(memdump+24) = x8;
+            *(memdump+25) = x9;
+            *(memdump+26) = x10;
+            *(memdump+27) = x11;
+            *(memdump+28) = x12;
+            *(memdump+29) = x13;
+            *(memdump+30) = x14;
+            *(memdump+31) = x15;
+#endif
                        QUARTERROUND( x0, x4, x8,x12)
                        QUARTERROUND( x1, x5, x9,x13)
                        QUARTERROUND( x2, x6,x10,x14)
@@ -184,7 +273,7 @@ void xchacha_encrypt_bytes(XChaCha_ctx *ctx, const uint8_t *m, uint8_t *c, uint3
                        QUARTERROUND( x1, x6,x11,x12)
                        QUARTERROUND( x2, x7, x8,x13)
                        QUARTERROUND( x3, x4, x9,x14)
-               }
+            }
                x0 = PLUS(x0,j0);
                x1 = PLUS(x1,j1);
                x2 = PLUS(x2,j2);