From 49e89f31841edba7f2aa7aeb7e376e046ed4a695 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Thu, 22 Sep 2022 08:43:26 +0000 Subject: [PATCH] better handling of memory copies, fix vpx_get4x4sse_cs_svp64 --- media/video/libvpx/variance_svp64_wrappers.c | 55 +++++++++++++------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/media/video/libvpx/variance_svp64_wrappers.c b/media/video/libvpx/variance_svp64_wrappers.c index da70fdf3..ed459040 100644 --- a/media/video/libvpx/variance_svp64_wrappers.c +++ b/media/video/libvpx/variance_svp64_wrappers.c @@ -8,8 +8,7 @@ uint32_t vpx_get_mb_ss_svp64(const int16_t *src_ptr) { // It cannot be the same pointer as the original function, as it is really a separate CPU/RAM // we have to memcpy from src_ptr to this pointer, the address was chosen arbitrarily - const uint64_t src_ptr_svp64 = 0x100000; - const uint64_t *src_ptr64 = (const uint64_t *) src_ptr; + uint64_t src_ptr_svp64 = 0x100000; // Create the pypowersim_state pypowersim_state_t *state = pypowersim_prepare(); @@ -21,11 +20,17 @@ uint32_t vpx_get_mb_ss_svp64(const int16_t *src_ptr) { PyObject *address = PyLong_FromLongLong(src_ptr_svp64); PyList_SetItem(state->initial_regs, 3, address); // Load data into buffer from real memory - size_t size = 256*sizeof(uint16_t)/sizeof(uint64_t); - for (int i=0; i < size; i++) { - PyObject *address = PyLong_FromLongLong(src_ptr_svp64 + i*8); - PyObject *word = PyLong_FromLongLong(*(src_ptr64 + i)); + for (int i=0; i < 256; i += 4) { + PyObject *address = PyLong_FromLongLong(src_ptr_svp64); + uint64_t val = src_ptr[0]; + val |= (uint64_t)(src_ptr[1]) << 16; + val |= (uint64_t)(src_ptr[2]) << 32; + val |= (uint64_t)(src_ptr[3]) << 48; + //printf("src: %p -> %04x %04x %04x %04x, val: %016x -> %p\n", src_ptr, src_ptr[0], src_ptr[1], src_ptr[2], src_ptr[3], val, src_ptr_svp64); + PyObject *word = PyLong_FromLongLong(val); PyDict_SetItem(state->initial_mem, address, word); + src_ptr += 4; + src_ptr_svp64 += 8; } // Prepare the arguments object for the call @@ -60,28 +65,33 @@ uint32_t vpx_get_mb_ss_svp64(const int16_t *src_ptr) { uint32_t vpx_get4x4sse_cs_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride) { + +// vpx_get4x4sse_cs_svp64_ref(src_ptr, src_stride, ref_ptr, ref_stride); + // It cannot be the same pointer as the original function, as it is really a separate CPU/RAM // we have to memcpy from src_ptr to this pointer, the address was chosen arbitrarily - const uint64_t src_ptr_svp64 = 0x100000; - const uint64_t ref_ptr_svp64 = 0x200000; - const uint64_t *src_ptr64 = (const uint64_t *) src_ptr; - const uint64_t *ref_ptr64 = (const uint64_t *) ref_ptr; + uint64_t src_ptr_svp64 = 0x100000; + uint64_t ref_ptr_svp64 = 0x200000; // Create the pypowersim_state pypowersim_state_t *state = pypowersim_prepare(); // Change the relevant elements, mandatory: body - // state->binary = PyBytes_FromStringAndSize((const char *)&vpx_get4x4sse_cs_svp64_real, 1000); // Set GPR #3 to the src_ptr PyObject *src_address = PyLong_FromLongLong(src_ptr_svp64); PyList_SetItem(state->initial_regs, 3, src_address); // Load data into buffer from real memory - size_t size = 16*sizeof(uint16_t)/sizeof(uint64_t); - for (int i=0; i < size; i++) { - PyObject *address = PyLong_FromLongLong(src_ptr_svp64 + i*8); - PyObject *word = PyLong_FromLongLong(*(src_ptr64 + i)); + for (int r=0; r < 4; r++) { + PyObject *address = PyLong_FromLongLong(src_ptr_svp64); + uint64_t val = src_ptr[0]; + val |= (uint64_t)(src_ptr[1]) << 16; + val |= (uint64_t)(src_ptr[2]) << 32; + val |= (uint64_t)(src_ptr[3]) << 48; + PyObject *word = PyLong_FromLongLong(val); PyDict_SetItem(state->initial_mem, address, word); + src_ptr += src_stride; + src_ptr_svp64 += 8; } // Set GPR #4 to the src_stride @@ -91,14 +101,21 @@ uint32_t vpx_get4x4sse_cs_svp64(const uint8_t *src_ptr, int src_stride, PyObject *ref_address = PyLong_FromLongLong(ref_ptr_svp64); PyList_SetItem(state->initial_regs, 5, ref_address); // Load data into buffer from real memory - for (int i=0; i < size; i++) { - PyObject *address = PyLong_FromLongLong(ref_ptr_svp64 + i*8); - PyObject *word = PyLong_FromLongLong(*(ref_ptr64 + i)); + for (int r=0; r < 4; r++) { + PyObject *address = PyLong_FromLongLong(ref_ptr_svp64); + uint64_t val = ref_ptr[0]; + val |= (uint64_t)(ref_ptr[1]) << 16; + val |= (uint64_t)(ref_ptr[2]) << 32; + val |= (uint64_t)(ref_ptr[3]) << 48; + //printf("ref: %p -> %04x %04x %04x %04x, val: %016lx -> %p\n", ref_ptr, ref_ptr[0], ref_ptr[1], ref_ptr[2], ref_ptr[3], val, ref_ptr_svp64); + PyObject *word = PyLong_FromLongLong(val); PyDict_SetItem(state->initial_mem, address, word); + ref_ptr += ref_stride; + ref_ptr_svp64 += 8; } // Set GPR #6 to the ref_stride - PyList_SetItem(state->initial_regs, 4, PyLong_FromLongLong(ref_stride)); + PyList_SetItem(state->initial_regs, 6, PyLong_FromLongLong(ref_stride)); // Prepare the arguments object for the call pypowersim_prepareargs(state); -- 2.30.2