From: Konstantinos Margaritis
Date: Wed, 21 Sep 2022 13:07:03 +0000 (+0000)
Subject: add vpx_get4x4sse_cs_svp64_real() and wrapper
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2b9447bb6ce1655ec5e1973361ca810e53025b77;p=openpower-isa.git

add vpx_get4x4sse_cs_svp64_real() and wrapper
---

diff --git a/media/video/libvpx/Makefile b/media/video/libvpx/Makefile
index 06b82323..c4f129f8 100644
--- a/media/video/libvpx/Makefile
+++ b/media/video/libvpx/Makefile
@@ -10,15 +10,15 @@ CXXFLAGS= -Iinclude -O -g3
 ASFLAGS= -mlibresoc
 LDFLAGS=-lgtest -pthread -lpython3.7m
 
-BINFILES = variancefuncs_svp64.bin
-ASFILES = variancefuncs_svp64.s
+BINFILES = vpx_get_mb_ss_svp64_real.bin vpx_get4x4sse_cs_svp64_real.bin
+ASFILES = vpx_get_mb_ss_svp64_real.s vpx_get4x4sse_cs_svp64_real.s
 CFILES = variance_ref.c variance_svp64.c variance_svp64_wrappers.c vpx_mem.c
 CPPFILES = test_libvpx.cc variance_test.cc
 EXAMPLEC = pypowersim_wrapper_example.c
 EXAMPLEOBJ= ${EXAMPLEC:.c=.o}
 OBJFILES = $(CFILES:.c=.o) $(CPPFILES:.cc=.o) $(ASFILES:.s=.o)
 
-variancefuncs_svp64.bin: variancefuncs_svp64.o
+%.bin: %.o
 	${OBJCOPY} -I elf64-little -O binary $< $@
 
 ${TARGET}: ${OBJFILES} ${BINFILES}
diff --git a/media/video/libvpx/variance_svp64.c b/media/video/libvpx/variance_svp64.c
index f0fc3f55..cdfaa052 100644
--- a/media/video/libvpx/variance_svp64.c
+++ b/media/video/libvpx/variance_svp64.c
@@ -21,23 +21,6 @@ static const uint8_t bilinear_filters[8][2] = {
   { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 },
 };
 
-uint32_t vpx_get4x4sse_cs_svp64(const uint8_t *src_ptr, int src_stride,
-                                const uint8_t *ref_ptr, int ref_stride) {
-  int distortion = 0;
-  int r, c;
-
-  for (r = 0; r < 4; ++r) {
-    for (c = 0; c < 4; ++c) {
-      int diff = src_ptr[c] - ref_ptr[c];
-      distortion += diff * diff;
-    }
-
-    src_ptr += src_stride;
-    ref_ptr += ref_stride;
-  }
-
-  return distortion;
-}
 
 static void variance_svp64(const uint8_t *src_ptr, int src_stride,
                            const uint8_t *ref_ptr, int ref_stride, int w, int h,
diff --git 
a/media/video/libvpx/variance_svp64_wrappers.c b/media/video/libvpx/variance_svp64_wrappers.c
index c6e8431b..da70fdf3 100644
--- a/media/video/libvpx/variance_svp64_wrappers.c
+++ b/media/video/libvpx/variance_svp64_wrappers.c
@@ -49,7 +49,7 @@ uint32_t vpx_get_mb_ss_svp64(const int16_t *src_ptr) {
     }
 
     // GPR #3 holds the return value as an integer
-    PyObject *key = PyLong_FromLong(3);
+    PyObject *key = PyLong_FromLongLong(3);
     PyObject *itm = PyDict_GetItem(final_regs, key);
     PyObject *value = PyObject_GetAttrString(itm, "value");
     uint64_t val = PyLong_AsLongLong(value);
@@ -57,3 +57,111 @@ uint32_t vpx_get_mb_ss_svp64(const int16_t *src_ptr) {
     // Return value
     return (uint32_t) val;
 }
+
+// Copy the bytes spanned by a 4x4 block (4 rows, `stride` bytes apart) from
+// host memory at `ptr` into the simulator memory dict at `sim_address`.
+// NOTE(review): words are stored least significant byte first - confirm the
+// simulator memory is little-endian.
+static void load_4x4_block(PyObject *initial_mem, uint64_t sim_address,
+                           const uint8_t *ptr, int stride) {
+    // The last row starts at 3 * stride and is 4 bytes long
+    const size_t nbytes = 3 * (size_t) stride + 4;
+
+    for (size_t offset = 0; offset < nbytes; offset += 8) {
+        // Build each 64-bit word bytewise (zero padded) so we never read past
+        // the block or through a misaligned uint64_t pointer
+        uint64_t data = 0;
+        for (size_t b = 0; b < 8 && offset + b < nbytes; b++) {
+            data |= (uint64_t) ptr[offset + b] << (8 * b);
+        }
+
+        PyObject *address = PyLong_FromUnsignedLongLong(sim_address + offset);
+        PyObject *word = PyLong_FromUnsignedLongLong(data);
+        PyDict_SetItem(initial_mem, address, word);
+        // PyDict_SetItem takes its own references, so drop ours
+        Py_DECREF(address);
+        Py_DECREF(word);
+    }
+}
+
+// Compute the sum of squared differences of a 4x4 block by running
+// vpx_get4x4sse_cs_svp64_real() in the simulator.
+// Returns the value the simulator leaves in GPR #3, or UINT32_MAX (larger than
+// any possible 4x4 result, 16 * 255 * 255) if the simulation fails or a
+// stride is negative.
+uint32_t vpx_get4x4sse_cs_svp64(const uint8_t *src_ptr, int src_stride,
+                                const uint8_t *ref_ptr, int ref_stride) {
+    // It cannot be the same pointer as the original function, as it is really a separate CPU/RAM
+    // we have to copy from src_ptr to this pointer, the address was chosen arbitrarily
+    const uint64_t src_ptr_svp64 = 0x100000;
+    const uint64_t ref_ptr_svp64 = 0x200000;
+
+    if (src_stride < 0 || ref_stride < 0) {
+        printf("Negative strides are not supported\n");
+        return UINT32_MAX;
+    }
+
+    // Create the pypowersim_state
+    pypowersim_state_t *state = pypowersim_prepare();
+
+    // Change the relevant elements, mandatory: body
+    // NOTE(review): 1000 is a fixed upper bound rather than the real code size - confirm
+    state->binary = PyBytes_FromStringAndSize((const char *)&vpx_get4x4sse_cs_svp64_real, 1000);
+
+    // Set GPR #3 to the src_ptr and load the source block from real memory
+    PyList_SetItem(state->initial_regs, 3, PyLong_FromLongLong(src_ptr_svp64));
+    load_4x4_block(state->initial_mem, src_ptr_svp64, src_ptr, src_stride);
+
+    // Set GPR #4 to the src_stride
+    PyList_SetItem(state->initial_regs, 4, PyLong_FromLongLong(src_stride));
+
+    // Set GPR #5 to the ref_ptr and load the reference block from real memory
+    PyList_SetItem(state->initial_regs, 5, PyLong_FromLongLong(ref_ptr_svp64));
+    load_4x4_block(state->initial_mem, ref_ptr_svp64, ref_ptr, ref_stride);
+
+    // Set GPR #6 to the ref_stride
+    PyList_SetItem(state->initial_regs, 6, PyLong_FromLongLong(ref_stride));
+
+    // Prepare the arguments object for the call
+    pypowersim_prepareargs(state);
+
+    // Call the function and get the resulting object
+    state->result_obj = PyObject_CallObject(state->simulator, state->args);
+    Py_DECREF(state->simulator);
+    Py_DECREF(state->args);
+    if (!state->result_obj) {
+        PyErr_Print();
+        printf("Error invoking 'run_a_simulation'\n");
+        return UINT32_MAX;
+    }
+
+    // Get the GPRs from the result_obj
+    PyObject *final_regs = PyObject_GetAttrString(state->result_obj, "gpr");
+    if (!final_regs) {
+        PyErr_Print();
+        Py_DECREF(state->result_obj);
+        printf("Error getting final GPRs\n");
+        return UINT32_MAX;
+    }
+
+    // GPR #3 holds the return value as an integer
+    PyObject *key = PyLong_FromLongLong(3);
+    // PyDict_GetItem returns a borrowed reference (or NULL without an exception)
+    PyObject *itm = PyDict_GetItem(final_regs, key);
+    PyObject *value = itm ? PyObject_GetAttrString(itm, "value") : NULL;
+    Py_DECREF(key);
+    Py_DECREF(final_regs);
+    if (!value) {
+        if (PyErr_Occurred()) {
+            PyErr_Print();
+        }
+        printf("Error getting GPR #3\n");
+        return UINT32_MAX;
+    }
+    uint64_t val = PyLong_AsLongLong(value);
+    Py_DECREF(value);
+
+    // Return value
+    return (uint32_t) val;
+}
+
diff --git a/media/video/libvpx/variance_svp64_wrappers.h b/media/video/libvpx/variance_svp64_wrappers.h
index d38ecff0..4441dcce 100644
--- a/media/video/libvpx/variance_svp64_wrappers.h
+++ b/media/video/libvpx/variance_svp64_wrappers.h
@@ -1,3 +1,7 @@
 #include <stdint.h>
 
 uint32_t vpx_get_mb_ss_svp64_real(const int16_t 
*src_ptr);
+
+uint32_t vpx_get4x4sse_cs_svp64_real(const uint8_t *src_ptr, int src_stride,
+                                     const uint8_t *ref_ptr, int ref_stride);
+
diff --git a/media/video/libvpx/vpx_get4x4sse_cs_svp64_real.c.in b/media/video/libvpx/vpx_get4x4sse_cs_svp64_real.c.in
new file mode 100644
index 00000000..279f3fed
--- /dev/null
+++ b/media/video/libvpx/vpx_get4x4sse_cs_svp64_real.c.in
@@ -0,0 +1,22 @@
+#include <stdint.h>
+
+// Sum of squared byte differences between two 4x4 blocks.
+// Consecutive rows are src_stride / ref_stride bytes apart.
+uint32_t vpx_get4x4sse_cs_svp64_real(const uint8_t *src_ptr, int src_stride,
+                                     const uint8_t *ref_ptr, int ref_stride) {
+  int distortion = 0;
+  int r, c;
+
+  for (r = 0; r < 4; ++r) {
+    for (c = 0; c < 4; ++c) {
+      int diff = src_ptr[c] - ref_ptr[c];
+      distortion += diff * diff;
+    }
+
+    src_ptr += src_stride;
+    ref_ptr += ref_stride;
+  }
+
+  return distortion;
+}
+
diff --git a/media/video/libvpx/vpx_get4x4sse_cs_svp64_real.s b/media/video/libvpx/vpx_get4x4sse_cs_svp64_real.s
new file mode 100644
index 00000000..1d7c6e5d
--- /dev/null
+++ b/media/video/libvpx/vpx_get4x4sse_cs_svp64_real.s
@@ -0,0 +1,40 @@
+	.file	"vpx_get4x4sse_cs_svp64_real.c"
+	.abiversion 2
+	.section	".text"
+	.align 2
+	.globl vpx_get4x4sse_cs_svp64_real
+	.type	vpx_get4x4sse_cs_svp64_real, @function
+vpx_get4x4sse_cs_svp64_real:
+.LFB0:
+	.cfi_startproc
+	addi 5,5,-1
+	addi 3,3,3
+	li 12,4
+	li 8,0
+.L2:
+	addi 7,3,-4
+	mr 11,5
+	subf 9,7,3
+	mtctr 9
+.L3:
+	lbzu 9,1(7)
+	lbzu 10,1(11)
+	subf 9,10,9
+	mullw 9,9,9
+	add 9,9,8
+	extsw 8,9
+	bdnz .L3
+	addi 9,12,-1
+	add 5,5,6
+	add 3,3,4
+	rldicl. 12,9,0,32
+	bne 0,.L2
+	rldicl 3,8,0,32
+	blr
+	.long 0
+	.byte 0,0,0,0,0,0,0,0
+	.cfi_endproc
+.LFE0:
+	.size	vpx_get4x4sse_cs_svp64_real,.-vpx_get4x4sse_cs_svp64_real
+	.ident	"GCC: (Debian 8.3.0-6) 8.3.0"
+	.section	.note.GNU-stack,"",@progbits