uint32_t vpx_get_mb_ss_svp64(const int16_t *src_ptr) {
// It cannot be the same pointer as the original function, as it is really a separate CPU/RAM
// we have to memcpy from src_ptr to this pointer, the address was chosen arbitrarily
- const uint64_t src_ptr_svp64 = 0x100000;
- const uint64_t *src_ptr64 = (const uint64_t *) src_ptr;
+ uint64_t src_ptr_svp64 = 0x100000;
// Create the pypowersim_state
pypowersim_state_t *state = pypowersim_prepare();
PyObject *address = PyLong_FromLongLong(src_ptr_svp64);
PyList_SetItem(state->initial_regs, 3, address);
// Load data into buffer from real memory
- size_t size = 256*sizeof(uint16_t)/sizeof(uint64_t);
- for (int i=0; i < size; i++) {
- PyObject *address = PyLong_FromLongLong(src_ptr_svp64 + i*8);
- PyObject *word = PyLong_FromLongLong(*(src_ptr64 + i));
+ for (int i=0; i < 256; i += 4) {
+ PyObject *address = PyLong_FromLongLong(src_ptr_svp64);
+ uint64_t val = src_ptr[0];
+ val |= (uint64_t)(src_ptr[1]) << 16;
+ val |= (uint64_t)(src_ptr[2]) << 32;
+ val |= (uint64_t)(src_ptr[3]) << 48;
+ //printf("src: %p -> %04x %04x %04x %04x, val: %016x -> %p\n", src_ptr, src_ptr[0], src_ptr[1], src_ptr[2], src_ptr[3], val, src_ptr_svp64);
+ PyObject *word = PyLong_FromLongLong(val);
PyDict_SetItem(state->initial_mem, address, word);
+ src_ptr += 4;
+ src_ptr_svp64 += 8;
}
// Prepare the arguments object for the call
uint32_t vpx_get4x4sse_cs_svp64(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride) {
+
+// vpx_get4x4sse_cs_svp64_ref(src_ptr, src_stride, ref_ptr, ref_stride);
+
// It cannot be the same pointer as the original function, as it is really a separate CPU/RAM
// we have to memcpy from src_ptr to this pointer, the address was chosen arbitrarily
- const uint64_t src_ptr_svp64 = 0x100000;
- const uint64_t ref_ptr_svp64 = 0x200000;
- const uint64_t *src_ptr64 = (const uint64_t *) src_ptr;
- const uint64_t *ref_ptr64 = (const uint64_t *) ref_ptr;
+ uint64_t src_ptr_svp64 = 0x100000;
+ uint64_t ref_ptr_svp64 = 0x200000;
// Create the pypowersim_state
pypowersim_state_t *state = pypowersim_prepare();
// Change the relevant elements, mandatory: body
- //
state->binary = PyBytes_FromStringAndSize((const char *)&vpx_get4x4sse_cs_svp64_real, 1000);
// Set GPR #3 to the src_ptr
PyObject *src_address = PyLong_FromLongLong(src_ptr_svp64);
PyList_SetItem(state->initial_regs, 3, src_address);
// Load data into buffer from real memory
- size_t size = 16*sizeof(uint16_t)/sizeof(uint64_t);
- for (int i=0; i < size; i++) {
- PyObject *address = PyLong_FromLongLong(src_ptr_svp64 + i*8);
- PyObject *word = PyLong_FromLongLong(*(src_ptr64 + i));
+ for (int r=0; r < 4; r++) {
+ PyObject *address = PyLong_FromLongLong(src_ptr_svp64);
+ uint64_t val = src_ptr[0];
+ val |= (uint64_t)(src_ptr[1]) << 16;
+ val |= (uint64_t)(src_ptr[2]) << 32;
+ val |= (uint64_t)(src_ptr[3]) << 48;
+ PyObject *word = PyLong_FromLongLong(val);
PyDict_SetItem(state->initial_mem, address, word);
+ src_ptr += src_stride;
+ src_ptr_svp64 += 8;
}
// Set GPR #5 to the ref_ptr
PyObject *ref_address = PyLong_FromLongLong(ref_ptr_svp64);
PyList_SetItem(state->initial_regs, 5, ref_address);
// Load data into buffer from real memory
- for (int i=0; i < size; i++) {
- PyObject *address = PyLong_FromLongLong(ref_ptr_svp64 + i*8);
- PyObject *word = PyLong_FromLongLong(*(ref_ptr64 + i));
+ for (int r=0; r < 4; r++) {
+ PyObject *address = PyLong_FromLongLong(ref_ptr_svp64);
+ uint64_t val = ref_ptr[0];
+ val |= (uint64_t)(ref_ptr[1]) << 16;
+ val |= (uint64_t)(ref_ptr[2]) << 32;
+ val |= (uint64_t)(ref_ptr[3]) << 48;
+ //printf("ref: %p -> %04x %04x %04x %04x, val: %016lx -> %p\n", ref_ptr, ref_ptr[0], ref_ptr[1], ref_ptr[2], ref_ptr[3], val, ref_ptr_svp64);
+ PyObject *word = PyLong_FromLongLong(val);
PyDict_SetItem(state->initial_mem, address, word);
+ ref_ptr += ref_stride;
+ ref_ptr_svp64 += 8;
}
// Set GPR #6 to the ref_stride
- PyList_SetItem(state->initial_regs, 4, PyLong_FromLongLong(ref_stride));
+ PyList_SetItem(state->initial_regs, 6, PyLong_FromLongLong(ref_stride));
// Prepare the arguments object for the call
pypowersim_prepareargs(state);