--- /dev/null
+TARGET=libvpx_variance_test
+EXAMPLE=pypowersim_wrapper_example
+
+CC=gcc
+CXX=g++
+AS=powerpc64le-linux-gnu-as
+OBJCOPY=powerpc64le-linux-gnu-objcopy
+CFLAGS= -Iinclude -O -g3 -I/usr/include/python3.7m
+CXXFLAGS= -Iinclude -O -g3
+ASFLAGS= -mlibresoc
+LDFLAGS=-lgtest -pthread -lpython3.7m
+
+BINFILES = variancefuncs_svp64.bin
+ASFILES = variancefuncs_svp64.s
+CFILES = variance_ref.c variance_svp64.c variance_svp64_wrappers.c vpx_mem.c
+CPPFILES = test_libvpx.cc variance_test.cc
+EXAMPLEC = pypowersim_wrapper_example.c
+EXAMPLEOBJ= ${EXAMPLEC:.c=.o}
+OBJFILES = $(CFILES:.c=.o) $(CPPFILES:.cc=.o) $(ASFILES:.s=.o)
+
+variancefuncs_svp64.bin: variancefuncs_svp64.o
+ ${OBJCOPY} -I elf64-little -O binary $< $@
+
+${TARGET}: ${OBJFILES} ${BINFILES}
+ ${CXX} -o ${TARGET} ${OBJFILES} ${LDFLAGS}
+
+${EXAMPLE}: ${EXAMPLEOBJ}
+
+all: ${TARGET} ${EXAMPLE}
+
+.PHONY: all clean
+clean:
+ rm -f ${TARGET} ${OBJFILES} ${BINFILES}
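+
+# Typical usage (a sketch; assumes the powerpc64le binutils, gtest and the
+# python3.7 development headers referenced above are installed):
+#   make all                # note: 'all' is not the first target, so a plain
+#                           # 'make' only builds variancefuncs_svp64.bin
+#   ./libvpx_variance_test  # run the gtest-based variance tests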
--- /dev/null
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_TEST_ACM_RANDOM_H_
+#define VPX_TEST_ACM_RANDOM_H_
+
+#include <assert.h>
+
+#include <limits>
+
+#include "gtest/gtest.h"
+
+#include "vpx_integer.h"
+
+namespace libvpx_test {
+
+class ACMRandom {
+ public:
+ ACMRandom() : random_(DeterministicSeed()) {}
+
+ explicit ACMRandom(int seed) : random_(seed) {}
+
+ void Reset(int seed) { random_.Reseed(seed); }
+ uint16_t Rand16(void) {
+ const uint32_t value =
+ random_.Generate(testing::internal::Random::kMaxRange);
+ return (value >> 15) & 0xffff;
+ }
+
+ int32_t Rand20Signed(void) {
+ // Use 20 bits: values between 524287 and -524288.
+ const uint32_t value = random_.Generate(1048576);
+ return static_cast<int32_t>(value) - 524288;
+ }
+
+ int16_t Rand16Signed(void) {
+ // Use 16 bits: values between 32767 and -32768.
+ return static_cast<int16_t>(random_.Generate(65536));
+ }
+
+ int16_t Rand13Signed(void) {
+ // Use 13 bits: values between 4095 and -4096.
+ const uint32_t value = random_.Generate(8192);
+ return static_cast<int16_t>(value) - 4096;
+ }
+
+ int16_t Rand9Signed(void) {
+ // Use 9 bits: values between 255 (0x0FF) and -256 (0x100).
+ const uint32_t value = random_.Generate(512);
+ return static_cast<int16_t>(value) - 256;
+ }
+
+ uint8_t Rand8(void) {
+ const uint32_t value =
+ random_.Generate(testing::internal::Random::kMaxRange);
+ // There's a bit more entropy in the upper bits of this implementation.
+ return (value >> 23) & 0xff;
+ }
+
+ uint8_t Rand8Extremes(void) {
+ // Returns a random value near 0 or near 255, to better exercise
+ // saturation behavior.
+ const uint8_t r = Rand8();
+ return static_cast<uint8_t>((r < 128) ? r << 4 : r >> 4);
+ }
+
+ uint32_t RandRange(const uint32_t range) {
+    // testing::internal::Random::Generate provides values in the range
+    // [0, testing::internal::Random::kMaxRange).
+ assert(range <= testing::internal::Random::kMaxRange);
+ return random_.Generate(range);
+ }
+
+ int PseudoUniform(int range) { return random_.Generate(range); }
+
+ int operator()(int n) { return PseudoUniform(n); }
+
+ static int DeterministicSeed(void) { return 0xbaba; }
+
+ private:
+ testing::internal::Random random_;
+};
+
+} // namespace libvpx_test
+
+#endif // VPX_TEST_ACM_RANDOM_H_
--- /dev/null
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VPX_TEST_CLEAR_SYSTEM_STATE_H_
+#define VPX_TEST_CLEAR_SYSTEM_STATE_H_
+
+//#include "./vpx_config.h"
+#include "system_state.h"
+
+namespace libvpx_test {
+
+// Reset system to a known state. This function should be used for all non-API
+// test cases.
+inline void ClearSystemState() { vpx_clear_system_state(); }
+
+} // namespace libvpx_test
+#endif // VPX_TEST_CLEAR_SYSTEM_STATE_H_
--- /dev/null
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_PORTS_MEM_H_
+#define VPX_VPX_PORTS_MEM_H_
+
+//#include "vpx_config.h"
+#include "vpx_integer.h"
+
+#if (defined(__GNUC__) && __GNUC__) || defined(__SUNPRO_C)
+#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n)))
+#elif defined(_MSC_VER)
+#define DECLARE_ALIGNED(n, typ, val) __declspec(align(n)) typ val
+#else
+#warning No alignment directives known for this compiler.
+#define DECLARE_ALIGNED(n, typ, val) typ val
+#endif
+
+#if HAVE_NEON && defined(_MSC_VER)
+#define __builtin_prefetch(x)
+#endif
+
+/* Shift down with rounding */
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
+#define ROUND64_POWER_OF_TWO(value, n) (((value) + (1ULL << ((n)-1))) >> (n))
+
+#define ALIGN_POWER_OF_TWO(value, n) \
+ (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
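+
+/* For example, ROUND_POWER_OF_TWO(19, 2) == (19 + 2) >> 2 == 5, and
+ * ALIGN_POWER_OF_TWO(19, 4) rounds 19 up to the next multiple of 16, i.e. 32. */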
+
+#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
+#define CAST_TO_SHORTPTR(x) ((uint16_t *)((uintptr_t)(x)))
+#if CONFIG_VP9_HIGHBITDEPTH
+#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
+#define CAST_TO_BYTEPTR(x) ((uint8_t *)((uintptr_t)(x)))
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#endif // VPX_VPX_PORTS_MEM_H_
--- /dev/null
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_TEST_REGISTER_STATE_CHECK_H_
+#define VPX_TEST_REGISTER_STATE_CHECK_H_
+
+#include "gtest/gtest.h"
+//#include "./vpx_config.h"
+#include "vpx_integer.h"
+
+// ASM_REGISTER_STATE_CHECK(asm_function)
+// Minimally validates the environment pre & post function execution. This
+// variant should be used with assembly functions which are not expected to
+// fully restore the system state. See platform implementations of
+// RegisterStateCheck for details.
+//
+// API_REGISTER_STATE_CHECK(api_function)
+// Performs all the checks done by ASM_REGISTER_STATE_CHECK() and any
+// additional checks to ensure the environment is in a consistent state pre &
+// post function execution. This variant should be used with API functions.
+// See platform implementations of RegisterStateCheckXXX for details.
+//
+
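+// Example (illustrative only):
+//   unsigned int var, sse;
+//   ASM_REGISTER_STATE_CHECK(var = vpx_variance8x8_c(src, 8, ref, 8, &sse));
+// runs the statement and flags a test failure if callee-saved register state
+// was clobbered by the call.
+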
+#if defined(_WIN64) && VPX_ARCH_X86_64
+
+#undef NOMINMAX
+#define NOMINMAX
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
+#include <winnt.h>
+
+inline bool operator==(const M128A &lhs, const M128A &rhs) {
+ return (lhs.Low == rhs.Low && lhs.High == rhs.High);
+}
+
+namespace libvpx_test {
+
+// Compares the state of xmm[6-15] at construction with their state at
+// destruction. These registers should be preserved by the callee on
+// Windows x64.
+class RegisterStateCheck {
+ public:
+ RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); }
+ ~RegisterStateCheck() { Check(); }
+
+ private:
+ static bool StoreRegisters(CONTEXT *const context) {
+ const HANDLE this_thread = GetCurrentThread();
+ EXPECT_TRUE(this_thread != NULL);
+ context->ContextFlags = CONTEXT_FLOATING_POINT;
+ const bool context_saved = GetThreadContext(this_thread, context) == TRUE;
+ EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError();
+ return context_saved;
+ }
+
+  // Compares the register state; flags a test failure on any mismatch.
+ void Check() const {
+ ASSERT_TRUE(initialized_);
+ CONTEXT post_context;
+ ASSERT_TRUE(StoreRegisters(&post_context));
+
+ const M128A *xmm_pre = &pre_context_.Xmm6;
+ const M128A *xmm_post = &post_context.Xmm6;
+ for (int i = 6; i <= 15; ++i) {
+ EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!";
+ ++xmm_pre;
+ ++xmm_post;
+ }
+ }
+
+ bool initialized_;
+ CONTEXT pre_context_;
+};
+
+#define ASM_REGISTER_STATE_CHECK(statement) \
+ do { \
+ libvpx_test::RegisterStateCheck reg_check; \
+ statement; \
+ } while (false)
+
+} // namespace libvpx_test
+
+#elif defined(CONFIG_SHARED) && defined(HAVE_NEON_ASM) && \
+ defined(CONFIG_VP9) && !CONFIG_SHARED && HAVE_NEON_ASM && CONFIG_VP9
+
+extern "C" {
+// Save the d8-d15 registers into store.
+void vpx_push_neon(int64_t *store);
+}
+
+namespace libvpx_test {
+
+// Compares the state of d8-d15 at construction with their state at
+// destruction. These registers should be preserved by the callee on
+// arm platform.
+class RegisterStateCheck {
+ public:
+ RegisterStateCheck() { vpx_push_neon(pre_store_); }
+ ~RegisterStateCheck() { Check(); }
+
+ private:
+  // Compares the register state; flags a test failure on any mismatch.
+ void Check() const {
+ int64_t post_store[8];
+ vpx_push_neon(post_store);
+ for (int i = 0; i < 8; ++i) {
+ EXPECT_EQ(pre_store_[i], post_store[i])
+ << "d" << i + 8 << " has been modified";
+ }
+ }
+
+ int64_t pre_store_[8];
+};
+
+#define ASM_REGISTER_STATE_CHECK(statement) \
+ do { \
+ libvpx_test::RegisterStateCheck reg_check; \
+ statement; \
+ } while (false)
+
+} // namespace libvpx_test
+
+#else
+
+namespace libvpx_test {
+
+class RegisterStateCheck {};
+#define ASM_REGISTER_STATE_CHECK(statement) statement
+
+} // namespace libvpx_test
+
+#endif // _WIN64 && VPX_ARCH_X86_64
+
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
+#if defined(__GNUC__)
+
+namespace libvpx_test {
+
+// Checks the FPU tag word pre/post execution to ensure emms has been called.
+class RegisterStateCheckMMX {
+ public:
+ RegisterStateCheckMMX() {
+ __asm__ volatile("fstenv %0" : "=rm"(pre_fpu_env_));
+ }
+ ~RegisterStateCheckMMX() { Check(); }
+
+ private:
+  // Checks the FPU tag word pre/post execution; flags a test failure if it is
+  // not cleared to 0xffff (i.e. emms was not called).
+ void Check() const {
+ EXPECT_EQ(0xffff, pre_fpu_env_[4])
+ << "FPU was in an inconsistent state prior to call";
+
+ uint16_t post_fpu_env[14];
+ __asm__ volatile("fstenv %0" : "=rm"(post_fpu_env));
+ EXPECT_EQ(0xffff, post_fpu_env[4])
+ << "FPU was left in an inconsistent state after call";
+ }
+
+ uint16_t pre_fpu_env_[14];
+};
+
+#define API_REGISTER_STATE_CHECK(statement) \
+ do { \
+ libvpx_test::RegisterStateCheckMMX reg_check; \
+ ASM_REGISTER_STATE_CHECK(statement); \
+ } while (false)
+
+} // namespace libvpx_test
+
+#endif // __GNUC__
+#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64
+
+#ifndef API_REGISTER_STATE_CHECK
+#define API_REGISTER_STATE_CHECK ASM_REGISTER_STATE_CHECK
+#endif
+
+#endif // VPX_TEST_REGISTER_STATE_CHECK_H_
--- /dev/null
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_PORTS_SYSTEM_STATE_H_
+#define VPX_VPX_PORTS_SYSTEM_STATE_H_
+
+//#include "./vpx_config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX
+extern void vpx_clear_system_state(void);
+#else
+#define vpx_clear_system_state()
+#endif // (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_VPX_PORTS_SYSTEM_STATE_H_
--- /dev/null
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_DSP_VARIANCE_H_
+#define VPX_VPX_DSP_VARIANCE_H_
+
+//#include "./vpx_config.h"
+
+#include "vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FILTER_BITS 7
+#define FILTER_WEIGHT 128
+
+typedef unsigned int (*vpx_sad_fn_t)(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride);
+
+typedef unsigned int (*vpx_sad_avg_fn_t)(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ const uint8_t *second_pred);
+
+typedef void (*vp8_copy32xn_fn_t)(const uint8_t *src_ptr, int src_stride,
+ uint8_t *ref_ptr, int ref_stride, int n);
+
+typedef void (*vpx_sad_multi_fn_t)(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ unsigned int *sad_array);
+
+typedef void (*vpx_sad_multi_d_fn_t)(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const b_array[],
+ int ref_stride, unsigned int *sad_array);
+
+typedef unsigned int (*vpx_variance_fn_t)(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride, unsigned int *sse);
+
+typedef unsigned int (*vpx_subpixvariance_fn_t)(
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+typedef unsigned int (*vpx_subp_avg_variance_fn_t)(
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,
+ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse,
+ const uint8_t *second_pred);
+
+#if CONFIG_VP8
+typedef struct variance_vtable {
+ vpx_sad_fn_t sdf;
+ vpx_variance_fn_t vf;
+ vpx_subpixvariance_fn_t svf;
+ vpx_sad_multi_d_fn_t sdx4df;
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
+ vp8_copy32xn_fn_t copymem;
+#endif
+} vp8_variance_fn_ptr_t;
+#endif // CONFIG_VP8
+
+#if CONFIG_VP9
+typedef struct vp9_variance_vtable {
+ vpx_sad_fn_t sdf;
+ vpx_sad_avg_fn_t sdaf;
+ vpx_variance_fn_t vf;
+ vpx_subpixvariance_fn_t svf;
+ vpx_subp_avg_variance_fn_t svaf;
+ vpx_sad_multi_d_fn_t sdx4df;
+} vp9_variance_fn_ptr_t;
+#endif // CONFIG_VP9
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_VPX_DSP_VARIANCE_H_
--- /dev/null
+// This file is generated. Do not edit.
+#ifndef VPX_DSP_RTCD_H_
+#define VPX_DSP_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+/*
+ * DSP
+ */
+
+#include "vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride);
+
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+uint32_t vpx_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance16x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance16x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance4x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance8x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+
+// SVP64 prototypes
+unsigned int vpx_get_mb_ss_svp64(const int16_t *);
+
+unsigned int vpx_get4x4sse_cs_svp64(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride);
+
+void vpx_comp_avg_pred_svp64(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+
+unsigned int vpx_mse16x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_mse16x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_mse8x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_mse8x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+uint32_t vpx_sub_pixel_avg_variance16x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance16x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance16x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance32x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance32x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance32x64_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance4x4_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance4x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance64x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance64x64_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance8x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance8x4_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_avg_variance8x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred);
+
+uint32_t vpx_sub_pixel_variance16x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance16x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance16x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance32x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance32x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance32x64_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance4x4_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance4x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance64x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance64x64_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance8x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance8x4_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+uint32_t vpx_sub_pixel_variance8x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse);
+
+unsigned int vpx_variance16x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x32_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x32_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x64_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance4x4_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance4x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x32_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x64_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x4_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_VPX_INTEGER_H_
+#define VPX_VPX_VPX_INTEGER_H_
+
+/* get ptrdiff_t, size_t, wchar_t, NULL */
+#include <stddef.h>
+
+#if defined(_MSC_VER)
+#define VPX_FORCE_INLINE __forceinline
+#define VPX_INLINE __inline
+#else
+#define VPX_FORCE_INLINE __inline__ __attribute__((always_inline))
+// TODO(jbb): Allow a way to force inline off for older compilers.
+#define VPX_INLINE inline
+#endif
+
+#define INLINE VPX_INLINE
+
+/* Assume platforms have the C99 standard integer types. */
+
+#if defined(__cplusplus)
+#if !defined(__STDC_FORMAT_MACROS)
+#define __STDC_FORMAT_MACROS
+#endif
+#if !defined(__STDC_LIMIT_MACROS)
+#define __STDC_LIMIT_MACROS
+#endif
+#endif // __cplusplus
+
+#include <inttypes.h>
+#include <stdint.h>
+
+#endif // VPX_VPX_VPX_INTEGER_H_
--- /dev/null
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_MEM_VPX_MEM_H_
+#define VPX_VPX_MEM_VPX_MEM_H_
+
+//#include "vpx_config.h"
+#if defined(__uClinux__)
+#include <lddk.h>
+#endif
+
+#include <stdlib.h>
+#include <stddef.h>
+
+#include "vpx_integer.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+void *vpx_memalign(size_t align, size_t size);
+void *vpx_malloc(size_t size);
+void *vpx_calloc(size_t num, size_t size);
+void vpx_free(void *memblk);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE void *vpx_memset16(void *dest, int val, size_t length) {
+ size_t i;
+ uint16_t *dest16 = (uint16_t *)dest;
+ for (i = 0; i < length; i++) *dest16++ = val;
+ return dest;
+}
+#endif
+
+#include <string.h>
+
+#ifdef VPX_MEM_PLTFRM
+#include VPX_MEM_PLTFRM
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif // VPX_VPX_MEM_VPX_MEM_H_
--- /dev/null
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_
+#define VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_
+//#include "./vpx_config.h"
+
+#define ADDRESS_STORAGE_SIZE sizeof(size_t)
+
+#ifndef DEFAULT_ALIGNMENT
+#if defined(VXWORKS)
+/*default addr alignment to use in calls to vpx_* functions other than
+ * vpx_memalign*/
+#define DEFAULT_ALIGNMENT 32
+#else
+#define DEFAULT_ALIGNMENT (2 * sizeof(void *)) /* NOLINT */
+#endif
+#endif
+
+/*returns an addr aligned to the byte boundary specified by align*/
+#define align_addr(addr, align) \
+ (void *)(((size_t)(addr) + ((align)-1)) & ~(size_t)((align)-1))
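+
+/* For example, align_addr(0x1003, 16) yields 0x1010, the next 16-byte boundary. */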
+
+#endif // VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_
--- /dev/null
+#ifndef VPX_VPX_PORTS_MISC_H_
+#define VPX_VPX_PORTS_MISC_H_
+
+/*!\brief Bit depth for codec
+ *
+ * This enumeration determines the bit depth of the codec.
+ */
+typedef enum vpx_bit_depth {
+ VPX_BITS_8 = 8, /**< 8 bits */
+ VPX_BITS_10 = 10, /**< 10 bits */
+ VPX_BITS_12 = 12, /**< 12 bits */
+} vpx_bit_depth_t;
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_PORTS_VPX_TIMER_H_
+#define VPX_VPX_PORTS_VPX_TIMER_H_
+
+//#include "./vpx_config.h"
+
+#include "vpx_integer.h"
+
+#if CONFIG_OS_SUPPORT
+
+#if defined(_WIN32)
+/*
+ * Win32 specific includes
+ */
+#undef NOMINMAX
+#define NOMINMAX
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
+#else
+/*
+ * POSIX specific includes
+ */
+#include <sys/time.h>
+
+/* timersub is not provided by msys at this time. */
+#ifndef timersub
+#define timersub(a, b, result) \
+ do { \
+ (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
+ (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+ if ((result)->tv_usec < 0) { \
+ --(result)->tv_sec; \
+ (result)->tv_usec += 1000000; \
+ } \
+ } while (0)
+#endif
+#endif
+
+struct vpx_usec_timer {
+#if defined(_WIN32)
+ LARGE_INTEGER begin, end;
+#else
+ struct timeval begin, end;
+#endif
+};
+
+static INLINE void vpx_usec_timer_start(struct vpx_usec_timer *t) {
+#if defined(_WIN32)
+ QueryPerformanceCounter(&t->begin);
+#else
+ gettimeofday(&t->begin, NULL);
+#endif
+}
+
+static INLINE void vpx_usec_timer_mark(struct vpx_usec_timer *t) {
+#if defined(_WIN32)
+ QueryPerformanceCounter(&t->end);
+#else
+ gettimeofday(&t->end, NULL);
+#endif
+}
+
+static INLINE int64_t vpx_usec_timer_elapsed(struct vpx_usec_timer *t) {
+#if defined(_WIN32)
+ LARGE_INTEGER freq, diff;
+
+ diff.QuadPart = t->end.QuadPart - t->begin.QuadPart;
+
+ QueryPerformanceFrequency(&freq);
+ return diff.QuadPart * 1000000 / freq.QuadPart;
+#else
+ struct timeval diff;
+
+ timersub(&t->end, &t->begin, &diff);
+ return (int64_t)diff.tv_sec * 1000000 + diff.tv_usec;
+#endif
+}
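+
+/* Typical usage (sketch):
+ *   struct vpx_usec_timer t;
+ *   vpx_usec_timer_start(&t);
+ *   ...code being timed...
+ *   vpx_usec_timer_mark(&t);
+ *   int64_t us = vpx_usec_timer_elapsed(&t);
+ */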
+
+#else /* CONFIG_OS_SUPPORT = 0*/
+
+/* Empty timer functions if CONFIG_OS_SUPPORT = 0 */
+#ifndef timersub
+#define timersub(a, b, result)
+#endif
+
+struct vpx_usec_timer {
+ void *dummy;
+};
+
+static INLINE void vpx_usec_timer_start(struct vpx_usec_timer *t) {}
+
+static INLINE void vpx_usec_timer_mark(struct vpx_usec_timer *t) {}
+
+static INLINE int vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { return 0; }
+
+#endif /* CONFIG_OS_SUPPORT */
+
+#endif // VPX_VPX_PORTS_VPX_TIMER_H_
--- /dev/null
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <string>
+
+#include <gtest/gtest.h>
+
+int main(int argc, char **argv) {
+ ::testing::InitGoogleTest(&argc, argv);
+
+ return RUN_ALL_TESTS();
+}
--- /dev/null
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdint.h>
+
+#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n)))
+
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
+
+#define FILTER_BITS 7
+
+static const uint8_t bilinear_filters[8][2] = {
+ { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
+ { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 },
+};
+
+uint32_t vpx_get4x4sse_cs_c(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride) {
+ int distortion = 0;
+ int r, c;
+
+ for (r = 0; r < 4; ++r) {
+ for (c = 0; c < 4; ++c) {
+ int diff = src_ptr[c] - ref_ptr[c];
+ distortion += diff * diff;
+ }
+
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
+ }
+
+ return distortion;
+}
+
+uint32_t vpx_get_mb_ss_c(const int16_t *src_ptr) {
+ unsigned int i, sum = 0;
+
+ for (i = 0; i < 256; ++i) {
+ sum += src_ptr[i] * src_ptr[i];
+ }
+
+ return sum;
+}
+
+static void variance(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int w, int h,
+ uint32_t *sse, int *sum) {
+ int i, j;
+
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; ++j) {
+ const int diff = src_ptr[j] - ref_ptr[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
+ }
+}
+
+// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to implement
+// the first-pass of 2-D separable filter.
+//
+// Produces int16_t output to retain precision for the next pass. Two filter
+// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
+// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
+// It defines the offset required to move from one input to the next.
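+//
+// For example, with filter = bilinear_filters[4] = { 64, 64 } and pixel_step = 1,
+// each output is ROUND_POWER_OF_TWO(64 * a + 64 * b, 7) == (a + b + 1) >> 1, the
+// rounded average of two horizontally adjacent input pixels a and b.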
+static void var_filter_block2d_bil_first_pass(
+ const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line,
+ int pixel_step, unsigned int output_height, unsigned int output_width,
+ const uint8_t *filter) {
+ unsigned int i, j;
+
+ for (i = 0; i < output_height; ++i) {
+ for (j = 0; j < output_width; ++j) {
+ ref_ptr[j] = ROUND_POWER_OF_TWO(
+ (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
+ FILTER_BITS);
+
+ ++src_ptr;
+ }
+
+ src_ptr += src_pixels_per_line - output_width;
+ ref_ptr += output_width;
+ }
+}
+
+// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to implement
+// the second-pass of 2-D separable filter.
+//
+// Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
+// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
+// filter is applied horizontally (pixel_step = 1) or vertically
+// (pixel_step = stride). It defines the offset required to move from one input
+// to the next. Output is 8-bit.
+static void var_filter_block2d_bil_second_pass(
+ const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line,
+ unsigned int pixel_step, unsigned int output_height,
+ unsigned int output_width, const uint8_t *filter) {
+ unsigned int i, j;
+
+ for (i = 0; i < output_height; ++i) {
+ for (j = 0; j < output_width; ++j) {
+ ref_ptr[j] = ROUND_POWER_OF_TWO(
+ (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
+ FILTER_BITS);
+ ++src_ptr;
+ }
+
+ src_ptr += src_pixels_per_line - output_width;
+ ref_ptr += output_width;
+ }
+}
+
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
+ int height, const uint8_t *ref, int ref_stride) {
+ int i, j;
+
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ const int tmp = pred[j] + ref[j];
+ comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+ }
+ comp_pred += width;
+ pred += width;
+ ref += ref_stride;
+ }
+}
+
+#define VAR(W, H) \
+ uint32_t vpx_variance##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ int sum; \
+ variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \
+ return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
+ }
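+
+/* VAR() above relies on the identity sum((d - mean)^2) == sum(d^2) - (sum(d))^2 / N,
+ * with d = src - ref and N = W * H, so it returns the sum of squared deviations
+ * (N times the block variance) in a single pass over the data. */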
+
+#define SUBPIX_VAR(W, H) \
+ uint32_t vpx_sub_pixel_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ \
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ return vpx_variance##W##x##H##_c(temp2, W, ref_ptr, ref_stride, sse); \
+ }
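+
+/* In SUBPIX_VAR() above, fdata3 holds H + 1 rows because the second (vertical)
+ * pass reads each row together with the row below it (pixel_step == W). */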
+
+#define SUBPIX_AVG_VAR(W, H) \
+ uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \
+ const uint8_t *second_pred) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
+ \
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W); \
+ \
+ return vpx_variance##W##x##H##_c(temp3, W, ref_ptr, ref_stride, sse); \
+ }
+
+/* Identical to the variance call except it takes an additional parameter, sum,
+ * and returns that value using pass-by-reference instead of returning
+ * sse - sum^2 / w*h
+ */
+#define GET_VAR(W, H) \
+ void vpx_get##W##x##H##var_c(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse, int *sum) { \
+ variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \
+ }
+
+/* Identical to the variance call except it does not calculate the
+ * sse - sum^2 / w*h and returns sse in addition to modifying the passed-in
+ * variable.
+ */
+#define MSE(W, H) \
+ uint32_t vpx_mse##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ int sum; \
+ variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \
+ return *sse; \
+ }
+
+/* All three forms of the variance are available in the same sizes. */
+#define VARIANCES(W, H) \
+ VAR(W, H) \
+ SUBPIX_VAR(W, H) \
+ SUBPIX_AVG_VAR(W, H)
+
+VARIANCES(64, 64)
+VARIANCES(64, 32)
+VARIANCES(32, 64)
+VARIANCES(32, 32)
+VARIANCES(32, 16)
+VARIANCES(16, 32)
+VARIANCES(16, 16)
+VARIANCES(16, 8)
+VARIANCES(8, 16)
+VARIANCES(8, 8)
+VARIANCES(8, 4)
+VARIANCES(4, 8)
+VARIANCES(4, 4)
+
+GET_VAR(16, 16)
+GET_VAR(8, 8)
+
+MSE(16, 16)
+MSE(16, 8)
+MSE(8, 16)
+MSE(8, 8)
+
+
--- /dev/null
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdint.h>
+
+#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n)))
+
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
+
+#define FILTER_BITS 7
+
+static const uint8_t bilinear_filters[8][2] = {
+ { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
+ { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 },
+};
+
+uint32_t vpx_get4x4sse_cs_svp64(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride) {
+ int distortion = 0;
+ int r, c;
+
+ for (r = 0; r < 4; ++r) {
+ for (c = 0; c < 4; ++c) {
+ int diff = src_ptr[c] - ref_ptr[c];
+ distortion += diff * diff;
+ }
+
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
+ }
+
+ return distortion;
+}
+
+/*
+uint32_t vpx_get_mb_ss_svp64(const int16_t *src_ptr) {
+ unsigned int i, sum = 0;
+
+ for (i = 0; i < 256; ++i) {
+ sum += src_ptr[i] * src_ptr[i];
+ }
+
+ return sum;
+}*/
+
+static void variance_svp64(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int w, int h,
+ uint32_t *sse, int *sum) {
+ int i, j;
+
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; ++j) {
+ const int diff = src_ptr[j] - ref_ptr[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
+ }
+}
+
+// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to implement
+// the first-pass of 2-D separable filter.
+//
+// Produces int16_t output to retain precision for the next pass. Two filter
+// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
+// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
+// It defines the offset required to move from one input to the next.
+static void var_filter_block2d_bil_first_pass_svp64(
+ const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line,
+ int pixel_step, unsigned int output_height, unsigned int output_width,
+ const uint8_t *filter) {
+ unsigned int i, j;
+
+ for (i = 0; i < output_height; ++i) {
+ for (j = 0; j < output_width; ++j) {
+ ref_ptr[j] = ROUND_POWER_OF_TWO(
+ (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
+ FILTER_BITS);
+
+ ++src_ptr;
+ }
+
+ src_ptr += src_pixels_per_line - output_width;
+ ref_ptr += output_width;
+ }
+}
+
+// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to implement
+// the second-pass of 2-D separable filter.
+//
+// Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
+// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
+// filter is applied horizontally (pixel_step = 1) or vertically
+// (pixel_step = stride). It defines the offset required to move from one input
+// to the next. Output is 8-bit.
+static void var_filter_block2d_bil_second_pass_svp64(
+ const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line,
+ unsigned int pixel_step, unsigned int output_height,
+ unsigned int output_width, const uint8_t *filter) {
+ unsigned int i, j;
+
+ for (i = 0; i < output_height; ++i) {
+ for (j = 0; j < output_width; ++j) {
+ ref_ptr[j] = ROUND_POWER_OF_TWO(
+ (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
+ FILTER_BITS);
+ ++src_ptr;
+ }
+
+ src_ptr += src_pixels_per_line - output_width;
+ ref_ptr += output_width;
+ }
+}
+
+void vpx_comp_avg_pred_svp64(uint8_t *comp_pred, const uint8_t *pred, int width,
+ int height, const uint8_t *ref, int ref_stride) {
+ int i, j;
+
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ const int tmp = pred[j] + ref[j];
+ comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+ }
+ comp_pred += width;
+ pred += width;
+ ref += ref_stride;
+ }
+}
+
+#define VAR(W, H) \
+ uint32_t vpx_variance##W##x##H##_svp64(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ int sum; \
+ variance_svp64(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \
+ return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
+ }
+
+#define SUBPIX_VAR(W, H) \
+ uint32_t vpx_sub_pixel_variance##W##x##H##_svp64( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ \
+ var_filter_block2d_bil_first_pass_svp64(src_ptr, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_second_pass_svp64(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ return vpx_variance##W##x##H##_svp64(temp2, W, ref_ptr, ref_stride, sse);\
+ }
+
+#define SUBPIX_AVG_VAR(W, H) \
+ uint32_t vpx_sub_pixel_avg_variance##W##x##H##_svp64( \
+ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \
+ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \
+ const uint8_t *second_pred) { \
+ uint16_t fdata3[(H + 1) * W]; \
+ uint8_t temp2[H * W]; \
+ DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
+ \
+ var_filter_block2d_bil_first_pass_svp64(src_ptr, fdata3, src_stride, 1, H + 1, \
+ W, bilinear_filters[x_offset]); \
+ var_filter_block2d_bil_second_pass_svp64(fdata3, temp2, W, W, H, W, \
+ bilinear_filters[y_offset]); \
+ \
+ vpx_comp_avg_pred_svp64(temp3, second_pred, W, H, temp2, W); \
+ \
+ return vpx_variance##W##x##H##_svp64(temp3, W, ref_ptr, ref_stride, sse);\
+ }
+
+/* Identical to the variance call except it takes an additional parameter, sum,
+ * and returns that value using pass-by-reference instead of returning
+ * sse - sum^2 / w*h
+ */
+#define GET_VAR(W, H) \
+ void vpx_get##W##x##H##var_svp64(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse, int *sum) { \
+ variance_svp64(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \
+ }
+
+/* Identical to the variance call except it does not calculate the
+ * sse - sum^2 / w*h and returns sse in addition to modifying the passed-in
+ * variable.
+ */
+#define MSE(W, H) \
+ uint32_t vpx_mse##W##x##H##_svp64(const uint8_t *src_ptr, int src_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ uint32_t *sse) { \
+ int sum; \
+ variance_svp64(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \
+ return *sse; \
+ }
+
+/* All three forms of the variance are available in the same sizes. */
+#define VARIANCES(W, H) \
+ VAR(W, H) \
+ SUBPIX_VAR(W, H) \
+ SUBPIX_AVG_VAR(W, H)
+
+VARIANCES(64, 64)
+VARIANCES(64, 32)
+VARIANCES(32, 64)
+VARIANCES(32, 32)
+VARIANCES(32, 16)
+VARIANCES(16, 32)
+VARIANCES(16, 16)
+VARIANCES(16, 8)
+VARIANCES(8, 16)
+VARIANCES(8, 8)
+VARIANCES(8, 4)
+VARIANCES(4, 8)
+VARIANCES(4, 4)
+
+GET_VAR(16, 16)
+GET_VAR(8, 8)
+
+MSE(16, 16)
+MSE(16, 8)
+MSE(8, 16)
+MSE(8, 8)
+
+
--- /dev/null
+#include <Python.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "pypowersim_wrapper_common.h"
+#include "variance_svp64_wrappers.h"
+
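+// Wrapper that runs the SVP64 implementation of vpx_get_mb_ss() under the
+// pypowersim Power ISA simulator. Per the Power ELFv2 calling convention,
+// GPR #3 carries both the pointer argument on entry and the return value on exit.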
+uint32_t vpx_get_mb_ss_svp64(const int16_t *src_ptr) {
+ // The simulator is effectively a separate CPU with its own RAM, so we cannot
+ // pass the host pointer directly: the data at src_ptr has to be copied into
+ // simulator memory, at an arbitrarily chosen address.
+ const uint64_t src_ptr_svp64 = 0x100000;
+ const uint64_t *src_ptr64 = (const uint64_t *) src_ptr;
+
+ // Create the pypowersim_state
+ pypowersim_state_t *state = pypowersim_prepare();
+
+ // Change the relevant elements; setting the binary (the routine to simulate)
+ // is mandatory.
+ state->binary = PyBytes_FromStringAndSize((const char *)&vpx_get_mb_ss_svp64_real, 1000);
+ // Set GPR #3 to the pointer
+ PyObject *address = PyLong_FromLongLong(src_ptr_svp64);
+ PyList_SetItem(state->initial_regs, 3, address);
+ // Load data into buffer from real memory
+ size_t size = 256*sizeof(uint16_t)/sizeof(uint64_t);
+ for (size_t i = 0; i < size; i++) {
+ PyObject *address = PyLong_FromLongLong(src_ptr_svp64 + i*8);
+ PyObject *word = PyLong_FromLongLong(*(src_ptr64 + i));
+ PyDict_SetItem(state->initial_mem, address, word);
+ }
+
+ // Prepare the arguments object for the call
+ pypowersim_prepareargs(state);
+
+ // Call the function and get the resulting object
+ state->result_obj = PyObject_CallObject(state->simulator, state->args);
+ Py_DECREF(state->simulator);
+ Py_DECREF(state->args);
+ if (!state->result_obj) {
+ PyErr_Print();
+ printf("Error invoking 'run_a_simulation'\n");
+ }
+
+ // Get the GPRs from the result_obj
+ PyObject *final_regs = PyObject_GetAttrString(state->result_obj, "gpr");
+ if (!final_regs) {
+ PyErr_Print();
+ Py_DECREF(state->result_obj);
+ printf("Error getting final GPRs\n");
+ }
+
+ // GPR #3 holds the return value as an integer
+ PyObject *key = PyLong_FromLong(3);
+ PyObject *itm = PyDict_GetItem(final_regs, key);
+ PyObject *value = PyObject_GetAttrString(itm, "value");
+ uint64_t val = PyLong_AsLongLong(value);
+
+ // Return value
+ return (uint32_t) val;
+}
--- /dev/null
+#include <stdint.h>
+
+uint32_t vpx_get_mb_ss_svp64_real(const int16_t *src_ptr);
--- /dev/null
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cstdlib>
+#include <new>
+
+#include <gtest/gtest.h>
+
+#include "vpx_misc.h"
+#include "vpx_dsp_rtcd.h"
+#include "acm_random.h"
+#include "clear_system_state.h"
+#include "register_state_check.h"
+#include "vpx_integer.h"
+#include "variance.h"
+#include "vpx_mem.h"
+#include "mem.h"
+#include "vpx_timer.h"
+
+namespace {
+
+typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride);
+typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src);
+
+using libvpx_test::ACMRandom;
+
+// Truncate high bit depth results by downshifting (with rounding) by:
+// 2 * (bit_depth - 8) for sse
+// (bit_depth - 8) for se
+static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) {
+ switch (bit_depth) {
+ case VPX_BITS_12:
+ *sse = (*sse + 128) >> 8;
+ *se = (*se + 8) >> 4;
+ break;
+ case VPX_BITS_10:
+ *sse = (*sse + 8) >> 4;
+ *se = (*se + 2) >> 2;
+ break;
+ case VPX_BITS_8:
+ default: break;
+ }
+}
+
+static unsigned int mb_ss_ref(const int16_t *src) {
+ unsigned int res = 0;
+ for (int i = 0; i < 256; ++i) {
+ res += src[i] * src[i];
+ }
+ return res;
+}
+
+/* Note:
+ * Our codebase calculates the "diff" value in the variance algorithm by
+ * (src - ref).
+ */
+static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w,
+ int l2h, int src_stride, int ref_stride,
+ uint32_t *sse_ptr, bool use_high_bit_depth_,
+ vpx_bit_depth_t bit_depth) {
+ int64_t se = 0;
+ uint64_t sse = 0;
+ const int w = 1 << l2w;
+ const int h = 1 << l2h;
+ for (int y = 0; y < h; y++) {
+ for (int x = 0; x < w; x++) {
+ int diff;
+ if (!use_high_bit_depth_) {
+ diff = src[y * src_stride + x] - ref[y * ref_stride + x];
+ se += diff;
+ sse += diff * diff;
+ }
+ }
+ }
+ RoundHighBitDepth(bit_depth, &se, &sse);
+ *sse_ptr = static_cast<uint32_t>(sse);
+ return static_cast<uint32_t>(
+ sse - ((static_cast<int64_t>(se) * se) >> (l2w + l2h)));
+}
+
+/* The subpel reference functions differ from the codec version in one aspect:
+ * they calculate the bilinear factors directly instead of using a lookup table
+ * and therefore upshift xoff and yoff by 1. Only every other calculated value
+ * is used so the codec version shrinks the table to save space and maintain
+ * compatibility with vp8.
+ */
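+// For example, a table offset of xoff == 4 (half-pel at the codec's eighth-pel
+// resolution) becomes 8 at the 16th-pel resolution used below.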
+static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
+ int l2w, int l2h, int xoff, int yoff,
+ uint32_t *sse_ptr, bool use_high_bit_depth_,
+ vpx_bit_depth_t bit_depth) {
+ int64_t se = 0;
+ uint64_t sse = 0;
+ const int w = 1 << l2w;
+ const int h = 1 << l2h;
+
+ xoff <<= 1;
+ yoff <<= 1;
+
+ for (int y = 0; y < h; y++) {
+ for (int x = 0; x < w; x++) {
+ // Bilinear interpolation at a 16th pel step.
+ if (!use_high_bit_depth_) {
+ const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+ const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+ const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+ const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+ const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+ const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+ const int r = a + (((b - a) * yoff + 8) >> 4);
+ const int diff = r - src[w * y + x];
+ se += diff;
+ sse += diff * diff;
+ }
+ }
+ }
+ RoundHighBitDepth(bit_depth, &se, &sse);
+ *sse_ptr = static_cast<uint32_t>(sse);
+ return static_cast<uint32_t>(
+ sse - ((static_cast<int64_t>(se) * se) >> (l2w + l2h)));
+}
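+
+/* Illustrative example (not part of the test): with xoff = 3 (doubled to 6
+ * above) and horizontal neighbours a1 = 10, a2 = 26, the interpolation gives
+ * a = 10 + (((26 - 10) * 6 + 8) >> 4) = 16, i.e. 6/16 of the way from a1 to
+ * a2, which is why the offsets are shifted up to a 16th-pel step first.
+ */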
+
+static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
+ const uint8_t *second_pred, int l2w,
+ int l2h, int xoff, int yoff,
+ uint32_t *sse_ptr,
+ bool use_high_bit_depth,
+ vpx_bit_depth_t bit_depth) {
+ int64_t se = 0;
+ uint64_t sse = 0;
+ const int w = 1 << l2w;
+ const int h = 1 << l2h;
+
+ xoff <<= 1;
+ yoff <<= 1;
+
+ for (int y = 0; y < h; y++) {
+ for (int x = 0; x < w; x++) {
+ // bilinear interpolation at a 16th pel step
+ if (!use_high_bit_depth) {
+ const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+ const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+ const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+ const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+ const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+ const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+ const int r = a + (((b - a) * yoff + 8) >> 4);
+ const int diff =
+ ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
+ se += diff;
+ sse += diff * diff;
+ }
+ }
+ }
+ RoundHighBitDepth(bit_depth, &se, &sse);
+ *sse_ptr = static_cast<uint32_t>(sse);
+ return static_cast<uint32_t>(
+ sse - ((static_cast<int64_t>(se) * se) >> (l2w + l2h)));
+}
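+
+// The only difference from subpel_variance_ref above is the rounded average
+// with the second prediction before the diff is taken: e.g. (illustrative
+// values) r = 5 and second_pred = 8 give (5 + 8 + 1) >> 1 = 7.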
+
+////////////////////////////////////////////////////////////////////////////////
+
+class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> {
+ public:
+ SumOfSquaresTest() : func_(GetParam()) {}
+
+ virtual ~SumOfSquaresTest() { libvpx_test::ClearSystemState(); }
+
+ protected:
+ void ConstTest();
+ void RefTest();
+
+ SumOfSquaresFunction func_;
+ ACMRandom rnd_;
+};
+
+void SumOfSquaresTest::ConstTest() {
+ int16_t mem[256];
+ unsigned int res;
+ for (int v = 0; v < 20; ++v) {
+ for (int i = 0; i < 256; ++i) {
+ mem[i] = v;
+ }
+ ASM_REGISTER_STATE_CHECK(res = func_(mem));
+ EXPECT_EQ(256u * (v * v), res);
+ }
+}
+
+void SumOfSquaresTest::RefTest() {
+ int16_t mem[256];
+ for (int i = 0; i < 20; ++i) {
+ for (int j = 0; j < 256; ++j) {
+ mem[j] = rnd_.Rand8() - rnd_.Rand8();
+ }
+
+ const unsigned int expected = mb_ss_ref(mem);
+ unsigned int res;
+ ASM_REGISTER_STATE_CHECK(res = func_(mem));
+ EXPECT_EQ(expected, res);
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Encapsulating struct to store the function to test along with
+// some testing context.
+// Can be used for MSE, SSE, Variance, etc.
+
+template <typename Func>
+struct TestParams {
+ TestParams(int log2w = 0, int log2h = 0, Func function = nullptr,
+ int bit_depth_value = 0)
+ : log2width(log2w), log2height(log2h), func(function) {
+ use_high_bit_depth = (bit_depth_value > 0);
+ if (use_high_bit_depth) {
+ bit_depth = static_cast<vpx_bit_depth_t>(bit_depth_value);
+ } else {
+ bit_depth = VPX_BITS_8;
+ }
+ width = 1 << log2width;
+ height = 1 << log2height;
+ block_size = width * height;
+ mask = (1u << bit_depth) - 1;
+ }
+
+ int log2width, log2height;
+ int width, height;
+ int block_size;
+ Func func;
+ vpx_bit_depth_t bit_depth;
+ bool use_high_bit_depth;
+ uint32_t mask;
+};
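+
+// Example (illustrative): VarianceParams(4, 4, &vpx_variance16x16_c), used in
+// the instantiations below, describes a 16x16 block (block_size 256) at 8
+// bits, so use_high_bit_depth is false and mask == 255.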
+
+template <typename Func>
+std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) {
+ return os << "log2width/height:" << p.log2width << "/" << p.log2height
+ << " function:" << reinterpret_cast<const void *>(p.func)
+ << " bit-depth:" << p.bit_depth;
+}
+
+// Main class for testing a function type
+template <typename FunctionType>
+class MainTestClass
+ : public ::testing::TestWithParam<TestParams<FunctionType> > {
+ public:
+ virtual void SetUp() {
+ params_ = this->GetParam();
+
+ rnd_.Reset(ACMRandom::DeterministicSeed());
+ const size_t unit =
+ use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t);
+ src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size() * unit));
+ ref_ = new uint8_t[block_size() * unit];
+ ASSERT_NE(src_, nullptr);
+ ASSERT_NE(ref_, nullptr);
+ }
+
+ virtual void TearDown() {
+ vpx_free(src_);
+ delete[] ref_;
+ src_ = nullptr;
+ ref_ = nullptr;
+ libvpx_test::ClearSystemState();
+ }
+
+ protected:
+ // We could sub-class MainTestClass into dedicated classes for Variance
+ // and MSE/SSE, but that would involve a lot of 'this->xxx' dereferencing
+ // to access the top-class fields. That's cumbersome, so for now we just
+ // implement the testing methods here:
+
+ // Variance tests
+ void ZeroTest();
+ void RefTest();
+ void RefStrideTest();
+ void OneQuarterTest();
+ void SpeedTest();
+
+ // MSE/SSE tests
+ void RefTestMse();
+ void RefTestSse();
+ void MaxTestMse();
+ void MaxTestSse();
+
+ protected:
+ ACMRandom rnd_;
+ uint8_t *src_;
+ uint8_t *ref_;
+ TestParams<FunctionType> params_;
+
+ // some relay helpers
+ bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
+ int byte_shift() const { return params_.bit_depth - 8; }
+ int block_size() const { return params_.block_size; }
+ int width() const { return params_.width; }
+ int height() const { return params_.height; }
+ uint32_t mask() const { return params_.mask; }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Tests related to variance.
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::ZeroTest() {
+ for (int i = 0; i <= 255; ++i) {
+ if (!use_high_bit_depth()) {
+ memset(src_, i, block_size());
+ } else {
+ uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
+ for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift();
+ }
+ for (int j = 0; j <= 255; ++j) {
+ if (!use_high_bit_depth()) {
+ memset(ref_, j, block_size());
+ } else {
+ uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_);
+ for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift();
+ }
+ unsigned int sse, var;
+ ASM_REGISTER_STATE_CHECK(
+ var = params_.func(src_, width(), ref_, width(), &sse));
+ EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j;
+ }
+ }
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::RefTest() {
+ for (int i = 0; i < 10; ++i) {
+ for (int j = 0; j < block_size(); j++) {
+ if (!use_high_bit_depth()) {
+ src_[j] = rnd_.Rand8();
+ ref_[j] = rnd_.Rand8();
+ }
+ }
+ unsigned int sse1, sse2, var1, var2;
+ const int stride = width();
+ ASM_REGISTER_STATE_CHECK(
+ var1 = params_.func(src_, stride, ref_, stride, &sse1));
+ var2 =
+ variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
+ stride, &sse2, use_high_bit_depth(), params_.bit_depth);
+ EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
+ EXPECT_EQ(var1, var2) << "Error at test index: " << i;
+ }
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::RefStrideTest() {
+ for (int i = 0; i < 10; ++i) {
+ const int ref_stride = (i & 1) * width();
+ const int src_stride = ((i >> 1) & 1) * width();
+ for (int j = 0; j < block_size(); j++) {
+ const int ref_ind = (j / width()) * ref_stride + j % width();
+ const int src_ind = (j / width()) * src_stride + j % width();
+ if (!use_high_bit_depth()) {
+ src_[src_ind] = rnd_.Rand8();
+ ref_[ref_ind] = rnd_.Rand8();
+ }
+ }
+ unsigned int sse1, sse2;
+ unsigned int var1, var2;
+
+ ASM_REGISTER_STATE_CHECK(
+ var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1));
+ var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height,
+ src_stride, ref_stride, &sse2, use_high_bit_depth(),
+ params_.bit_depth);
+ EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
+ EXPECT_EQ(var1, var2) << "Error at test index: " << i;
+ }
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::OneQuarterTest() {
+ const int half = block_size() / 2;
+ if (!use_high_bit_depth()) {
+ memset(src_, 255, block_size());
+ memset(ref_, 255, half);
+ memset(ref_ + half, 0, half);
+ }
+ unsigned int sse, var, expected;
+ ASM_REGISTER_STATE_CHECK(
+ var = params_.func(src_, width(), ref_, width(), &sse));
+ expected = block_size() * 255 * 255 / 4;
+ EXPECT_EQ(expected, var);
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::SpeedTest() {
+ const int half = block_size() / 2;
+ if (!use_high_bit_depth()) {
+ memset(src_, 255, block_size());
+ memset(ref_, 255, half);
+ memset(ref_ + half, 0, half);
+ }
+ unsigned int sse;
+
+ vpx_usec_timer timer;
+ vpx_usec_timer_start(&timer);
+ for (int i = 0; i < (1 << 30) / block_size(); ++i) {
+ const uint32_t variance = params_.func(src_, width(), ref_, width(), &sse);
+ // Ignore return value.
+ (void)variance;
+ }
+ vpx_usec_timer_mark(&timer);
+ const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
+ printf("Variance %dx%d time: %5d ms\n", width(), height(),
+ elapsed_time / 1000);
+}
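+
+// Each SpeedTest run above processes (1 << 30) pixels in total regardless of
+// block size, so the printed times are directly comparable across sizes.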
+
+////////////////////////////////////////////////////////////////////////////////
+// Tests related to MSE / SSE.
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::RefTestMse() {
+ for (int i = 0; i < 10; ++i) {
+ for (int j = 0; j < block_size(); ++j) {
+ src_[j] = rnd_.Rand8();
+ ref_[j] = rnd_.Rand8();
+ }
+ unsigned int sse1, sse2;
+ const int stride = width();
+ ASM_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1));
+ variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
+ stride, &sse2, false, VPX_BITS_8);
+ EXPECT_EQ(sse1, sse2);
+ }
+}
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::RefTestSse() {
+ for (int i = 0; i < 10; ++i) {
+ for (int j = 0; j < block_size(); ++j) {
+ src_[j] = rnd_.Rand8();
+ ref_[j] = rnd_.Rand8();
+ }
+ unsigned int sse2;
+ unsigned int var1;
+ const int stride = width();
+ ASM_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride));
+ variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
+ stride, &sse2, false, VPX_BITS_8);
+ EXPECT_EQ(var1, sse2);
+ }
+}
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::MaxTestMse() {
+ memset(src_, 255, block_size());
+ memset(ref_, 0, block_size());
+ unsigned int sse;
+ ASM_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse));
+ const unsigned int expected = block_size() * 255 * 255;
+ EXPECT_EQ(expected, sse);
+}
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::MaxTestSse() {
+ memset(src_, 255, block_size());
+ memset(ref_, 0, block_size());
+ unsigned int var;
+ ASM_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width()));
+ const unsigned int expected = block_size() * 255 * 255;
+ EXPECT_EQ(expected, var);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename FunctionType>
+class SubpelVarianceTest
+ : public ::testing::TestWithParam<TestParams<FunctionType> > {
+ public:
+ virtual void SetUp() {
+ params_ = this->GetParam();
+
+ rnd_.Reset(ACMRandom::DeterministicSeed());
+ if (!use_high_bit_depth()) {
+ src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size()));
+ sec_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size()));
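+ // ref_ below gets block_size() + width() + height() + 1 bytes because
+ // the bilinear filter in the subpel functions reads one extra row and
+ // one extra column beyond the block.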
+ ref_ = reinterpret_cast<uint8_t *>(
+ vpx_malloc(block_size() + width() + height() + 1));
+ }
+ ASSERT_NE(src_, nullptr);
+ ASSERT_NE(sec_, nullptr);
+ ASSERT_NE(ref_, nullptr);
+ }
+
+ virtual void TearDown() {
+ if (!use_high_bit_depth()) {
+ vpx_free(src_);
+ vpx_free(sec_);
+ vpx_free(ref_);
+ }
+ libvpx_test::ClearSystemState();
+ }
+
+ protected:
+ void RefTest();
+ void ExtremeRefTest();
+ void SpeedTest();
+
+ ACMRandom rnd_;
+ uint8_t *src_ = nullptr;
+ uint8_t *ref_ = nullptr;
+ uint8_t *sec_ = nullptr;
+ TestParams<FunctionType> params_;
+
+ // some relay helpers
+ bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
+ int byte_shift() const { return params_.bit_depth - 8; }
+ int block_size() const { return params_.block_size; }
+ int width() const { return params_.width; }
+ int height() const { return params_.height; }
+ uint32_t mask() const { return params_.mask; }
+};
+
+template <typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
+ for (int x = 0; x < 8; ++x) {
+ for (int y = 0; y < 8; ++y) {
+ if (!use_high_bit_depth()) {
+ for (int j = 0; j < block_size(); j++) {
+ src_[j] = rnd_.Rand8();
+ }
+ for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+ ref_[j] = rnd_.Rand8();
+ }
+ }
+ unsigned int sse1, sse2;
+ unsigned int var1;
+ ASM_REGISTER_STATE_CHECK(
+ var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
+ const unsigned int var2 = subpel_variance_ref(
+ ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
+ use_high_bit_depth(), params_.bit_depth);
+ EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
+ EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+ }
+ }
+}
+
+template <typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
+ // Compare against reference.
+ // Src: Set the first half of values to 0, the second half to the maximum.
+ // Ref: Set the first half of values to the maximum, the second half to 0.
+ for (int x = 0; x < 8; ++x) {
+ for (int y = 0; y < 8; ++y) {
+ const int half = block_size() / 2;
+ if (!use_high_bit_depth()) {
+ memset(src_, 0, half);
+ memset(src_ + half, 255, half);
+ memset(ref_, 255, half);
+ memset(ref_ + half, 0, half + width() + height() + 1);
+ }
+ unsigned int sse1, sse2;
+ unsigned int var1;
+ ASM_REGISTER_STATE_CHECK(
+ var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
+ const unsigned int var2 = subpel_variance_ref(
+ ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
+ use_high_bit_depth(), params_.bit_depth);
+ EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
+ EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
+ }
+ }
+}
+
+template <typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::SpeedTest() {
+ // The only interesting points are 0, 4, and anything else. To make the loops
+ // simple we will use 0, 2 and 4.
+ for (int x = 0; x <= 4; x += 2) {
+ for (int y = 0; y <= 4; y += 2) {
+ if (!use_high_bit_depth()) {
+ memset(src_, 25, block_size());
+ memset(ref_, 50, block_size());
+ }
+ unsigned int sse;
+ vpx_usec_timer timer;
+ vpx_usec_timer_start(&timer);
+ for (int i = 0; i < 1000000000 / block_size(); ++i) {
+ const uint32_t variance =
+ params_.func(ref_, width() + 1, x, y, src_, width(), &sse);
+ (void)variance;
+ }
+ vpx_usec_timer_mark(&timer);
+ const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
+ printf("SubpelVariance %dx%d xoffset: %d yoffset: %d time: %5d ms\n",
+ width(), height(), x, y, elapsed_time / 1000);
+ }
+ }
+}
+
+template <>
+void SubpelVarianceTest<vpx_subp_avg_variance_fn_t>::RefTest() {
+ for (int x = 0; x < 8; ++x) {
+ for (int y = 0; y < 8; ++y) {
+ if (!use_high_bit_depth()) {
+ for (int j = 0; j < block_size(); j++) {
+ src_[j] = rnd_.Rand8();
+ sec_[j] = rnd_.Rand8();
+ }
+ for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+ ref_[j] = rnd_.Rand8();
+ }
+ }
+ uint32_t sse1, sse2;
+ uint32_t var1, var2;
+ ASM_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y,
+ src_, width(), &sse1, sec_));
+ var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width,
+ params_.log2height, x, y, &sse2,
+ use_high_bit_depth(), params_.bit_depth);
+ EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
+ EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+ }
+ }
+}
+
+typedef MainTestClass<Get4x4SseFunc> VpxSseTest;
+typedef MainTestClass<vpx_variance_fn_t> VpxMseTest;
+typedef MainTestClass<vpx_variance_fn_t> VpxVarianceTest;
+typedef SubpelVarianceTest<vpx_subpixvariance_fn_t> VpxSubpelVarianceTest;
+typedef SubpelVarianceTest<vpx_subp_avg_variance_fn_t> VpxSubpelAvgVarianceTest;
+
+TEST_P(VpxSseTest, RefSse) { RefTestSse(); }
+TEST_P(VpxSseTest, MaxSse) { MaxTestSse(); }
+TEST_P(VpxMseTest, RefMse) { RefTestMse(); }
+TEST_P(VpxMseTest, MaxMse) { MaxTestMse(); }
+TEST_P(VpxVarianceTest, Zero) { ZeroTest(); }
+TEST_P(VpxVarianceTest, Ref) { RefTest(); }
+TEST_P(VpxVarianceTest, RefStride) { RefStrideTest(); }
+TEST_P(VpxVarianceTest, OneQuarter) { OneQuarterTest(); }
+TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
+TEST_P(SumOfSquaresTest, Ref) { RefTest(); }
+TEST_P(VpxSubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(VpxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+TEST_P(VpxSubpelAvgVarianceTest, Ref) { RefTest(); }
+
+INSTANTIATE_TEST_SUITE_P(C, SumOfSquaresTest,
+ ::testing::Values(vpx_get_mb_ss_c));
+
+typedef TestParams<Get4x4SseFunc> SseParams;
+INSTANTIATE_TEST_SUITE_P(C, VpxSseTest,
+ ::testing::Values(SseParams(2, 2,
+ &vpx_get4x4sse_cs_c)));
+
+typedef TestParams<vpx_variance_fn_t> MseParams;
+INSTANTIATE_TEST_SUITE_P(C, VpxMseTest,
+ ::testing::Values(MseParams(4, 4, &vpx_mse16x16_c),
+ MseParams(4, 3, &vpx_mse16x8_c),
+ MseParams(3, 4, &vpx_mse8x16_c),
+ MseParams(3, 3, &vpx_mse8x8_c)));
+
+typedef TestParams<vpx_variance_fn_t> VarianceParams;
+INSTANTIATE_TEST_SUITE_P(
+ C, VpxVarianceTest,
+ ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_c),
+ VarianceParams(6, 5, &vpx_variance64x32_c),
+ VarianceParams(5, 6, &vpx_variance32x64_c),
+ VarianceParams(5, 5, &vpx_variance32x32_c),
+ VarianceParams(5, 4, &vpx_variance32x16_c),
+ VarianceParams(4, 5, &vpx_variance16x32_c),
+ VarianceParams(4, 4, &vpx_variance16x16_c),
+ VarianceParams(4, 3, &vpx_variance16x8_c),
+ VarianceParams(3, 4, &vpx_variance8x16_c),
+ VarianceParams(3, 3, &vpx_variance8x8_c),
+ VarianceParams(3, 2, &vpx_variance8x4_c),
+ VarianceParams(2, 3, &vpx_variance4x8_c),
+ VarianceParams(2, 2, &vpx_variance4x4_c)));
+
+typedef TestParams<vpx_subpixvariance_fn_t> SubpelVarianceParams;
+INSTANTIATE_TEST_SUITE_P(
+ C, VpxSubpelVarianceTest,
+ ::testing::Values(
+ SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_c, 0),
+ SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_c, 0),
+ SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_c, 0),
+ SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_c, 0),
+ SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_c, 0),
+ SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_c, 0),
+ SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_c, 0),
+ SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_c, 0),
+ SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_c, 0),
+ SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_c, 0),
+ SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_c, 0),
+ SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_c, 0),
+ SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_c, 0)));
+
+typedef TestParams<vpx_subp_avg_variance_fn_t> SubpelAvgVarianceParams;
+INSTANTIATE_TEST_SUITE_P(
+ C, VpxSubpelAvgVarianceTest,
+ ::testing::Values(
+ SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_c, 0),
+ SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_c, 0),
+ SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_c, 0),
+ SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_c, 0),
+ SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_c, 0),
+ SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_c, 0),
+ SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_c, 0),
+ SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_c, 0),
+ SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_c, 0),
+ SubpelAvgVarianceParams(3, 3, &vpx_sub_pixel_avg_variance8x8_c, 0),
+ SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_c, 0),
+ SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_c, 0),
+ SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_c, 0)));
+
+INSTANTIATE_TEST_SUITE_P(SVP64, SumOfSquaresTest,
+ ::testing::Values(vpx_get_mb_ss_svp64));
+
+INSTANTIATE_TEST_SUITE_P(SVP64, VpxSseTest,
+ ::testing::Values(SseParams(2, 2,
+ &vpx_get4x4sse_cs_svp64)));
+
+INSTANTIATE_TEST_SUITE_P(SVP64, VpxMseTest,
+ ::testing::Values(MseParams(4, 4, &vpx_mse16x16_svp64),
+ MseParams(4, 3, &vpx_mse16x8_svp64),
+ MseParams(3, 4, &vpx_mse8x16_svp64),
+ MseParams(3, 3, &vpx_mse8x8_svp64)));
+
+INSTANTIATE_TEST_SUITE_P(
+ SVP64, VpxVarianceTest,
+ ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_svp64),
+ VarianceParams(6, 5, &vpx_variance64x32_svp64),
+ VarianceParams(5, 6, &vpx_variance32x64_svp64),
+ VarianceParams(5, 5, &vpx_variance32x32_svp64),
+ VarianceParams(5, 4, &vpx_variance32x16_svp64),
+ VarianceParams(4, 5, &vpx_variance16x32_svp64),
+ VarianceParams(4, 4, &vpx_variance16x16_svp64),
+ VarianceParams(4, 3, &vpx_variance16x8_svp64),
+ VarianceParams(3, 4, &vpx_variance8x16_svp64),
+ VarianceParams(3, 3, &vpx_variance8x8_svp64),
+ VarianceParams(3, 2, &vpx_variance8x4_svp64),
+ VarianceParams(2, 3, &vpx_variance4x8_svp64),
+ VarianceParams(2, 2, &vpx_variance4x4_svp64)));
+
+INSTANTIATE_TEST_SUITE_P(
+ SVP64, VpxSubpelVarianceTest,
+ ::testing::Values(
+ SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_svp64, 0),
+ SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_svp64, 0),
+ SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_svp64, 0),
+ SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_svp64, 0),
+ SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_svp64, 0),
+ SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_svp64, 0),
+ SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_svp64, 0),
+ SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_svp64, 0),
+ SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_svp64, 0),
+ SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_svp64, 0),
+ SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_svp64, 0),
+ SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_svp64, 0),
+ SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_svp64, 0)));
+
+INSTANTIATE_TEST_SUITE_P(
+ SVP64, VpxSubpelAvgVarianceTest,
+ ::testing::Values(
+ SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_svp64, 0),
+ SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_svp64, 0),
+ SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_svp64, 0),
+ SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_svp64, 0),
+ SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_svp64, 0),
+ SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_svp64, 0),
+ SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_svp64, 0),
+ SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_svp64, 0),
+ SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_svp64, 0),
+ SubpelAvgVarianceParams(3, 3, &vpx_sub_pixel_avg_variance8x8_svp64, 0),
+ SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_svp64, 0),
+ SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_svp64, 0),
+ SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_svp64, 0)));
+
+} // namespace
--- /dev/null
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdint.h>
+
+uint32_t vpx_get_mb_ss_svp64_real(const int16_t *src_ptr) {
+ unsigned int i, sum = 0;
+
+ for (i = 0; i < 256; ++i) {
+ sum += src_ptr[i] * src_ptr[i];
+ }
+
+ return sum;
+}
+
--- /dev/null
+ .file "variancefuncs_svp64.c"
+ .abiversion 2
+ .section ".text"
+ .align 2
+ .globl vpx_get_mb_ss_svp64_real
+ .type vpx_get_mb_ss_svp64_real, @function
+vpx_get_mb_ss_svp64_real:
+.LFB0:
+ .cfi_startproc
+ addi 10,3,-2        # r10 = src_ptr - 2 (lhau pre-increments by 2)
+ li 3,0              # running sum; r3 is also the return register
+ li 9,256            # 256 elements in a 16x16 macroblock
+ mtctr 9             # use CTR as the loop counter
+.L2:
+ lhau 9,2(10)        # sign-extending load of the next int16_t, r10 += 2
+ mullw 9,9,9         # square it (low 32 bits)
+ add 9,9,3           # accumulate into the sum
+ rldicl 3,9,0,32     # keep only the low 32 bits of the sum in r3
+ bdnz .L2            # decrement CTR and loop while it is non-zero
+ blr
+ .long 0
+ .byte 0,0,0,0,0,0,0,0
+ .cfi_endproc
+.LFE0:
+ .size vpx_get_mb_ss_svp64_real,.-vpx_get_mb_ss_svp64_real
+ .ident "GCC: (Debian 8.3.0-6) 8.3.0"
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "vpx_mem.h"
+#include "vpx_mem_intrnl.h"
+#include "vpx_integer.h"
+
+#if !defined(VPX_MAX_ALLOCABLE_MEMORY)
+#if SIZE_MAX > (1ULL << 40)
+#define VPX_MAX_ALLOCABLE_MEMORY (1ULL << 40)
+#else
+// For 32-bit targets keep this below INT_MAX to avoid valgrind warnings.
+#define VPX_MAX_ALLOCABLE_MEMORY ((1ULL << 31) - (1 << 16))
+#endif
+#endif
+
+// Returns 0 if nmemb * size would exceed VPX_MAX_ALLOCABLE_MEMORY or not
+// fit in a size_t.
+static int check_size_argument_overflow(uint64_t nmemb, uint64_t size) {
+ const uint64_t total_size = nmemb * size;
+ if (nmemb == 0) return 1;
+ if (size > VPX_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
+ if (total_size != (size_t)total_size) return 0;
+
+ return 1;
+}
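+
+/* Example (illustrative): with VPX_MAX_ALLOCABLE_MEMORY = 1ULL << 40, a
+ * request of nmemb = 1ULL << 33 elements of size 256 bytes is rejected
+ * because size > VPX_MAX_ALLOCABLE_MEMORY / nmemb (256 > 128), even though
+ * the 64-bit product itself does not wrap around.
+ */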
+
+static size_t *get_malloc_address_location(void *const mem) {
+ return ((size_t *)mem) - 1;
+}
+
+static uint64_t get_aligned_malloc_size(size_t size, size_t align) {
+ return (uint64_t)size + align - 1 + ADDRESS_STORAGE_SIZE;
+}
+
+static void set_actual_malloc_address(void *const mem,
+ const void *const malloc_addr) {
+ size_t *const malloc_addr_location = get_malloc_address_location(mem);
+ *malloc_addr_location = (size_t)malloc_addr;
+}
+
+static void *get_actual_malloc_address(void *const mem) {
+ size_t *const malloc_addr_location = get_malloc_address_location(mem);
+ return (void *)(*malloc_addr_location);
+}
+
+void *vpx_memalign(size_t align, size_t size) {
+ void *x = NULL, *addr;
+ const uint64_t aligned_size = get_aligned_malloc_size(size, align);
+ if (!check_size_argument_overflow(1, aligned_size)) return NULL;
+
+ addr = malloc((size_t)aligned_size);
+ if (addr) {
+ x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, align);
+ set_actual_malloc_address(x, addr);
+ }
+ return x;
+}
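+
+/* Illustrative layout of a vpx_memalign() block: the raw malloc() address
+ * addr is followed by enough padding to reach the first aligned address x
+ * at least ADDRESS_STORAGE_SIZE bytes in, and the size_t slot just before x
+ * stores addr so that vpx_free() can recover it:
+ *
+ *   addr [padding] [addr stored here] x [user data: size bytes]
+ */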
+
+void *vpx_malloc(size_t size) { return vpx_memalign(DEFAULT_ALIGNMENT, size); }
+
+void *vpx_calloc(size_t num, size_t size) {
+ void *x;
+ if (!check_size_argument_overflow(num, size)) return NULL;
+
+ x = vpx_malloc(num * size);
+ if (x) memset(x, 0, num * size);
+ return x;
+}
+
+void vpx_free(void *memblk) {
+ if (memblk) {
+ void *addr = get_actual_malloc_address(memblk);
+ free(addr);
+ }
+}