From: Konstantinos Margaritis
Date: Tue, 20 Sep 2022 20:16:25 +0000 (+0000)
Subject: PoC simplified and isolated unit test for libvpx (VP8 & VP9) that uses pypowersim_wrapper
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=57b80c915caf03d90f10aeb9221af4e1033a6199;p=openpower-isa.git

PoC simplified and isolated unit test for libvpx (VP8 & VP9) that uses pypowersim_wrapper
---

diff --git a/media/video/libvpx/Makefile b/media/video/libvpx/Makefile
new file mode 100644
index 00000000..06b82323
--- /dev/null
+++ b/media/video/libvpx/Makefile
@@ -0,0 +1,33 @@
+TARGET=libvpx_variance_test
+EXAMPLE=pypowersim_wrapper_example
+
+CC=gcc
+CXX=g++
+AS=powerpc64le-linux-gnu-as
+OBJCOPY=powerpc64le-linux-gnu-objcopy
+CFLAGS= -Iinclude -O -g3 -I/usr/include/python3.7m
+CXXFLAGS= -Iinclude -O -g3
+ASFLAGS= -mlibresoc
+LDFLAGS=-lgtest -pthread -lpython3.7m
+
+BINFILES = variancefuncs_svp64.bin
+ASFILES = variancefuncs_svp64.s
+CFILES = variance_ref.c variance_svp64.c variance_svp64_wrappers.c vpx_mem.c
+CPPFILES = test_libvpx.cc variance_test.cc
+EXAMPLEC = pypowersim_wrapper_example.c
+EXAMPLEOBJ= ${EXAMPLEC:.c=.o}
+OBJFILES = $(CFILES:.c=.o) $(CPPFILES:.cc=.o) $(ASFILES:.s=.o)
+
+variancefuncs_svp64.bin: variancefuncs_svp64.o
+	${OBJCOPY} -I elf64-little -O binary $< $@
+
+${TARGET}: ${OBJFILES} ${BINFILES}
+	${CXX} -o ${TARGET} ${OBJFILES} ${LDFLAGS}
+
+${EXAMPLE}: ${EXAMPLEOBJ}
+
+all: ${TARGET} ${EXAMPLE}
+
+.PHONY: clean
+clean:
+	rm -f ${TARGET} ${OBJFILES} ${BINFILES}
diff --git a/media/video/libvpx/include/acm_random.h b/media/video/libvpx/include/acm_random.h
new file mode 100644
index 00000000..cd14818d
--- /dev/null
+++ b/media/video/libvpx/include/acm_random.h
@@ -0,0 +1,93 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_TEST_ACM_RANDOM_H_
+#define VPX_TEST_ACM_RANDOM_H_
+
+#include <assert.h>
+
+#include <limits.h>
+
+#include "gtest/gtest.h"
+
+#include "vpx_integer.h"
+
+namespace libvpx_test {
+
+class ACMRandom {
+ public:
+  ACMRandom() : random_(DeterministicSeed()) {}
+
+  explicit ACMRandom(int seed) : random_(seed) {}
+
+  void Reset(int seed) { random_.Reseed(seed); }
+  uint16_t Rand16(void) {
+    const uint32_t value =
+        random_.Generate(testing::internal::Random::kMaxRange);
+    return (value >> 15) & 0xffff;
+  }
+
+  int32_t Rand20Signed(void) {
+    // Use 20 bits: values between 524287 and -524288.
+    const uint32_t value = random_.Generate(1048576);
+    return static_cast<int32_t>(value) - 524288;
+  }
+
+  int16_t Rand16Signed(void) {
+    // Use 16 bits: values between 32767 and -32768.
+    return static_cast<int16_t>(random_.Generate(65536));
+  }
+
+  int16_t Rand13Signed(void) {
+    // Use 13 bits: values between 4095 and -4096.
+    const uint32_t value = random_.Generate(8192);
+    return static_cast<int16_t>(value) - 4096;
+  }
+
+  int16_t Rand9Signed(void) {
+    // Use 9 bits: values between 255 (0x0FF) and -256 (0x100).
+    const uint32_t value = random_.Generate(512);
+    return static_cast<int16_t>(value) - 256;
+  }
+
+  uint8_t Rand8(void) {
+    const uint32_t value =
+        random_.Generate(testing::internal::Random::kMaxRange);
+    // There's a bit more entropy in the upper bits of this implementation.
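+    // (Generate(kMaxRange) yields values in [0, 1u << 31), so bits 30..23
+    // supply the 8 most random bits.)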
+    return (value >> 23) & 0xff;
+  }
+
+  uint8_t Rand8Extremes(void) {
+    // Returns a random value near 0 or near 255, to better exercise
+    // saturation behavior.
+    const uint8_t r = Rand8();
+    return static_cast<uint8_t>((r < 128) ? r << 4 : r >> 4);
+  }
+
+  uint32_t RandRange(const uint32_t range) {
+    // testing::internal::Random::Generate provides values in the range
+    // [0, testing::internal::Random::kMaxRange).
+    assert(range <= testing::internal::Random::kMaxRange);
+    return random_.Generate(range);
+  }
+
+  int PseudoUniform(int range) { return random_.Generate(range); }
+
+  int operator()(int n) { return PseudoUniform(n); }
+
+  static int DeterministicSeed(void) { return 0xbaba; }
+
+ private:
+  testing::internal::Random random_;
+};
+
+}  // namespace libvpx_test
+
+#endif  // VPX_TEST_ACM_RANDOM_H_
diff --git a/media/video/libvpx/include/clear_system_state.h b/media/video/libvpx/include/clear_system_state.h
new file mode 100644
index 00000000..66e0ee96
--- /dev/null
+++ b/media/video/libvpx/include/clear_system_state.h
@@ -0,0 +1,23 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VPX_TEST_CLEAR_SYSTEM_STATE_H_
+#define VPX_TEST_CLEAR_SYSTEM_STATE_H_
+
+//#include "./vpx_config.h"
+#include "system_state.h"
+
+namespace libvpx_test {
+
+// Reset system to a known state. This function should be used for all non-API
+// test cases.
+inline void ClearSystemState() { vpx_clear_system_state(); }
+
+}  // namespace libvpx_test
+#endif  // VPX_TEST_CLEAR_SYSTEM_STATE_H_
diff --git a/media/video/libvpx/include/mem.h b/media/video/libvpx/include/mem.h
new file mode 100644
index 00000000..95bcba75
--- /dev/null
+++ b/media/video/libvpx/include/mem.h
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_PORTS_MEM_H_
+#define VPX_VPX_PORTS_MEM_H_
+
+//#include "vpx_config.h"
+#include "vpx_integer.h"
+
+#if (defined(__GNUC__) && __GNUC__) || defined(__SUNPRO_C)
+#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n)))
+#elif defined(_MSC_VER)
+#define DECLARE_ALIGNED(n, typ, val) __declspec(align(n)) typ val
+#else
+#warning No alignment directives known for this compiler.
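+/* On this fallback path the macro degrades to a plain declaration, so no
+ * alignment guarantee is made for compilers other than GCC/Sun/MSVC. */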
+#define DECLARE_ALIGNED(n, typ, val) typ val
+#endif
+
+#if HAVE_NEON && defined(_MSC_VER)
+#define __builtin_prefetch(x)
+#endif
+
+/* Shift down with rounding */
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
+#define ROUND64_POWER_OF_TWO(value, n) (((value) + (1ULL << ((n)-1))) >> (n))
+
+#define ALIGN_POWER_OF_TWO(value, n) \
+  (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
+
+#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
+#define CAST_TO_SHORTPTR(x) ((uint16_t *)((uintptr_t)(x)))
+#if CONFIG_VP9_HIGHBITDEPTH
+#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
+#define CAST_TO_BYTEPTR(x) ((uint8_t *)((uintptr_t)(x)))
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#endif  // VPX_VPX_PORTS_MEM_H_
diff --git a/media/video/libvpx/include/register_state_check.h b/media/video/libvpx/include/register_state_check.h
new file mode 100644
index 00000000..89ee725e
--- /dev/null
+++ b/media/video/libvpx/include/register_state_check.h
@@ -0,0 +1,187 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_TEST_REGISTER_STATE_CHECK_H_
+#define VPX_TEST_REGISTER_STATE_CHECK_H_
+
+#include "gtest/gtest.h"
+//#include "./vpx_config.h"
+#include "vpx_integer.h"
+
+// ASM_REGISTER_STATE_CHECK(asm_function)
+//   Minimally validates the environment pre & post function execution. This
+//   variant should be used with assembly functions which are not expected to
+//   fully restore the system state. See platform implementations of
+//   RegisterStateCheck for details.
+//
+// API_REGISTER_STATE_CHECK(api_function)
+//   Performs all the checks done by ASM_REGISTER_STATE_CHECK() and any
+//   additional checks to ensure the environment is in a consistent state pre &
+//   post function execution. This variant should be used with API functions.
+//   See platform implementations of RegisterStateCheckXXX for details.
+//
+
+#if defined(_WIN64) && VPX_ARCH_X86_64
+
+#undef NOMINMAX
+#define NOMINMAX
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
+#include <winnt.h>
+
+inline bool operator==(const M128A &lhs, const M128A &rhs) {
+  return (lhs.Low == rhs.Low && lhs.High == rhs.High);
+}
+
+namespace libvpx_test {
+
+// Compares the state of xmm[6-15] at construction with their state at
+// destruction. These registers should be preserved by the callee on
+// Windows x64.
+class RegisterStateCheck {
+ public:
+  RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); }
+  ~RegisterStateCheck() { Check(); }
+
+ private:
+  static bool StoreRegisters(CONTEXT *const context) {
+    const HANDLE this_thread = GetCurrentThread();
+    EXPECT_TRUE(this_thread != NULL);
+    context->ContextFlags = CONTEXT_FLOATING_POINT;
+    const bool context_saved = GetThreadContext(this_thread, context) == TRUE;
+    EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError();
+    return context_saved;
+  }
+
+  // Compares the register state; any mismatch is reported through gtest
+  // EXPECT/ASSERT failures.
+ void Check() const { + ASSERT_TRUE(initialized_); + CONTEXT post_context; + ASSERT_TRUE(StoreRegisters(&post_context)); + + const M128A *xmm_pre = &pre_context_.Xmm6; + const M128A *xmm_post = &post_context.Xmm6; + for (int i = 6; i <= 15; ++i) { + EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!"; + ++xmm_pre; + ++xmm_post; + } + } + + bool initialized_; + CONTEXT pre_context_; +}; + +#define ASM_REGISTER_STATE_CHECK(statement) \ + do { \ + libvpx_test::RegisterStateCheck reg_check; \ + statement; \ + } while (false) + +} // namespace libvpx_test + +#elif defined(CONFIG_SHARED) && defined(HAVE_NEON_ASM) && \ + defined(CONFIG_VP9) && !CONFIG_SHARED && HAVE_NEON_ASM && CONFIG_VP9 + +extern "C" { +// Save the d8-d15 registers into store. +void vpx_push_neon(int64_t *store); +} + +namespace libvpx_test { + +// Compares the state of d8-d15 at construction with their state at +// destruction. These registers should be preserved by the callee on +// arm platform. +class RegisterStateCheck { + public: + RegisterStateCheck() { vpx_push_neon(pre_store_); } + ~RegisterStateCheck() { Check(); } + + private: + // Compares the register state. Returns true if the states match. + void Check() const { + int64_t post_store[8]; + vpx_push_neon(post_store); + for (int i = 0; i < 8; ++i) { + EXPECT_EQ(pre_store_[i], post_store[i]) + << "d" << i + 8 << " has been modified"; + } + } + + int64_t pre_store_[8]; +}; + +#define ASM_REGISTER_STATE_CHECK(statement) \ + do { \ + libvpx_test::RegisterStateCheck reg_check; \ + statement; \ + } while (false) + +} // namespace libvpx_test + +#else + +namespace libvpx_test { + +class RegisterStateCheck {}; +#define ASM_REGISTER_STATE_CHECK(statement) statement + +} // namespace libvpx_test + +#endif // _WIN64 && VPX_ARCH_X86_64 + +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 +#if defined(__GNUC__) + +namespace libvpx_test { + +// Checks the FPU tag word pre/post execution to ensure emms has been called. +class RegisterStateCheckMMX { + public: + RegisterStateCheckMMX() { + __asm__ volatile("fstenv %0" : "=rm"(pre_fpu_env_)); + } + ~RegisterStateCheckMMX() { Check(); } + + private: + // Checks the FPU tag word pre/post execution, returning false if not cleared + // to 0xffff. + void Check() const { + EXPECT_EQ(0xffff, pre_fpu_env_[4]) + << "FPU was in an inconsistent state prior to call"; + + uint16_t post_fpu_env[14]; + __asm__ volatile("fstenv %0" : "=rm"(post_fpu_env)); + EXPECT_EQ(0xffff, post_fpu_env[4]) + << "FPU was left in an inconsistent state after call"; + } + + uint16_t pre_fpu_env_[14]; +}; + +#define API_REGISTER_STATE_CHECK(statement) \ + do { \ + libvpx_test::RegisterStateCheckMMX reg_check; \ + ASM_REGISTER_STATE_CHECK(statement); \ + } while (false) + +} // namespace libvpx_test + +#endif // __GNUC__ +#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64 + +#ifndef API_REGISTER_STATE_CHECK +#define API_REGISTER_STATE_CHECK ASM_REGISTER_STATE_CHECK +#endif + +#endif // VPX_TEST_REGISTER_STATE_CHECK_H_ diff --git a/media/video/libvpx/include/system_state.h b/media/video/libvpx/include/system_state.h new file mode 100644 index 00000000..4d2d93bc --- /dev/null +++ b/media/video/libvpx/include/system_state.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_PORTS_SYSTEM_STATE_H_ +#define VPX_VPX_PORTS_SYSTEM_STATE_H_ + +//#include "./vpx_config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#if (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX +extern void vpx_clear_system_state(void); +#else +#define vpx_clear_system_state() +#endif // (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VPX_PORTS_SYSTEM_STATE_H_ diff --git a/media/video/libvpx/include/variance.h b/media/video/libvpx/include/variance.h new file mode 100644 index 00000000..4d51b5cc --- /dev/null +++ b/media/video/libvpx/include/variance.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_DSP_VARIANCE_H_ +#define VPX_VPX_DSP_VARIANCE_H_ + +//#include "./vpx_config.h" + +#include "vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define FILTER_BITS 7 +#define FILTER_WEIGHT 128 + +typedef unsigned int (*vpx_sad_fn_t)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride); + +typedef unsigned int (*vpx_sad_avg_fn_t)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + const uint8_t *second_pred); + +typedef void (*vp8_copy32xn_fn_t)(const uint8_t *src_ptr, int src_stride, + uint8_t *ref_ptr, int ref_stride, int n); + +typedef void (*vpx_sad_multi_fn_t)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sad_array); + +typedef void (*vpx_sad_multi_d_fn_t)(const uint8_t *src_ptr, int src_stride, + const uint8_t *const b_array[], + int ref_stride, unsigned int *sad_array); + +typedef unsigned int (*vpx_variance_fn_t)(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, unsigned int *sse); + +typedef unsigned int (*vpx_subpixvariance_fn_t)( + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +typedef unsigned int (*vpx_subp_avg_variance_fn_t)( + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, + const uint8_t *second_pred); + +#if CONFIG_VP8 +typedef struct variance_vtable { + vpx_sad_fn_t sdf; + vpx_variance_fn_t vf; + vpx_subpixvariance_fn_t svf; + vpx_sad_multi_d_fn_t sdx4df; +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 + vp8_copy32xn_fn_t copymem; +#endif +} vp8_variance_fn_ptr_t; +#endif // CONFIG_VP8 + +#if CONFIG_VP9 +typedef struct vp9_variance_vtable { + vpx_sad_fn_t sdf; + vpx_sad_avg_fn_t sdaf; + vpx_variance_fn_t vf; + vpx_subpixvariance_fn_t svf; + vpx_subp_avg_variance_fn_t svaf; + vpx_sad_multi_d_fn_t sdx4df; +} vp9_variance_fn_ptr_t; +#endif // CONFIG_VP9 + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VPX_DSP_VARIANCE_H_ diff --git a/media/video/libvpx/include/vpx_dsp_rtcd.h b/media/video/libvpx/include/vpx_dsp_rtcd.h new file mode 100644 index 00000000..537fd10b --- /dev/null +++ b/media/video/libvpx/include/vpx_dsp_rtcd.h @@ -0,0 +1,211 @@ +// This file is generated. Do not edit. 
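+// Each C reference prototype below has a matching _svp64 counterpart so the
+// unit tests can run both implementations on identical inputs and compare.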
+#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +unsigned int vpx_get_mb_ss_c(const int16_t *); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride); + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +uint32_t vpx_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, 
int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance16x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance16x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance4x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance8x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned 
int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + + +// SVP64 prototypes +unsigned int vpx_get_mb_ss_svp64(const int16_t *); + +unsigned int vpx_get4x4sse_cs_svp64(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride); + +void vpx_comp_avg_pred_svp64(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); + +unsigned int vpx_mse16x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_mse8x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_mse8x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +uint32_t vpx_sub_pixel_avg_variance16x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance16x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance16x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance32x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance32x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance32x64_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance4x4_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance4x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance64x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance64x64_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance8x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t 
vpx_sub_pixel_avg_variance8x4_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance8x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_variance16x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance16x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance16x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance32x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance32x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance32x64_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance4x4_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance4x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance64x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance64x64_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance8x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance8x4_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance8x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +unsigned int vpx_variance16x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned 
int *sse);
+
+unsigned int vpx_variance4x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x32_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x64_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x4_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
diff --git a/media/video/libvpx/include/vpx_integer.h b/media/video/libvpx/include/vpx_integer.h
new file mode 100644
index 00000000..e0ab5471
--- /dev/null
+++ b/media/video/libvpx/include/vpx_integer.h
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_VPX_INTEGER_H_
+#define VPX_VPX_VPX_INTEGER_H_
+
+/* get ptrdiff_t, size_t, wchar_t, NULL */
+#include <stddef.h>
+
+#if defined(_MSC_VER)
+#define VPX_FORCE_INLINE __forceinline
+#define VPX_INLINE __inline
+#else
+#define VPX_FORCE_INLINE __inline__ __attribute__((always_inline))
+// TODO(jbb): Allow a way to force inline off for older compilers.
+#define VPX_INLINE inline
+#endif
+
+#define INLINE VPX_INLINE
+
+/* Assume platforms have the C99 standard integer types. */
+
+#if defined(__cplusplus)
+#if !defined(__STDC_FORMAT_MACROS)
+#define __STDC_FORMAT_MACROS
+#endif
+#if !defined(__STDC_LIMIT_MACROS)
+#define __STDC_LIMIT_MACROS
+#endif
+#endif  // __cplusplus
+
+#include <inttypes.h>
+#include <stdint.h>
+
+#endif  // VPX_VPX_VPX_INTEGER_H_
diff --git a/media/video/libvpx/include/vpx_mem.h b/media/video/libvpx/include/vpx_mem.h
new file mode 100644
index 00000000..e260a309
--- /dev/null
+++ b/media/video/libvpx/include/vpx_mem.h
@@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_MEM_VPX_MEM_H_
+#define VPX_VPX_MEM_VPX_MEM_H_
+
+//#include "vpx_config.h"
+#if defined(__uClinux__)
+#include <lddk.h>
+#endif
+
+#include <stdlib.h>
+#include <stddef.h>
+
+#include "vpx_integer.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+void *vpx_memalign(size_t align, size_t size);
+void *vpx_malloc(size_t size);
+void *vpx_calloc(size_t num, size_t size);
+void vpx_free(void *memblk);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE void *vpx_memset16(void *dest, int val, size_t length) {
+  size_t i;
+  uint16_t *dest16 = (uint16_t *)dest;
+  for (i = 0; i < length; i++) *dest16++ = val;
+  return dest;
+}
+#endif
+
+#include <string.h>
+
+#ifdef VPX_MEM_PLTFRM
+#include VPX_MEM_PLTFRM
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif  // VPX_VPX_MEM_VPX_MEM_H_
diff --git a/media/video/libvpx/include/vpx_mem_intrnl.h b/media/video/libvpx/include/vpx_mem_intrnl.h
new file mode 100644
index 00000000..33da6c21
--- /dev/null
+++ b/media/video/libvpx/include/vpx_mem_intrnl.h
@@ -0,0 +1,31 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_
+#define VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_
+//#include "./vpx_config.h"
+
+#define ADDRESS_STORAGE_SIZE sizeof(size_t)
+
+#ifndef DEFAULT_ALIGNMENT
+#if defined(VXWORKS)
+/*default addr alignment to use in calls to vpx_* functions other than
+ * vpx_memalign*/
+#define DEFAULT_ALIGNMENT 32
+#else
+#define DEFAULT_ALIGNMENT (2 * sizeof(void *)) /* NOLINT */
+#endif
+#endif
+
+/*returns an addr aligned to the byte boundary specified by align*/
+#define align_addr(addr, align) \
+  (void *)(((size_t)(addr) + ((align)-1)) & ~(size_t)((align)-1))
+
+#endif  // VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_
diff --git a/media/video/libvpx/include/vpx_misc.h b/media/video/libvpx/include/vpx_misc.h
new file mode 100644
index 00000000..62c4212b
--- /dev/null
+++ b/media/video/libvpx/include/vpx_misc.h
@@ -0,0 +1,14 @@
+#ifndef VPX_VPX_PORTS_MISC_H_
+#define VPX_VPX_PORTS_MISC_H_
+
+/*!\brief Bit depth for codec
+ *
+ * This enumeration determines the bit depth of the codec.
+ */
+typedef enum vpx_bit_depth {
+  VPX_BITS_8 = 8,   /**<  8 bits */
+  VPX_BITS_10 = 10, /**< 10 bits */
+  VPX_BITS_12 = 12, /**< 12 bits */
+} vpx_bit_depth_t;
+
+#endif
diff --git a/media/video/libvpx/include/vpx_timer.h b/media/video/libvpx/include/vpx_timer.h
new file mode 100644
index 00000000..518de18d
--- /dev/null
+++ b/media/video/libvpx/include/vpx_timer.h
@@ -0,0 +1,109 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_PORTS_VPX_TIMER_H_
+#define VPX_VPX_PORTS_VPX_TIMER_H_
+
+//#include "./vpx_config.h"
+
+#include "vpx_integer.h"
+
+#if CONFIG_OS_SUPPORT
+
+#if defined(_WIN32)
+/*
+ * Win32 specific includes
+ */
+#undef NOMINMAX
+#define NOMINMAX
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
+#else
+/*
+ * POSIX specific includes
+ */
+#include <sys/time.h>
+
+/* timersub is not provided by msys at this time. */
+#ifndef timersub
+#define timersub(a, b, result)                       \
+  do {                                               \
+    (result)->tv_sec = (a)->tv_sec - (b)->tv_sec;    \
+    (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+    if ((result)->tv_usec < 0) {                     \
+      --(result)->tv_sec;                            \
+      (result)->tv_usec += 1000000;                  \
+    }                                                \
+  } while (0)
+#endif
+#endif
+
+struct vpx_usec_timer {
+#if defined(_WIN32)
+  LARGE_INTEGER begin, end;
+#else
+  struct timeval begin, end;
+#endif
+};
+
+static INLINE void vpx_usec_timer_start(struct vpx_usec_timer *t) {
+#if defined(_WIN32)
+  QueryPerformanceCounter(&t->begin);
+#else
+  gettimeofday(&t->begin, NULL);
+#endif
+}
+
+static INLINE void vpx_usec_timer_mark(struct vpx_usec_timer *t) {
+#if defined(_WIN32)
+  QueryPerformanceCounter(&t->end);
+#else
+  gettimeofday(&t->end, NULL);
+#endif
+}
+
+static INLINE int64_t vpx_usec_timer_elapsed(struct vpx_usec_timer *t) {
+#if defined(_WIN32)
+  LARGE_INTEGER freq, diff;
+
+  diff.QuadPart = t->end.QuadPart - t->begin.QuadPart;
+
+  QueryPerformanceFrequency(&freq);
+  return diff.QuadPart * 1000000 / freq.QuadPart;
+#else
+  struct timeval diff;
+
+  timersub(&t->end, &t->begin, &diff);
+  return (int64_t)diff.tv_sec * 1000000 + diff.tv_usec;
+#endif
+}
+
+#else /* CONFIG_OS_SUPPORT = 0 */
+
+/* Empty timer functions if CONFIG_OS_SUPPORT = 0 */
+#ifndef timersub
+#define timersub(a, b, result)
+#endif
+
+struct vpx_usec_timer {
+  void *dummy;
+};
+
+static INLINE void vpx_usec_timer_start(struct vpx_usec_timer *t) {}
+
+static INLINE void vpx_usec_timer_mark(struct vpx_usec_timer *t) {}
+
+static INLINE int vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { return 0; }
+
+#endif /* CONFIG_OS_SUPPORT */
+
+#endif  // VPX_VPX_PORTS_VPX_TIMER_H_
diff --git a/media/video/libvpx/test_libvpx.cc b/media/video/libvpx/test_libvpx.cc
new file mode 100644
index 00000000..8fd34246
--- /dev/null
+++ b/media/video/libvpx/test_libvpx.cc
@@ -0,0 +1,18 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <string>
+
+#include <gtest/gtest.h>
+
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+
+  return RUN_ALL_TESTS();
+}
diff --git a/media/video/libvpx/variance_ref.c b/media/video/libvpx/variance_ref.c
new file mode 100644
index 00000000..78a6dbfa
--- /dev/null
+++ b/media/video/libvpx/variance_ref.c
@@ -0,0 +1,238 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdint.h>
+
+#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n)))
+
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
+
+#define FILTER_BITS 7
+
+static const uint8_t bilinear_filters[8][2] = {
+  { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
+  { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 },
+};
+
+uint32_t vpx_get4x4sse_cs_c(const uint8_t *src_ptr, int src_stride,
+                            const uint8_t *ref_ptr, int ref_stride) {
+  int distortion = 0;
+  int r, c;
+
+  for (r = 0; r < 4; ++r) {
+    for (c = 0; c < 4; ++c) {
+      int diff = src_ptr[c] - ref_ptr[c];
+      distortion += diff * diff;
+    }
+
+    src_ptr += src_stride;
+    ref_ptr += ref_stride;
+  }
+
+  return distortion;
+}
+
+uint32_t vpx_get_mb_ss_c(const int16_t *src_ptr) {
+  unsigned int i, sum = 0;
+
+  for (i = 0; i < 256; ++i) {
+    sum += src_ptr[i] * src_ptr[i];
+  }
+
+  return sum;
+}
+
+static void variance(const uint8_t *src_ptr, int src_stride,
+                     const uint8_t *ref_ptr, int ref_stride, int w, int h,
+                     uint32_t *sse, int *sum) {
+  int i, j;
+
+  *sum = 0;
+  *sse = 0;
+
+  for (i = 0; i < h; ++i) {
+    for (j = 0; j < w; ++j) {
+      const int diff = src_ptr[j] - ref_ptr[j];
+      *sum += diff;
+      *sse += diff * diff;
+    }
+
+    src_ptr += src_stride;
+    ref_ptr += ref_stride;
+  }
+}
+
+// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to
+// implement the first-pass of 2-D separable filter.
+//
+// Produces int16_t output to retain precision for the next pass. Two filter
+// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
+// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
+// It defines the offset required to move from one input to the next.
+static void var_filter_block2d_bil_first_pass(
+    const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line,
+    int pixel_step, unsigned int output_height, unsigned int output_width,
+    const uint8_t *filter) {
+  unsigned int i, j;
+
+  for (i = 0; i < output_height; ++i) {
+    for (j = 0; j < output_width; ++j) {
+      ref_ptr[j] = ROUND_POWER_OF_TWO(
+          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
+          FILTER_BITS);
+
+      ++src_ptr;
+    }
+
+    src_ptr += src_pixels_per_line - output_width;
+    ref_ptr += output_width;
+  }
+}
+
+// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to
+// implement the second-pass of 2-D separable filter.
+//
+// Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
+// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
+// filter is applied horizontally (pixel_step = 1) or vertically
+// (pixel_step = stride). It defines the offset required to move from one input
+// to the next. Output is 8-bit.
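+//
+// For example, with filter = {112, 16} (offset index 1) each output pixel is
+// (112 * a + 16 * b + 64) >> 7: the taps sum to FILTER_WEIGHT (128) and
+// ROUND_POWER_OF_TWO adds half of 1 << FILTER_BITS before shifting.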
+static void var_filter_block2d_bil_second_pass(
+    const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line,
+    unsigned int pixel_step, unsigned int output_height,
+    unsigned int output_width, const uint8_t *filter) {
+  unsigned int i, j;
+
+  for (i = 0; i < output_height; ++i) {
+    for (j = 0; j < output_width; ++j) {
+      ref_ptr[j] = ROUND_POWER_OF_TWO(
+          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
+          FILTER_BITS);
+      ++src_ptr;
+    }
+
+    src_ptr += src_pixels_per_line - output_width;
+    ref_ptr += output_width;
+  }
+}
+
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
+                         int height, const uint8_t *ref, int ref_stride) {
+  int i, j;
+
+  for (i = 0; i < height; ++i) {
+    for (j = 0; j < width; ++j) {
+      const int tmp = pred[j] + ref[j];
+      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += ref_stride;
+  }
+}
+
+#define VAR(W, H)                                                            \
+  uint32_t vpx_variance##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \
+                                     const uint8_t *ref_ptr, int ref_stride, \
+                                     uint32_t *sse) {                        \
+    int sum;                                                                 \
+    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum);     \
+    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));                \
+  }
+
+#define SUBPIX_VAR(W, H)                                                     \
+  uint32_t vpx_sub_pixel_variance##W##x##H##_c(                              \
+      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
+      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
+    uint16_t fdata3[(H + 1) * W];                                            \
+    uint8_t temp2[H * W];                                                    \
+                                                                             \
+    var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
+                                      W, bilinear_filters[x_offset]);        \
+    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,            \
+                                       bilinear_filters[y_offset]);          \
+                                                                             \
+    return vpx_variance##W##x##H##_c(temp2, W, ref_ptr, ref_stride, sse);    \
+  }
+
+#define SUBPIX_AVG_VAR(W, H)                                                 \
+  uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c(                          \
+      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
+      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
+      const uint8_t *second_pred) {                                          \
+    uint16_t fdata3[(H + 1) * W];                                            \
+    uint8_t temp2[H * W];                                                    \
+    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                              \
+                                                                             \
+    var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
+                                      W, bilinear_filters[x_offset]);        \
+    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,            \
+                                       bilinear_filters[y_offset]);          \
+                                                                             \
+    vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W);                 \
+                                                                             \
+    return vpx_variance##W##x##H##_c(temp3, W, ref_ptr, ref_stride, sse);    \
+  }
+
+/* Identical to the variance call except it takes an additional parameter, sum,
+ * and returns that value using pass-by-reference instead of returning
+ * sse - sum^2 / w*h
+ */
+#define GET_VAR(W, H)                                                    \
+  void vpx_get##W##x##H##var_c(const uint8_t *src_ptr, int src_stride,   \
+                               const uint8_t *ref_ptr, int ref_stride,   \
+                               uint32_t *sse, int *sum) {                \
+    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum);  \
+  }
+
+/* Identical to the variance call except it does not calculate the
+ * sse - sum^2 / w*h and returns sse in addition to modifying the passed-in
+ * variable.
+ */
+#define MSE(W, H)                                                        \
+  uint32_t vpx_mse##W##x##H##_c(const uint8_t *src_ptr, int src_stride,  \
+                                const uint8_t *ref_ptr, int ref_stride,  \
+                                uint32_t *sse) {                         \
+    int sum;                                                             \
+    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \
+    return *sse;                                                         \
+  }
+
+/* All three forms of the variance are available in the same sizes.
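+ * VARIANCES(W, H) instantiates the plain, sub-pixel and sub-pixel-average
+ * variants for one block size.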
 */
+#define VARIANCES(W, H) \
+  VAR(W, H)             \
+  SUBPIX_VAR(W, H)      \
+  SUBPIX_AVG_VAR(W, H)
+
+VARIANCES(64, 64)
+VARIANCES(64, 32)
+VARIANCES(32, 64)
+VARIANCES(32, 32)
+VARIANCES(32, 16)
+VARIANCES(16, 32)
+VARIANCES(16, 16)
+VARIANCES(16, 8)
+VARIANCES(8, 16)
+VARIANCES(8, 8)
+VARIANCES(8, 4)
+VARIANCES(4, 8)
+VARIANCES(4, 4)
+
+GET_VAR(16, 16)
+GET_VAR(8, 8)
+
+MSE(16, 16)
+MSE(16, 8)
+MSE(8, 16)
+MSE(8, 8)
diff --git a/media/video/libvpx/variance_svp64.c b/media/video/libvpx/variance_svp64.c
new file mode 100644
index 00000000..55e3f199
--- /dev/null
+++ b/media/video/libvpx/variance_svp64.c
@@ -0,0 +1,239 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdint.h>
+
+#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n)))
+
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))
+
+#define FILTER_BITS 7
+
+static const uint8_t bilinear_filters[8][2] = {
+  { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
+  { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 },
+};
+
+uint32_t vpx_get4x4sse_cs_svp64(const uint8_t *src_ptr, int src_stride,
+                                const uint8_t *ref_ptr, int ref_stride) {
+  int distortion = 0;
+  int r, c;
+
+  for (r = 0; r < 4; ++r) {
+    for (c = 0; c < 4; ++c) {
+      int diff = src_ptr[c] - ref_ptr[c];
+      distortion += diff * diff;
+    }
+
+    src_ptr += src_stride;
+    ref_ptr += ref_stride;
+  }
+
+  return distortion;
+}
+
+/*
+uint32_t vpx_get_mb_ss_svp64(const int16_t *src_ptr) {
+  unsigned int i, sum = 0;
+
+  for (i = 0; i < 256; ++i) {
+    sum += src_ptr[i] * src_ptr[i];
+  }
+
+  return sum;
+}*/
+
+static void variance_svp64(const uint8_t *src_ptr, int src_stride,
+                           const uint8_t *ref_ptr, int ref_stride, int w, int h,
+                           uint32_t *sse, int *sum) {
+  int i, j;
+
+  *sum = 0;
+  *sse = 0;
+
+  for (i = 0; i < h; ++i) {
+    for (j = 0; j < w; ++j) {
+      const int diff = src_ptr[j] - ref_ptr[j];
+      *sum += diff;
+      *sse += diff * diff;
+    }
+
+    src_ptr += src_stride;
+    ref_ptr += ref_stride;
+  }
+}
+
+// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to
+// implement the first-pass of 2-D separable filter.
+//
+// Produces int16_t output to retain precision for the next pass. Two filter
+// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
+// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
+// It defines the offset required to move from one input to the next.
+static void var_filter_block2d_bil_first_pass_svp64(
+    const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line,
+    int pixel_step, unsigned int output_height, unsigned int output_width,
+    const uint8_t *filter) {
+  unsigned int i, j;
+
+  for (i = 0; i < output_height; ++i) {
+    for (j = 0; j < output_width; ++j) {
+      ref_ptr[j] = ROUND_POWER_OF_TWO(
+          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
+          FILTER_BITS);
+
+      ++src_ptr;
+    }
+
+    src_ptr += src_pixels_per_line - output_width;
+    ref_ptr += output_width;
+  }
+}
+
+// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block.
Used to implement +// the second-pass of 2-D separable filter. +// +// Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two +// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the +// filter is applied horizontally (pixel_step = 1) or vertically +// (pixel_step = stride). It defines the offset required to move from one input +// to the next. Output is 8-bit. +static void var_filter_block2d_bil_second_pass_svp64( + const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line, + unsigned int pixel_step, unsigned int output_height, + unsigned int output_width, const uint8_t *filter) { + unsigned int i, j; + + for (i = 0; i < output_height; ++i) { + for (j = 0; j < output_width; ++j) { + ref_ptr[j] = ROUND_POWER_OF_TWO( + (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], + FILTER_BITS); + ++src_ptr; + } + + src_ptr += src_pixels_per_line - output_width; + ref_ptr += output_width; + } +} + +void vpx_comp_avg_pred_svp64(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride) { + int i, j; + + for (i = 0; i < height; ++i) { + for (j = 0; j < width; ++j) { + const int tmp = pred[j] + ref[j]; + comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} + +#define VAR(W, H) \ + uint32_t vpx_variance##W##x##H##_svp64(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + int sum; \ + variance_svp64(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \ + return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \ + } + +#define SUBPIX_VAR(W, H) \ + uint32_t vpx_sub_pixel_variance##W##x##H##_svp64( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint8_t temp2[H * W]; \ + \ + var_filter_block2d_bil_first_pass_svp64(src_ptr, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters[x_offset]); \ + var_filter_block2d_bil_second_pass_svp64(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + return vpx_variance##W##x##H##_svp64(temp2, W, ref_ptr, ref_stride, sse);\ + } + +#define SUBPIX_AVG_VAR(W, H) \ + uint32_t vpx_sub_pixel_avg_variance##W##x##H##_svp64( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint8_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ + \ + var_filter_block2d_bil_first_pass_svp64(src_ptr, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters[x_offset]); \ + var_filter_block2d_bil_second_pass_svp64(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + vpx_comp_avg_pred_svp64(temp3, second_pred, W, H, temp2, W); \ + \ + return vpx_variance##W##x##H##_svp64(temp3, W, ref_ptr, ref_stride, sse);\ + } + +/* Identical to the variance call except it takes an additional parameter, sum, + * and returns that value using pass-by-reference instead of returning + * sse - sum^2 / w*h + */ +#define GET_VAR(W, H) \ + void vpx_get##W##x##H##var_svp64(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse, int *sum) { \ + variance_svp64(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \ + } + +/* Identical to the variance call except it does not calculate the + * sse - sum^2 / w*h and returns sse in 
addition to modifying the passed-in
+ * variable.
+ */
+#define MSE(W, H)                                                              \
+  uint32_t vpx_mse##W##x##H##_svp64(const uint8_t *src_ptr, int src_stride,    \
+                                    const uint8_t *ref_ptr, int ref_stride,    \
+                                    uint32_t *sse) {                           \
+    int sum;                                                                   \
+    variance_svp64(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \
+    return *sse;                                                               \
+  }
+
+/* All three forms of the variance are available in the same sizes. */
+#define VARIANCES(W, H) \
+  VAR(W, H)             \
+  SUBPIX_VAR(W, H)      \
+  SUBPIX_AVG_VAR(W, H)
+
+VARIANCES(64, 64)
+VARIANCES(64, 32)
+VARIANCES(32, 64)
+VARIANCES(32, 32)
+VARIANCES(32, 16)
+VARIANCES(16, 32)
+VARIANCES(16, 16)
+VARIANCES(16, 8)
+VARIANCES(8, 16)
+VARIANCES(8, 8)
+VARIANCES(8, 4)
+VARIANCES(4, 8)
+VARIANCES(4, 4)
+
+GET_VAR(16, 16)
+GET_VAR(8, 8)
+
+MSE(16, 16)
+MSE(16, 8)
+MSE(8, 16)
+MSE(8, 8)
diff --git a/media/video/libvpx/variance_svp64_wrappers.c b/media/video/libvpx/variance_svp64_wrappers.c
new file mode 100644
index 00000000..c6e8431b
--- /dev/null
+++ b/media/video/libvpx/variance_svp64_wrappers.c
@@ -0,0 +1,59 @@
+#include <Python.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include "pypowersim_wrapper_common.h"
+#include "variance_svp64_wrappers.h"
+
+uint32_t vpx_get_mb_ss_svp64(const int16_t *src_ptr) {
+    // It cannot be the same pointer as the original function, as it is really
+    // a separate CPU/RAM; we have to memcpy from src_ptr to this pointer,
+    // the address was chosen arbitrarily
+    const uint64_t src_ptr_svp64 = 0x100000;
+    const uint64_t *src_ptr64 = (const uint64_t *) src_ptr;
+
+    // Create the pypowersim_state
+    pypowersim_state_t *state = pypowersim_prepare();
+
+    // Change the relevant elements, mandatory: body
+    state->binary = PyBytes_FromStringAndSize((const char *)&vpx_get_mb_ss_svp64_real, 1000);
+    // Set GPR #3 to the pointer
+    PyObject *address = PyLong_FromLongLong(src_ptr_svp64);
+    PyList_SetItem(state->initial_regs, 3, address);
+    // Load data into buffer from real memory
+    size_t size = 256*sizeof(uint16_t)/sizeof(uint64_t);
+    for (int i=0; i < size; i++) {
+      PyObject *address = PyLong_FromLongLong(src_ptr_svp64 + i*8);
+      PyObject *word = PyLong_FromLongLong(*(src_ptr64 + i));
+      PyDict_SetItem(state->initial_mem, address, word);
+    }
+
+    // Prepare the arguments object for the call
+    pypowersim_prepareargs(state);
+
+    // Call the function and get the resulting object
+    state->result_obj = PyObject_CallObject(state->simulator, state->args);
+    Py_DECREF(state->simulator);
+    Py_DECREF(state->args);
+    if (!state->result_obj) {
+        PyErr_Print();
+        printf("Error invoking 'run_a_simulation'\n");
+    }
+
+    // Get the GPRs from the result_obj
+    PyObject *final_regs = PyObject_GetAttrString(state->result_obj, "gpr");
+    if (!final_regs) {
+        PyErr_Print();
+        Py_DECREF(state->result_obj);
+        printf("Error getting final GPRs\n");
+    }
+
+    // GPR #3 holds the return value as an integer
+    PyObject *key = PyLong_FromLong(3);
+    PyObject *itm = PyDict_GetItem(final_regs, key);
+    PyObject *value = PyObject_GetAttrString(itm, "value");
+    uint64_t val = PyLong_AsLongLong(value);
+
+    // Return value
+    return (uint32_t) val;
+}
diff --git a/media/video/libvpx/variance_svp64_wrappers.h b/media/video/libvpx/variance_svp64_wrappers.h
new file mode 100644
index 00000000..d38ecff0
--- /dev/null
+++ b/media/video/libvpx/variance_svp64_wrappers.h
@@ -0,0 +1,3 @@
+#include <stdint.h>
+
+uint32_t vpx_get_mb_ss_svp64_real(const int16_t *src_ptr);
diff --git a/media/video/libvpx/variance_test.cc b/media/video/libvpx/variance_test.cc
new file mode 100644
index 00000000..2ea74a7b
--- /dev/null
diff --git a/media/video/libvpx/variance_test.cc b/media/video/libvpx/variance_test.cc
new file mode 100644
index 00000000..2ea74a7b
--- /dev/null
+++ b/media/video/libvpx/variance_test.cc
@@ -0,0 +1,786 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cstdlib>
+#include <new>
+
+#include <gtest/gtest.h>
+
+#include "vpx_misc.h"
+#include "vpx_dsp_rtcd.h"
+#include "acm_random.h"
+#include "clear_system_state.h"
+#include "register_state_check.h"
+#include "vpx_integer.h"
+#include "variance.h"
+#include "vpx_mem.h"
+#include "mem.h"
+#include "vpx_timer.h"
+
+namespace {
+
+typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride,
+                                      const uint8_t *b, int b_stride);
+typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src);
+
+using libvpx_test::ACMRandom;
+
+// Truncate high bit depth results by downshifting (with rounding) by:
+// 2 * (bit_depth - 8) for sse
+// (bit_depth - 8) for se
+static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) {
+  switch (bit_depth) {
+    case VPX_BITS_12:
+      *sse = (*sse + 128) >> 8;
+      *se = (*se + 8) >> 4;
+      break;
+    case VPX_BITS_10:
+      *sse = (*sse + 8) >> 4;
+      *se = (*se + 2) >> 2;
+      break;
+    case VPX_BITS_8:
+    default: break;
+  }
+}
+
+static unsigned int mb_ss_ref(const int16_t *src) {
+  unsigned int res = 0;
+  for (int i = 0; i < 256; ++i) {
+    res += src[i] * src[i];
+  }
+  return res;
+}
+
+/* Note:
+ *  Our codebase calculates the "diff" value in the variance algorithm by
+ *  (src - ref).
+ */
+static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w,
+                             int l2h, int src_stride, int ref_stride,
+                             uint32_t *sse_ptr, bool use_high_bit_depth_,
+                             vpx_bit_depth_t bit_depth) {
+  int64_t se = 0;
+  uint64_t sse = 0;
+  const int w = 1 << l2w;
+  const int h = 1 << l2h;
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      int diff;
+      if (!use_high_bit_depth_) {
+        diff = src[y * src_stride + x] - ref[y * ref_stride + x];
+        se += diff;
+        sse += diff * diff;
+      }
+    }
+  }
+  RoundHighBitDepth(bit_depth, &se, &sse);
+  *sse_ptr = static_cast<uint32_t>(sse);
+  return static_cast<uint32_t>(
+      sse - ((static_cast<int64_t>(se) * se) >> (l2w + l2h)));
+}
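RoundHighBitDepth rescales 10/12-bit results back to an 8-bit scale with round-to-nearest (note that this harness only ever instantiates 8-bit parameters, so the branch is never exercised here). A small editorial check of the 12-bit case:

    #include <cassert>
    #include <cstdint>

    int main() {
      // 12-bit sse is downshifted by 2 * (12 - 8) = 8 bits, se by 4 bits,
      // each with a half-ulp rounding term added first.
      uint64_t sse = 1000;
      int64_t se = 100;
      sse = (sse + 128) >> 8;  // (1000 + 128) >> 8 == 4
      se = (se + 8) >> 4;      // (100 + 8) >> 4 == 6
      assert(sse == 4 && se == 6);
      return 0;
    }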
+
+/* The subpel reference functions differ from the codec version in one aspect:
+ * they calculate the bilinear factors directly instead of using a lookup table
+ * and therefore upshift xoff and yoff by 1. Only every other calculated value
+ * is used so the codec version shrinks the table to save space and maintain
+ * compatibility with vp8.
+ */
+static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
+                                    int l2w, int l2h, int xoff, int yoff,
+                                    uint32_t *sse_ptr, bool use_high_bit_depth_,
+                                    vpx_bit_depth_t bit_depth) {
+  int64_t se = 0;
+  uint64_t sse = 0;
+  const int w = 1 << l2w;
+  const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      // Bilinear interpolation at a 16th pel step.
+      if (!use_high_bit_depth_) {
+        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int diff = r - src[w * y + x];
+        se += diff;
+        sse += diff * diff;
+      }
+    }
+  }
+  RoundHighBitDepth(bit_depth, &se, &sse);
+  *sse_ptr = static_cast<uint32_t>(sse);
+  return static_cast<uint32_t>(
+      sse - ((static_cast<int64_t>(se) * se) >> (l2w + l2h)));
+}
+
+static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
+                                        const uint8_t *second_pred, int l2w,
+                                        int l2h, int xoff, int yoff,
+                                        uint32_t *sse_ptr,
+                                        bool use_high_bit_depth,
+                                        vpx_bit_depth_t bit_depth) {
+  int64_t se = 0;
+  uint64_t sse = 0;
+  const int w = 1 << l2w;
+  const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
+  for (int y = 0; y < h; y++) {
+    for (int x = 0; x < w; x++) {
+      // bilinear interpolation at a 16th pel step
+      if (!use_high_bit_depth) {
+        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+        const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+        const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
+        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
+        const int r = a + (((b - a) * yoff + 8) >> 4);
+        const int diff =
+            ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
+        se += diff;
+        sse += diff * diff;
+      }
+    }
+  }
+  RoundHighBitDepth(bit_depth, &se, &sse);
+  *sse_ptr = static_cast<uint32_t>(sse);
+  return static_cast<uint32_t>(
+      sse - ((static_cast<int64_t>(se) * se) >> (l2w + l2h)));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> {
+ public:
+  SumOfSquaresTest() : func_(GetParam()) {}
+
+  virtual ~SumOfSquaresTest() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void ConstTest();
+  void RefTest();
+
+  SumOfSquaresFunction func_;
+  ACMRandom rnd_;
+};
+
+void SumOfSquaresTest::ConstTest() {
+  int16_t mem[256];
+  unsigned int res;
+  for (int v = 0; v < 20; ++v) {
+    for (int i = 0; i < 256; ++i) {
+      mem[i] = v;
+    }
+    ASM_REGISTER_STATE_CHECK(res = func_(mem));
+    EXPECT_EQ(256u * (v * v), res);
+  }
+}
+
+void SumOfSquaresTest::RefTest() {
+  int16_t mem[256];
+  for (int i = 0; i < 20; ++i) {
+    for (int j = 0; j < 256; ++j) {
+      mem[j] = rnd_.Rand8() - rnd_.Rand8();
+    }
+
+    const unsigned int expected = mb_ss_ref(mem);
+    unsigned int res;
+    ASM_REGISTER_STATE_CHECK(res = func_(mem));
+    EXPECT_EQ(expected, res);
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
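The two reference routines above compute the bilinear taps directly at a 16th-pel step. The kernel for a single output pixel can be isolated as below; this is an editorial sketch, bilinear_16th_pel is a hypothetical name, and xoff/yoff are assumed already doubled into 0..15 units as in the functions above:

    #include <cassert>

    // One output pixel of the reference bilinear interpolation, with
    // xoff/yoff in 16th-pel units (0..15).
    static int bilinear_16th_pel(int a1, int a2, int b1, int b2,
                                 int xoff, int yoff) {
      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);  // top row blend
      const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);  // bottom row blend
      return a + (((b - a) * yoff + 8) >> 4);            // vertical blend
    }

    int main() {
      // Halfway in both directions lands on the rounded average of the corners.
      assert(bilinear_16th_pel(0, 16, 16, 32, 8, 8) == 16);
      return 0;
    }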
+// Encapsulating struct to store the function to test along with
+// some testing context.
+// Can be used for MSE, SSE, Variance, etc.
+
+template <typename Func>
+struct TestParams {
+  TestParams(int log2w = 0, int log2h = 0, Func function = nullptr,
+             int bit_depth_value = 0)
+      : log2width(log2w), log2height(log2h), func(function) {
+    use_high_bit_depth = (bit_depth_value > 0);
+    if (use_high_bit_depth) {
+      bit_depth = static_cast<vpx_bit_depth_t>(bit_depth_value);
+    } else {
+      bit_depth = VPX_BITS_8;
+    }
+    width = 1 << log2width;
+    height = 1 << log2height;
+    block_size = width * height;
+    mask = (1u << bit_depth) - 1;
+  }
+
+  int log2width, log2height;
+  int width, height;
+  int block_size;
+  Func func;
+  vpx_bit_depth_t bit_depth;
+  bool use_high_bit_depth;
+  uint32_t mask;
+};
+
+template <typename Func>
+std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) {
+  return os << "log2width/height:" << p.log2width << "/" << p.log2height
+            << " function:" << reinterpret_cast<const void *>(p.func)
+            << " bit-depth:" << p.bit_depth;
+}
+
+// Main class for testing a function type
+template <typename FunctionType>
+class MainTestClass
+    : public ::testing::TestWithParam<TestParams<FunctionType> > {
+ public:
+  virtual void SetUp() {
+    params_ = this->GetParam();
+
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    const size_t unit =
+        use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t);
+    src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size() * unit));
+    ref_ = new uint8_t[block_size() * unit];
+    ASSERT_NE(src_, nullptr);
+    ASSERT_NE(ref_, nullptr);
+  }
+
+  virtual void TearDown() {
+    vpx_free(src_);
+    delete[] ref_;
+    src_ = nullptr;
+    ref_ = nullptr;
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  // We could sub-class MainTestClass into dedicated class for Variance
+  // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing
+  // to access top class fields xxx. That's cumbersome, so for now we'll just
+  // implement the testing methods here:
+
+  // Variance tests
+  void ZeroTest();
+  void RefTest();
+  void RefStrideTest();
+  void OneQuarterTest();
+  void SpeedTest();
+
+  // MSE/SSE tests
+  void RefTestMse();
+  void RefTestSse();
+  void MaxTestMse();
+  void MaxTestSse();
+
+ protected:
+  ACMRandom rnd_;
+  uint8_t *src_;
+  uint8_t *ref_;
+  TestParams<FunctionType> params_;
+
+  // some relay helpers
+  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
+  int byte_shift() const { return params_.bit_depth - 8; }
+  int block_size() const { return params_.block_size; }
+  int width() const { return params_.width; }
+  int height() const { return params_.height; }
+  uint32_t mask() const { return params_.mask; }
+};
+
+////////////////////////////////////////////////////////////////////////////////
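TestParams packs the block geometry as log2 dimensions, so an instantiation such as VarianceParams(6, 5, ...) denotes a 64x32 block. The unpacking arithmetic, checked in isolation as an editorial aside:

    #include <cassert>

    int main() {
      // Geometry arithmetic performed by TestParams(log2w = 6, log2h = 5, ...):
      const int log2w = 6, log2h = 5;
      const int width = 1 << log2w;    // 64
      const int height = 1 << log2h;   // 32
      const int block_size = width * height;
      assert(width == 64 && height == 32 && block_size == 2048);
      return 0;
    }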
+// Tests related to variance.
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::ZeroTest() {
+  for (int i = 0; i <= 255; ++i) {
+    if (!use_high_bit_depth()) {
+      memset(src_, i, block_size());
+    } else {
+      uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
+      for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift();
+    }
+    for (int j = 0; j <= 255; ++j) {
+      if (!use_high_bit_depth()) {
+        memset(ref_, j, block_size());
+      } else {
+        uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_);
+        for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift();
+      }
+      unsigned int sse, var;
+      ASM_REGISTER_STATE_CHECK(
+          var = params_.func(src_, width(), ref_, width(), &sse));
+      EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j;
+    }
+  }
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::RefTest() {
+  for (int i = 0; i < 10; ++i) {
+    for (int j = 0; j < block_size(); j++) {
+      if (!use_high_bit_depth()) {
+        src_[j] = rnd_.Rand8();
+        ref_[j] = rnd_.Rand8();
+      }
+    }
+    unsigned int sse1, sse2, var1, var2;
+    const int stride = width();
+    ASM_REGISTER_STATE_CHECK(
+        var1 = params_.func(src_, stride, ref_, stride, &sse1));
+    var2 =
+        variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
+                     stride, &sse2, use_high_bit_depth(), params_.bit_depth);
+    EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
+    EXPECT_EQ(var1, var2) << "Error at test index: " << i;
+  }
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::RefStrideTest() {
+  for (int i = 0; i < 10; ++i) {
+    const int ref_stride = (i & 1) * width();
+    const int src_stride = ((i >> 1) & 1) * width();
+    for (int j = 0; j < block_size(); j++) {
+      const int ref_ind = (j / width()) * ref_stride + j % width();
+      const int src_ind = (j / width()) * src_stride + j % width();
+      if (!use_high_bit_depth()) {
+        src_[src_ind] = rnd_.Rand8();
+        ref_[ref_ind] = rnd_.Rand8();
+      }
+    }
+    unsigned int sse1, sse2;
+    unsigned int var1, var2;
+
+    ASM_REGISTER_STATE_CHECK(
+        var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1));
+    var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height,
+                        src_stride, ref_stride, &sse2, use_high_bit_depth(),
+                        params_.bit_depth);
+    EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
+    EXPECT_EQ(var1, var2) << "Error at test index: " << i;
+  }
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::OneQuarterTest() {
+  const int half = block_size() / 2;
+  if (!use_high_bit_depth()) {
+    memset(src_, 255, block_size());
+    memset(ref_, 255, half);
+    memset(ref_ + half, 0, half);
+  }
+  unsigned int sse, var, expected;
+  ASM_REGISTER_STATE_CHECK(
+      var = params_.func(src_, width(), ref_, width(), &sse));
+  expected = block_size() * 255 * 255 / 4;
+  EXPECT_EQ(expected, var);
+}
+
+template <typename VarianceFunctionType>
+void MainTestClass<VarianceFunctionType>::SpeedTest() {
+  const int half = block_size() / 2;
+  if (!use_high_bit_depth()) {
+    memset(src_, 255, block_size());
+    memset(ref_, 255, half);
+    memset(ref_ + half, 0, half);
+  }
+  unsigned int sse;
+
+  vpx_usec_timer timer;
+  vpx_usec_timer_start(&timer);
+  for (int i = 0; i < (1 << 30) / block_size(); ++i) {
+    const uint32_t variance = params_.func(src_, width(), ref_, width(), &sse);
+    // Ignore return value.
+    (void)variance;
+  }
+  vpx_usec_timer_mark(&timer);
+  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
+  printf("Variance %dx%d time: %5d ms\n", width(), height(),
+         elapsed_time / 1000);
+}
+
+////////////////////////////////////////////////////////////////////////////////
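OneQuarterTest's expectation of block_size() * 255 * 255 / 4 follows from the variance identity: half the diffs are 0 and half are 255, so sse = N*255^2/2 and sum = N*255/2. A quick editorial verification for N = 256:

    #include <cassert>
    #include <cstdint>

    int main() {
      // OneQuarterTest expectation for a 16x16 block (N = 256):
      // half the diffs are 0, half are 255.
      const int64_t N = 256;
      const int64_t sum = (N / 2) * 255;
      const int64_t sse = (N / 2) * 255 * 255;
      const int64_t var = sse - (sum * sum) / N;
      assert(var == N * 255 * 255 / 4);  // 4161600 for N = 256
      return 0;
    }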
+// Tests related to MSE / SSE.
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::RefTestMse() {
+  for (int i = 0; i < 10; ++i) {
+    for (int j = 0; j < block_size(); ++j) {
+      src_[j] = rnd_.Rand8();
+      ref_[j] = rnd_.Rand8();
+    }
+    unsigned int sse1, sse2;
+    const int stride = width();
+    ASM_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1));
+    variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
+                 stride, &sse2, false, VPX_BITS_8);
+    EXPECT_EQ(sse1, sse2);
+  }
+}
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::RefTestSse() {
+  for (int i = 0; i < 10; ++i) {
+    for (int j = 0; j < block_size(); ++j) {
+      src_[j] = rnd_.Rand8();
+      ref_[j] = rnd_.Rand8();
+    }
+    unsigned int sse2;
+    unsigned int var1;
+    const int stride = width();
+    ASM_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride));
+    variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
+                 stride, &sse2, false, VPX_BITS_8);
+    EXPECT_EQ(var1, sse2);
+  }
+}
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::MaxTestMse() {
+  memset(src_, 255, block_size());
+  memset(ref_, 0, block_size());
+  unsigned int sse;
+  ASM_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse));
+  const unsigned int expected = block_size() * 255 * 255;
+  EXPECT_EQ(expected, sse);
+}
+
+template <typename FunctionType>
+void MainTestClass<FunctionType>::MaxTestSse() {
+  memset(src_, 255, block_size());
+  memset(ref_, 0, block_size());
+  unsigned int var;
+  ASM_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width()));
+  const unsigned int expected = block_size() * 255 * 255;
+  EXPECT_EQ(expected, var);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <typename FunctionType>
+class SubpelVarianceTest
+    : public ::testing::TestWithParam<TestParams<FunctionType> > {
+ public:
+  virtual void SetUp() {
+    params_ = this->GetParam();
+
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+    if (!use_high_bit_depth()) {
+      src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size()));
+      sec_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size()));
+      ref_ = reinterpret_cast<uint8_t *>(
+          vpx_malloc(block_size() + width() + height() + 1));
+    }
+    ASSERT_NE(src_, nullptr);
+    ASSERT_NE(sec_, nullptr);
+    ASSERT_NE(ref_, nullptr);
+  }
+
+  virtual void TearDown() {
+    if (!use_high_bit_depth()) {
+      vpx_free(src_);
+      vpx_free(sec_);
+      vpx_free(ref_);
+    }
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  void RefTest();
+  void ExtremeRefTest();
+  void SpeedTest();
+
+  ACMRandom rnd_;
+  uint8_t *src_;
+  uint8_t *ref_;
+  uint8_t *sec_;
+  TestParams<FunctionType> params_;
+
+  // some relay helpers
+  bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
+  int byte_shift() const { return params_.bit_depth - 8; }
+  int block_size() const { return params_.block_size; }
+  int width() const { return params_.width; }
+  int height() const { return params_.height; }
+  uint32_t mask() const { return params_.mask; }
+};
+
+template <typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
+      if (!use_high_bit_depth()) {
+        for (int j = 0; j < block_size(); j++) {
+          src_[j] = rnd_.Rand8();
+        }
+        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+          ref_[j] = rnd_.Rand8();
+        }
+      }
+      unsigned int sse1, sse2;
+      unsigned int var1;
+      ASM_REGISTER_STATE_CHECK(
+          var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
+      const unsigned int var2 = subpel_variance_ref(
+          ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
+          use_high_bit_depth(), params_.bit_depth);
+      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
+      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+    }
+  }
+}
+
+template <typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
+  // Compare against reference.
+  // Src: Set the first half of values to 0, the second half to the maximum.
+  // Ref: Set the first half of values to the maximum, the second half to 0.
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
+      const int half = block_size() / 2;
+      if (!use_high_bit_depth()) {
+        memset(src_, 0, half);
+        memset(src_ + half, 255, half);
+        memset(ref_, 255, half);
+        memset(ref_ + half, 0, half + width() + height() + 1);
+      }
+      unsigned int sse1, sse2;
+      unsigned int var1;
+      ASM_REGISTER_STATE_CHECK(
+          var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
+      const unsigned int var2 = subpel_variance_ref(
+          ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
+          use_high_bit_depth(), params_.bit_depth);
+      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
+      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
+    }
+  }
+}
+
+template <typename SubpelVarianceFunctionType>
+void SubpelVarianceTest<SubpelVarianceFunctionType>::SpeedTest() {
+  // The only interesting points are 0, 4, and anything else. To make the loops
+  // simple we will use 0, 2 and 4.
+  for (int x = 0; x <= 4; x += 2) {
+    for (int y = 0; y <= 4; y += 2) {
+      if (!use_high_bit_depth()) {
+        memset(src_, 25, block_size());
+        memset(ref_, 50, block_size());
+      }
+      unsigned int sse;
+      vpx_usec_timer timer;
+      vpx_usec_timer_start(&timer);
+      for (int i = 0; i < 1000000000 / block_size(); ++i) {
+        const uint32_t variance =
+            params_.func(ref_, width() + 1, x, y, src_, width(), &sse);
+        (void)variance;
+      }
+      vpx_usec_timer_mark(&timer);
+      const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
+      printf("SubpelVariance %dx%d xoffset: %d yoffset: %d time: %5d ms\n",
+             width(), height(), x, y, elapsed_time / 1000);
+    }
+  }
+}
+
+template <>
+void SubpelVarianceTest<vpx_subp_avg_variance_fn_t>::RefTest() {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
+      if (!use_high_bit_depth()) {
+        for (int j = 0; j < block_size(); j++) {
+          src_[j] = rnd_.Rand8();
+          sec_[j] = rnd_.Rand8();
+        }
+        for (int j = 0; j < block_size() + width() + height() + 1; j++) {
+          ref_[j] = rnd_.Rand8();
+        }
+      }
+      uint32_t sse1, sse2;
+      uint32_t var1, var2;
+      ASM_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y,
+                                                   src_, width(), &sse1, sec_));
+      var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width,
+                                     params_.log2height, x, y, &sse2,
+                                     use_high_bit_depth(), params_.bit_depth);
+      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
+      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+    }
+  }
+}
+
+typedef MainTestClass<Get4x4SseFunc> VpxSseTest;
+typedef MainTestClass<vpx_variance_fn_t> VpxMseTest;
+typedef MainTestClass<vpx_variance_fn_t> VpxVarianceTest;
+typedef SubpelVarianceTest<vpx_subpixvariance_fn_t> VpxSubpelVarianceTest;
+typedef SubpelVarianceTest<vpx_subp_avg_variance_fn_t> VpxSubpelAvgVarianceTest;
+
+TEST_P(VpxSseTest, RefSse) { RefTestSse(); }
+TEST_P(VpxSseTest, MaxSse) { MaxTestSse(); }
+TEST_P(VpxMseTest, RefMse) { RefTestMse(); }
+TEST_P(VpxMseTest, MaxMse) { MaxTestMse(); }
+TEST_P(VpxVarianceTest, Zero) { ZeroTest(); }
+TEST_P(VpxVarianceTest, Ref) { RefTest(); }
+TEST_P(VpxVarianceTest, RefStride) { RefStrideTest(); }
+TEST_P(VpxVarianceTest, OneQuarter) { OneQuarterTest(); }
+TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
+TEST_P(SumOfSquaresTest, Ref) { RefTest(); }
+TEST_P(VpxSubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(VpxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+TEST_P(VpxSubpelAvgVarianceTest, Ref) { RefTest(); }
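Each INSTANTIATE_TEST_SUITE_P call below registers one (geometry, function) pair per Values() entry, so the same TEST_P bodies run against both the C and SVP64 implementations under their respective prefixes. For illustration only, a hypothetical new kernel would be wired in with one more instantiation; vpx_variance128x128_svp64 does not exist in this patch and is shown purely to demonstrate the registration pattern:

    // Hypothetical: registers a 128x128 (log2 = 7) kernel with the same suite.
    INSTANTIATE_TEST_SUITE_P(
        SVP64_128, VpxVarianceTest,
        ::testing::Values(VarianceParams(7, 7, &vpx_variance128x128_svp64)));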
+
+INSTANTIATE_TEST_SUITE_P(C, SumOfSquaresTest,
+                         ::testing::Values(vpx_get_mb_ss_c));
+
+typedef TestParams<Get4x4SseFunc> SseParams;
+INSTANTIATE_TEST_SUITE_P(C, VpxSseTest,
+                         ::testing::Values(SseParams(2, 2,
+                                                     &vpx_get4x4sse_cs_c)));
+
+typedef TestParams<vpx_variance_fn_t> MseParams;
+INSTANTIATE_TEST_SUITE_P(C, VpxMseTest,
+                         ::testing::Values(MseParams(4, 4, &vpx_mse16x16_c),
+                                           MseParams(4, 3, &vpx_mse16x8_c),
+                                           MseParams(3, 4, &vpx_mse8x16_c),
+                                           MseParams(3, 3, &vpx_mse8x8_c)));
+
+typedef TestParams<vpx_variance_fn_t> VarianceParams;
+INSTANTIATE_TEST_SUITE_P(
+    C, VpxVarianceTest,
+    ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_c),
+                      VarianceParams(6, 5, &vpx_variance64x32_c),
+                      VarianceParams(5, 6, &vpx_variance32x64_c),
+                      VarianceParams(5, 5, &vpx_variance32x32_c),
+                      VarianceParams(5, 4, &vpx_variance32x16_c),
+                      VarianceParams(4, 5, &vpx_variance16x32_c),
+                      VarianceParams(4, 4, &vpx_variance16x16_c),
+                      VarianceParams(4, 3, &vpx_variance16x8_c),
+                      VarianceParams(3, 4, &vpx_variance8x16_c),
+                      VarianceParams(3, 3, &vpx_variance8x8_c),
+                      VarianceParams(3, 2, &vpx_variance8x4_c),
+                      VarianceParams(2, 3, &vpx_variance4x8_c),
+                      VarianceParams(2, 2, &vpx_variance4x4_c)));
+
+typedef TestParams<vpx_subpixvariance_fn_t> SubpelVarianceParams;
+INSTANTIATE_TEST_SUITE_P(
+    C, VpxSubpelVarianceTest,
+    ::testing::Values(
+        SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_c, 0),
+        SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_c, 0),
+        SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_c, 0),
+        SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_c, 0),
+        SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_c, 0),
+        SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_c, 0),
+        SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_c, 0),
+        SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_c, 0),
+        SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_c, 0),
+        SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_c, 0),
+        SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_c, 0),
+        SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_c, 0),
+        SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_c, 0)));
+
+typedef TestParams<vpx_subp_avg_variance_fn_t> SubpelAvgVarianceParams;
+INSTANTIATE_TEST_SUITE_P(
+    C, VpxSubpelAvgVarianceTest,
+    ::testing::Values(
+        SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_c, 0),
+        SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_c, 0),
+        SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_c, 0),
+        SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_c, 0),
+        SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_c, 0),
+        SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_c, 0),
+        SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_c, 0),
+        SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_c, 0),
+        SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_c, 0),
+        SubpelAvgVarianceParams(3, 3, &vpx_sub_pixel_avg_variance8x8_c, 0),
+        SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_c, 0),
+        SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_c, 0),
+        SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_c, 0)));
+
+INSTANTIATE_TEST_SUITE_P(SVP64, SumOfSquaresTest,
+                         ::testing::Values(vpx_get_mb_ss_svp64));
+
+typedef TestParams<Get4x4SseFunc> SseParams;
+INSTANTIATE_TEST_SUITE_P(SVP64, VpxSseTest,
+                         ::testing::Values(SseParams(2, 2,
+                                                     &vpx_get4x4sse_cs_svp64)));
+
+typedef TestParams<vpx_variance_fn_t> MseParams;
+INSTANTIATE_TEST_SUITE_P(SVP64, VpxMseTest,
+                         ::testing::Values(MseParams(4, 4, &vpx_mse16x16_svp64),
+                                           MseParams(4, 3, &vpx_mse16x8_svp64),
+                                           MseParams(3, 4, &vpx_mse8x16_svp64),
+                                           MseParams(3, 3, &vpx_mse8x8_svp64)));
+
+typedef TestParams<vpx_variance_fn_t> VarianceParams;
+INSTANTIATE_TEST_SUITE_P(
+    SVP64, VpxVarianceTest,
+    ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_svp64),
+                      VarianceParams(6, 5, &vpx_variance64x32_svp64),
+                      VarianceParams(5, 6, &vpx_variance32x64_svp64),
+                      VarianceParams(5, 5, &vpx_variance32x32_svp64),
+                      VarianceParams(5, 4, &vpx_variance32x16_svp64),
+                      VarianceParams(4, 5, &vpx_variance16x32_svp64),
+                      VarianceParams(4, 4, &vpx_variance16x16_svp64),
+                      VarianceParams(4, 3, &vpx_variance16x8_svp64),
+                      VarianceParams(3, 4, &vpx_variance8x16_svp64),
+                      VarianceParams(3, 3, &vpx_variance8x8_svp64),
+                      VarianceParams(3, 2, &vpx_variance8x4_svp64),
+                      VarianceParams(2, 3, &vpx_variance4x8_svp64),
+                      VarianceParams(2, 2, &vpx_variance4x4_svp64)));
+
+typedef TestParams<vpx_subpixvariance_fn_t> SubpelVarianceParams;
+INSTANTIATE_TEST_SUITE_P(
+    SVP64, VpxSubpelVarianceTest,
+    ::testing::Values(
+        SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_svp64, 0),
+        SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_svp64, 0),
+        SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_svp64, 0),
+        SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_svp64, 0),
+        SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_svp64, 0),
+        SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_svp64, 0),
+        SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_svp64, 0),
+        SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_svp64, 0),
+        SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_svp64, 0),
+        SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_svp64, 0),
+        SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_svp64, 0),
+        SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_svp64, 0),
+        SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_svp64, 0)));
+
+typedef TestParams<vpx_subp_avg_variance_fn_t> SubpelAvgVarianceParams;
+INSTANTIATE_TEST_SUITE_P(
+    SVP64, VpxSubpelAvgVarianceTest,
+    ::testing::Values(
+        SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_svp64, 0),
+        SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_svp64, 0),
+        SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_svp64, 0),
+        SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_svp64, 0),
+        SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_svp64, 0),
+        SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_svp64, 0),
+        SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_svp64, 0),
+        SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_svp64, 0),
+        SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_svp64, 0),
+        SubpelAvgVarianceParams(3, 3, &vpx_sub_pixel_avg_variance8x8_svp64, 0),
+        SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_svp64, 0),
+        SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_svp64, 0),
+        SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_svp64, 0)));
+
+} // namespace
diff --git a/media/video/libvpx/variancefuncs_svp64.c.in b/media/video/libvpx/variancefuncs_svp64.c.in
new file mode 100644
index 00000000..94ecf4db
--- /dev/null
+++ b/media/video/libvpx/variancefuncs_svp64.c.in
@@ -0,0 +1,22 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdint.h>
+
+uint32_t vpx_get_mb_ss_svp64_real(const int16_t *src_ptr) {
+  unsigned int i, sum = 0;
+
+  for (i = 0; i < 256; ++i) {
+    sum += src_ptr[i] * src_ptr[i];
+  }
+
+  return sum;
+}
+
diff --git a/media/video/libvpx/variancefuncs_svp64.s b/media/video/libvpx/variancefuncs_svp64.s
new file mode 100644
index 00000000..6d33d6b9
--- /dev/null
+++ b/media/video/libvpx/variancefuncs_svp64.s
@@ -0,0 +1,27 @@
+	.file	"variancefuncs_svp64.c"
+	.abiversion 2
+	.section	".text"
+	.align 2
+	.globl vpx_get_mb_ss_svp64_real
+	.type	vpx_get_mb_ss_svp64_real, @function
+vpx_get_mb_ss_svp64_real:
+.LFB0:
+	.cfi_startproc
+	addi 10,3,-2
+	li 3,0
+	li 9,256
+	mtctr 9
+.L2:
+	lhau 9,2(10)
+	mullw 9,9,9
+	add 9,9,3
+	rldicl 3,9,0,32
+	bdnz .L2
+	blr
+	.long 0
+	.byte 0,0,0,0,0,0,0,0
+	.cfi_endproc
+.LFE0:
+	.size	vpx_get_mb_ss_svp64_real,.-vpx_get_mb_ss_svp64_real
+	.ident	"GCC: (Debian 8.3.0-6) 8.3.0"
+	.section	.note.GNU-stack,"",@progbits
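The .s listing above is GCC's PowerPC code for the C loop in variancefuncs_svp64.c.in: lhau pre-increments through the 256 halfwords (hence the initial addi 10,3,-2), mullw/add accumulate the squares, and rldicl 3,9,0,32 truncates the running sum to 32 bits in the return register. A host-side mirror of those semantics, as an editorial check:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Mirror of the vpx_get_mb_ss loop (and of the lhau/mullw/add sequence
      // in the .s file): 256 signed 16-bit loads, square, accumulate, with the
      // running sum kept to 32 bits (the rldicl 3,9,0,32 in the assembly).
      int16_t src[256];
      for (int i = 0; i < 256; ++i) src[i] = -3;  // constant block
      uint32_t sum = 0;
      for (int i = 0; i < 256; ++i) sum += (uint32_t)(src[i] * src[i]);
      assert(sum == 256 * 9);  // 2304
      return 0;
    }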
diff --git a/media/video/libvpx/vpx_mem.c b/media/video/libvpx/vpx_mem.c
new file mode 100644
index 00000000..27f978ce
--- /dev/null
+++ b/media/video/libvpx/vpx_mem.c
@@ -0,0 +1,87 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "vpx_mem.h"
+#include "vpx_mem_intrnl.h"
+#include "vpx_integer.h"
+
+#if !defined(VPX_MAX_ALLOCABLE_MEMORY)
+#if SIZE_MAX > (1ULL << 40)
+#define VPX_MAX_ALLOCABLE_MEMORY (1ULL << 40)
+#else
+// For 32-bit targets keep this below INT_MAX to avoid valgrind warnings.
+#define VPX_MAX_ALLOCABLE_MEMORY ((1ULL << 31) - (1 << 16))
+#endif
+#endif
+
+// Returns 0 in case of overflow of nmemb * size.
+static int check_size_argument_overflow(uint64_t nmemb, uint64_t size) {
+  const uint64_t total_size = nmemb * size;
+  if (nmemb == 0) return 1;
+  if (size > VPX_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
+  if (total_size != (size_t)total_size) return 0;
+
+  return 1;
+}
+
+static size_t *get_malloc_address_location(void *const mem) {
+  return ((size_t *)mem) - 1;
+}
+
+static uint64_t get_aligned_malloc_size(size_t size, size_t align) {
+  return (uint64_t)size + align - 1 + ADDRESS_STORAGE_SIZE;
+}
+
+static void set_actual_malloc_address(void *const mem,
+                                      const void *const malloc_addr) {
+  size_t *const malloc_addr_location = get_malloc_address_location(mem);
+  *malloc_addr_location = (size_t)malloc_addr;
+}
+
+static void *get_actual_malloc_address(void *const mem) {
+  size_t *const malloc_addr_location = get_malloc_address_location(mem);
+  return (void *)(*malloc_addr_location);
+}
+
+void *vpx_memalign(size_t align, size_t size) {
+  void *x = NULL, *addr;
+  const uint64_t aligned_size = get_aligned_malloc_size(size, align);
+  if (!check_size_argument_overflow(1, aligned_size)) return NULL;
+
+  addr = malloc((size_t)aligned_size);
+  if (addr) {
+    x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, align);
+    set_actual_malloc_address(x, addr);
+  }
+  return x;
+}
+
+void *vpx_malloc(size_t size) { return vpx_memalign(DEFAULT_ALIGNMENT, size); }
+
+void *vpx_calloc(size_t num, size_t size) {
+  void *x;
+  if (!check_size_argument_overflow(num, size)) return NULL;
+
+  x = vpx_malloc(num * size);
+  if (x) memset(x, 0, num * size);
+  return x;
+}
+
+void vpx_free(void *memblk) {
+  if (memblk) {
+    void *addr = get_actual_malloc_address(memblk);
+    free(addr);
+  }
+}
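vpx_memalign above over-allocates by align - 1 + ADDRESS_STORAGE_SIZE and stashes the raw malloc() pointer in the size_t slot just below the aligned address, which is how vpx_free recovers it. The address math, as a sketch assuming align_addr rounds up to the next multiple of align (not a statement about its actual definition in vpx_mem_intrnl.h):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Bookkeeping behind vpx_memalign/vpx_free: reserve sizeof(size_t)
      // bytes below the aligned address for the raw malloc() pointer.
      const uintptr_t raw = 0x1004;      // hypothetical malloc() result
      const uintptr_t align = 32;
      const uintptr_t storage = sizeof(size_t);
      const uintptr_t aligned = (raw + storage + align - 1) & ~(align - 1);
      assert(aligned % align == 0);
      assert(aligned - raw >= storage);  // room to stash the raw pointer
      return 0;
    }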