From 57b80c915caf03d90f10aeb9221af4e1033a6199 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Tue, 20 Sep 2022 20:16:25 +0000 Subject: [PATCH] PoC simplified and isolated unit test for libvpx (VP8 & VP9) that uses pypowersim_wrapper --- media/video/libvpx/Makefile | 33 + media/video/libvpx/include/acm_random.h | 93 +++ .../video/libvpx/include/clear_system_state.h | 23 + media/video/libvpx/include/mem.h | 44 + .../libvpx/include/register_state_check.h | 187 +++++ media/video/libvpx/include/system_state.h | 30 + media/video/libvpx/include/variance.h | 84 ++ media/video/libvpx/include/vpx_dsp_rtcd.h | 211 +++++ media/video/libvpx/include/vpx_integer.h | 42 + media/video/libvpx/include/vpx_mem.h | 52 ++ media/video/libvpx/include/vpx_mem_intrnl.h | 31 + media/video/libvpx/include/vpx_misc.h | 14 + media/video/libvpx/include/vpx_timer.h | 109 +++ media/video/libvpx/test_libvpx.cc | 18 + media/video/libvpx/variance_ref.c | 238 ++++++ media/video/libvpx/variance_svp64.c | 239 ++++++ media/video/libvpx/variance_svp64_wrappers.c | 59 ++ media/video/libvpx/variance_svp64_wrappers.h | 3 + media/video/libvpx/variance_test.cc | 786 ++++++++++++++++++ media/video/libvpx/variancefuncs_svp64.c.in | 22 + media/video/libvpx/variancefuncs_svp64.s | 27 + media/video/libvpx/vpx_mem.c | 87 ++ 22 files changed, 2432 insertions(+) create mode 100644 media/video/libvpx/Makefile create mode 100644 media/video/libvpx/include/acm_random.h create mode 100644 media/video/libvpx/include/clear_system_state.h create mode 100644 media/video/libvpx/include/mem.h create mode 100644 media/video/libvpx/include/register_state_check.h create mode 100644 media/video/libvpx/include/system_state.h create mode 100644 media/video/libvpx/include/variance.h create mode 100644 media/video/libvpx/include/vpx_dsp_rtcd.h create mode 100644 media/video/libvpx/include/vpx_integer.h create mode 100644 media/video/libvpx/include/vpx_mem.h create mode 100644 media/video/libvpx/include/vpx_mem_intrnl.h create mode 100644 media/video/libvpx/include/vpx_misc.h create mode 100644 media/video/libvpx/include/vpx_timer.h create mode 100644 media/video/libvpx/test_libvpx.cc create mode 100644 media/video/libvpx/variance_ref.c create mode 100644 media/video/libvpx/variance_svp64.c create mode 100644 media/video/libvpx/variance_svp64_wrappers.c create mode 100644 media/video/libvpx/variance_svp64_wrappers.h create mode 100644 media/video/libvpx/variance_test.cc create mode 100644 media/video/libvpx/variancefuncs_svp64.c.in create mode 100644 media/video/libvpx/variancefuncs_svp64.s create mode 100644 media/video/libvpx/vpx_mem.c diff --git a/media/video/libvpx/Makefile b/media/video/libvpx/Makefile new file mode 100644 index 00000000..06b82323 --- /dev/null +++ b/media/video/libvpx/Makefile @@ -0,0 +1,33 @@ +TARGET=libvpx_variance_test +EXAMPLE=pypowersim_wrapper_example + +CC=gcc +CXX=g++ +AS=powerpc64le-linux-gnu-as +OBJCOPY=powerpc64le-linux-gnu-objcopy +CFLAGS= -Iinclude -O -g3 -I/usr/include/python3.7m +CXXFLAGS= -Iinclude -O -g3 +ASFLAGS= -mlibresoc +LDFLAGS=-lgtest -pthread -lpython3.7m + +BINFILES = variancefuncs_svp64.bin +ASFILES = variancefuncs_svp64.s +CFILES = variance_ref.c variance_svp64.c variance_svp64_wrappers.c vpx_mem.c +CPPFILES = test_libvpx.cc variance_test.cc +EXAMPLEC = pypowersim_wrapper_example.c +EXAMPLEOBJ= ${EXAMPLEC:.c=.o} +OBJFILES = $(CFILES:.c=.o) $(CPPFILES:.cc=.o) $(ASFILES:.s=.o) + +variancefuncs_svp64.bin: variancefuncs_svp64.o + ${OBJCOPY} -I elf64-little -O binary $< $@ + +${TARGET}: ${OBJFILES} 
${BINFILES} + ${CXX} -o ${TARGET} ${OBJFILES} ${LDFLAGS} + +${EXAMPLE}: ${EXAMPLEOBJ} + +all: ${TARGET} ${EXAMPLE} + +.PHONY: clean +clean: + rm -f ${TARGET} ${OBJFILES} ${BINFILES} diff --git a/media/video/libvpx/include/acm_random.h b/media/video/libvpx/include/acm_random.h new file mode 100644 index 00000000..cd14818d --- /dev/null +++ b/media/video/libvpx/include/acm_random.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_TEST_ACM_RANDOM_H_ +#define VPX_TEST_ACM_RANDOM_H_ + +#include + +#include + +#include "gtest/gtest.h" + +#include "vpx_integer.h" + +namespace libvpx_test { + +class ACMRandom { + public: + ACMRandom() : random_(DeterministicSeed()) {} + + explicit ACMRandom(int seed) : random_(seed) {} + + void Reset(int seed) { random_.Reseed(seed); } + uint16_t Rand16(void) { + const uint32_t value = + random_.Generate(testing::internal::Random::kMaxRange); + return (value >> 15) & 0xffff; + } + + int32_t Rand20Signed(void) { + // Use 20 bits: values between 524287 and -524288. + const uint32_t value = random_.Generate(1048576); + return static_cast(value) - 524288; + } + + int16_t Rand16Signed(void) { + // Use 16 bits: values between 32767 and -32768. + return static_cast(random_.Generate(65536)); + } + + int16_t Rand13Signed(void) { + // Use 13 bits: values between 4095 and -4096. + const uint32_t value = random_.Generate(8192); + return static_cast(value) - 4096; + } + + int16_t Rand9Signed(void) { + // Use 9 bits: values between 255 (0x0FF) and -256 (0x100). + const uint32_t value = random_.Generate(512); + return static_cast(value) - 256; + } + + uint8_t Rand8(void) { + const uint32_t value = + random_.Generate(testing::internal::Random::kMaxRange); + // There's a bit more entropy in the upper bits of this implementation. + return (value >> 23) & 0xff; + } + + uint8_t Rand8Extremes(void) { + // Returns a random value near 0 or near 255, to better exercise + // saturation behavior. + const uint8_t r = Rand8(); + return static_cast((r < 128) ? r << 4 : r >> 4); + } + + uint32_t RandRange(const uint32_t range) { + // testing::internal::Random::Generate provides values in the range + // testing::internal::Random::kMaxRange. + assert(range <= testing::internal::Random::kMaxRange); + return random_.Generate(range); + } + + int PseudoUniform(int range) { return random_.Generate(range); } + + int operator()(int n) { return PseudoUniform(n); } + + static int DeterministicSeed(void) { return 0xbaba; } + + private: + testing::internal::Random random_; +}; + +} // namespace libvpx_test + +#endif // VPX_TEST_ACM_RANDOM_H_ diff --git a/media/video/libvpx/include/clear_system_state.h b/media/video/libvpx/include/clear_system_state.h new file mode 100644 index 00000000..66e0ee96 --- /dev/null +++ b/media/video/libvpx/include/clear_system_state.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef VPX_TEST_CLEAR_SYSTEM_STATE_H_ +#define VPX_TEST_CLEAR_SYSTEM_STATE_H_ + +//#include "./vpx_config.h" +#include "system_state.h" + +namespace libvpx_test { + +// Reset system to a known state. This function should be used for all non-API +// test cases. +inline void ClearSystemState() { vpx_clear_system_state(); } + +} // namespace libvpx_test +#endif // VPX_TEST_CLEAR_SYSTEM_STATE_H_ diff --git a/media/video/libvpx/include/mem.h b/media/video/libvpx/include/mem.h new file mode 100644 index 00000000..95bcba75 --- /dev/null +++ b/media/video/libvpx/include/mem.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_PORTS_MEM_H_ +#define VPX_VPX_PORTS_MEM_H_ + +//#include "vpx_config.h" +#include "vpx_integer.h" + +#if (defined(__GNUC__) && __GNUC__) || defined(__SUNPRO_C) +#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n))) +#elif defined(_MSC_VER) +#define DECLARE_ALIGNED(n, typ, val) __declspec(align(n)) typ val +#else +#warning No alignment directives known for this compiler. +#define DECLARE_ALIGNED(n, typ, val) typ val +#endif + +#if HAVE_NEON && defined(_MSC_VER) +#define __builtin_prefetch(x) +#endif + +/* Shift down with rounding */ +#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n)) +#define ROUND64_POWER_OF_TWO(value, n) (((value) + (1ULL << ((n)-1))) >> (n)) + +#define ALIGN_POWER_OF_TWO(value, n) \ + (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) + +#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1)) +#define CAST_TO_SHORTPTR(x) ((uint16_t *)((uintptr_t)(x))) +#if CONFIG_VP9_HIGHBITDEPTH +#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1)) +#define CAST_TO_BYTEPTR(x) ((uint8_t *)((uintptr_t)(x))) +#endif // CONFIG_VP9_HIGHBITDEPTH + +#endif // VPX_VPX_PORTS_MEM_H_ diff --git a/media/video/libvpx/include/register_state_check.h b/media/video/libvpx/include/register_state_check.h new file mode 100644 index 00000000..89ee725e --- /dev/null +++ b/media/video/libvpx/include/register_state_check.h @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_TEST_REGISTER_STATE_CHECK_H_ +#define VPX_TEST_REGISTER_STATE_CHECK_H_ + +#include "gtest/gtest.h" +//#include "./vpx_config.h" +#include "vpx_integer.h" + +// ASM_REGISTER_STATE_CHECK(asm_function) +// Minimally validates the environment pre & post function execution. This +// variant should be used with assembly functions which are not expected to +// fully restore the system state. See platform implementations of +// RegisterStateCheck for details. 
+// +// API_REGISTER_STATE_CHECK(api_function) +// Performs all the checks done by ASM_REGISTER_STATE_CHECK() and any +// additional checks to ensure the environment is in a consistent state pre & +// post function execution. This variant should be used with API functions. +// See platform implementations of RegisterStateCheckXXX for details. +// + +#if defined(_WIN64) && VPX_ARCH_X86_64 + +#undef NOMINMAX +#define NOMINMAX +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include +#include + +inline bool operator==(const M128A &lhs, const M128A &rhs) { + return (lhs.Low == rhs.Low && lhs.High == rhs.High); +} + +namespace libvpx_test { + +// Compares the state of xmm[6-15] at construction with their state at +// destruction. These registers should be preserved by the callee on +// Windows x64. +class RegisterStateCheck { + public: + RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); } + ~RegisterStateCheck() { Check(); } + + private: + static bool StoreRegisters(CONTEXT *const context) { + const HANDLE this_thread = GetCurrentThread(); + EXPECT_TRUE(this_thread != NULL); + context->ContextFlags = CONTEXT_FLOATING_POINT; + const bool context_saved = GetThreadContext(this_thread, context) == TRUE; + EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError(); + return context_saved; + } + + // Compares the register state. Returns true if the states match. + void Check() const { + ASSERT_TRUE(initialized_); + CONTEXT post_context; + ASSERT_TRUE(StoreRegisters(&post_context)); + + const M128A *xmm_pre = &pre_context_.Xmm6; + const M128A *xmm_post = &post_context.Xmm6; + for (int i = 6; i <= 15; ++i) { + EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!"; + ++xmm_pre; + ++xmm_post; + } + } + + bool initialized_; + CONTEXT pre_context_; +}; + +#define ASM_REGISTER_STATE_CHECK(statement) \ + do { \ + libvpx_test::RegisterStateCheck reg_check; \ + statement; \ + } while (false) + +} // namespace libvpx_test + +#elif defined(CONFIG_SHARED) && defined(HAVE_NEON_ASM) && \ + defined(CONFIG_VP9) && !CONFIG_SHARED && HAVE_NEON_ASM && CONFIG_VP9 + +extern "C" { +// Save the d8-d15 registers into store. +void vpx_push_neon(int64_t *store); +} + +namespace libvpx_test { + +// Compares the state of d8-d15 at construction with their state at +// destruction. These registers should be preserved by the callee on +// arm platform. +class RegisterStateCheck { + public: + RegisterStateCheck() { vpx_push_neon(pre_store_); } + ~RegisterStateCheck() { Check(); } + + private: + // Compares the register state. Returns true if the states match. + void Check() const { + int64_t post_store[8]; + vpx_push_neon(post_store); + for (int i = 0; i < 8; ++i) { + EXPECT_EQ(pre_store_[i], post_store[i]) + << "d" << i + 8 << " has been modified"; + } + } + + int64_t pre_store_[8]; +}; + +#define ASM_REGISTER_STATE_CHECK(statement) \ + do { \ + libvpx_test::RegisterStateCheck reg_check; \ + statement; \ + } while (false) + +} // namespace libvpx_test + +#else + +namespace libvpx_test { + +class RegisterStateCheck {}; +#define ASM_REGISTER_STATE_CHECK(statement) statement + +} // namespace libvpx_test + +#endif // _WIN64 && VPX_ARCH_X86_64 + +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 +#if defined(__GNUC__) + +namespace libvpx_test { + +// Checks the FPU tag word pre/post execution to ensure emms has been called. 
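+// (A tag word of 0xffff marks all eight x87 registers as empty, which is the
+// state emms leaves behind; any other value usually means emms was skipped.)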
+class RegisterStateCheckMMX { + public: + RegisterStateCheckMMX() { + __asm__ volatile("fstenv %0" : "=rm"(pre_fpu_env_)); + } + ~RegisterStateCheckMMX() { Check(); } + + private: + // Checks the FPU tag word pre/post execution, returning false if not cleared + // to 0xffff. + void Check() const { + EXPECT_EQ(0xffff, pre_fpu_env_[4]) + << "FPU was in an inconsistent state prior to call"; + + uint16_t post_fpu_env[14]; + __asm__ volatile("fstenv %0" : "=rm"(post_fpu_env)); + EXPECT_EQ(0xffff, post_fpu_env[4]) + << "FPU was left in an inconsistent state after call"; + } + + uint16_t pre_fpu_env_[14]; +}; + +#define API_REGISTER_STATE_CHECK(statement) \ + do { \ + libvpx_test::RegisterStateCheckMMX reg_check; \ + ASM_REGISTER_STATE_CHECK(statement); \ + } while (false) + +} // namespace libvpx_test + +#endif // __GNUC__ +#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64 + +#ifndef API_REGISTER_STATE_CHECK +#define API_REGISTER_STATE_CHECK ASM_REGISTER_STATE_CHECK +#endif + +#endif // VPX_TEST_REGISTER_STATE_CHECK_H_ diff --git a/media/video/libvpx/include/system_state.h b/media/video/libvpx/include/system_state.h new file mode 100644 index 00000000..4d2d93bc --- /dev/null +++ b/media/video/libvpx/include/system_state.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_PORTS_SYSTEM_STATE_H_ +#define VPX_VPX_PORTS_SYSTEM_STATE_H_ + +//#include "./vpx_config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#if (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX +extern void vpx_clear_system_state(void); +#else +#define vpx_clear_system_state() +#endif // (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VPX_PORTS_SYSTEM_STATE_H_ diff --git a/media/video/libvpx/include/variance.h b/media/video/libvpx/include/variance.h new file mode 100644 index 00000000..4d51b5cc --- /dev/null +++ b/media/video/libvpx/include/variance.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VPX_DSP_VARIANCE_H_ +#define VPX_VPX_DSP_VARIANCE_H_ + +//#include "./vpx_config.h" + +#include "vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define FILTER_BITS 7 +#define FILTER_WEIGHT 128 + +typedef unsigned int (*vpx_sad_fn_t)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride); + +typedef unsigned int (*vpx_sad_avg_fn_t)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + const uint8_t *second_pred); + +typedef void (*vp8_copy32xn_fn_t)(const uint8_t *src_ptr, int src_stride, + uint8_t *ref_ptr, int ref_stride, int n); + +typedef void (*vpx_sad_multi_fn_t)(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sad_array); + +typedef void (*vpx_sad_multi_d_fn_t)(const uint8_t *src_ptr, int src_stride, + const uint8_t *const b_array[], + int ref_stride, unsigned int *sad_array); + +typedef unsigned int (*vpx_variance_fn_t)(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, unsigned int *sse); + +typedef unsigned int (*vpx_subpixvariance_fn_t)( + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +typedef unsigned int (*vpx_subp_avg_variance_fn_t)( + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, + const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, + const uint8_t *second_pred); + +#if CONFIG_VP8 +typedef struct variance_vtable { + vpx_sad_fn_t sdf; + vpx_variance_fn_t vf; + vpx_subpixvariance_fn_t svf; + vpx_sad_multi_d_fn_t sdx4df; +#if VPX_ARCH_X86 || VPX_ARCH_X86_64 + vp8_copy32xn_fn_t copymem; +#endif +} vp8_variance_fn_ptr_t; +#endif // CONFIG_VP8 + +#if CONFIG_VP9 +typedef struct vp9_variance_vtable { + vpx_sad_fn_t sdf; + vpx_sad_avg_fn_t sdaf; + vpx_variance_fn_t vf; + vpx_subpixvariance_fn_t svf; + vpx_subp_avg_variance_fn_t svaf; + vpx_sad_multi_d_fn_t sdx4df; +} vp9_variance_fn_ptr_t; +#endif // CONFIG_VP9 + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_VPX_DSP_VARIANCE_H_ diff --git a/media/video/libvpx/include/vpx_dsp_rtcd.h b/media/video/libvpx/include/vpx_dsp_rtcd.h new file mode 100644 index 00000000..537fd10b --- /dev/null +++ b/media/video/libvpx/include/vpx_dsp_rtcd.h @@ -0,0 +1,211 @@ +// This file is generated. Do not edit. 
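+// (Trimmed copy for this standalone test: only the variance-family C
+// reference prototypes are kept, together with the matching _svp64
+// declarations exercised through the pypowersim wrappers.)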
+#ifndef VPX_DSP_RTCD_H_ +#define VPX_DSP_RTCD_H_ + +#ifdef RTCD_C +#define RTCD_EXTERN +#else +#define RTCD_EXTERN extern +#endif + +/* + * DSP + */ + +#include "vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +unsigned int vpx_get_mb_ss_c(const int16_t *); + +unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride); + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); + +unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +uint32_t vpx_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, 
int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance16x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance16x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance32x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance32x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance32x64_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance4x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance64x32_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance64x64_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance8x16_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance8x4_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance8x8_c(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned 
int *sse); + +unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + + +// SVP64 prototypes +unsigned int vpx_get_mb_ss_svp64(const int16_t *); + +unsigned int vpx_get4x4sse_cs_svp64(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride); + +void vpx_comp_avg_pred_svp64(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride); + +unsigned int vpx_mse16x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_mse16x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_mse8x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_mse8x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +uint32_t vpx_sub_pixel_avg_variance16x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance16x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance16x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance32x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance32x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance32x64_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance4x4_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance4x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance64x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance64x64_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance8x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t 
vpx_sub_pixel_avg_variance8x4_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_avg_variance8x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred); + +uint32_t vpx_sub_pixel_variance16x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance16x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance16x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance32x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance32x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance32x64_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance4x4_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance4x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance64x32_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance64x64_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance8x16_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance8x4_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +uint32_t vpx_sub_pixel_variance8x8_svp64(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse); + +unsigned int vpx_variance16x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x32_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance16x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x32_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance32x64_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance4x4_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned 
int *sse); + +unsigned int vpx_variance4x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x32_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance64x64_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x16_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x4_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +unsigned int vpx_variance8x8_svp64(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/media/video/libvpx/include/vpx_integer.h b/media/video/libvpx/include/vpx_integer.h new file mode 100644 index 00000000..e0ab5471 --- /dev/null +++ b/media/video/libvpx/include/vpx_integer.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_VPX_INTEGER_H_ +#define VPX_VPX_VPX_INTEGER_H_ + +/* get ptrdiff_t, size_t, wchar_t, NULL */ +#include + +#if defined(_MSC_VER) +#define VPX_FORCE_INLINE __forceinline +#define VPX_INLINE __inline +#else +#define VPX_FORCE_INLINE __inline__ __attribute__((always_inline)) +// TODO(jbb): Allow a way to force inline off for older compilers. +#define VPX_INLINE inline +#endif + +#define INLINE VPX_INLINE + +/* Assume platforms have the C99 standard integer types. */ + +#if defined(__cplusplus) +#if !defined(__STDC_FORMAT_MACROS) +#define __STDC_FORMAT_MACROS +#endif +#if !defined(__STDC_LIMIT_MACROS) +#define __STDC_LIMIT_MACROS +#endif +#endif // __cplusplus + +#include +#include + +#endif // VPX_VPX_VPX_INTEGER_H_ diff --git a/media/video/libvpx/include/vpx_mem.h b/media/video/libvpx/include/vpx_mem.h new file mode 100644 index 00000000..e260a309 --- /dev/null +++ b/media/video/libvpx/include/vpx_mem.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VPX_MEM_VPX_MEM_H_ +#define VPX_VPX_MEM_VPX_MEM_H_ + +//#include "vpx_config.h" +#if defined(__uClinux__) +#include +#endif + +#include +#include + +#include "vpx_integer.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +void *vpx_memalign(size_t align, size_t size); +void *vpx_malloc(size_t size); +void *vpx_calloc(size_t num, size_t size); +void vpx_free(void *memblk); + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE void *vpx_memset16(void *dest, int val, size_t length) { + size_t i; + uint16_t *dest16 = (uint16_t *)dest; + for (i = 0; i < length; i++) *dest16++ = val; + return dest; +} +#endif + +#include + +#ifdef VPX_MEM_PLTFRM +#include VPX_MEM_PLTFRM +#endif + +#if defined(__cplusplus) +} +#endif + +#endif // VPX_VPX_MEM_VPX_MEM_H_ diff --git a/media/video/libvpx/include/vpx_mem_intrnl.h b/media/video/libvpx/include/vpx_mem_intrnl.h new file mode 100644 index 00000000..33da6c21 --- /dev/null +++ b/media/video/libvpx/include/vpx_mem_intrnl.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ +#define VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ +//#include "./vpx_config.h" + +#define ADDRESS_STORAGE_SIZE sizeof(size_t) + +#ifndef DEFAULT_ALIGNMENT +#if defined(VXWORKS) +/*default addr alignment to use in calls to vpx_* functions other than + * vpx_memalign*/ +#define DEFAULT_ALIGNMENT 32 +#else +#define DEFAULT_ALIGNMENT (2 * sizeof(void *)) /* NOLINT */ +#endif +#endif + +/*returns an addr aligned to the byte boundary specified by align*/ +#define align_addr(addr, align) \ + (void *)(((size_t)(addr) + ((align)-1)) & ~(size_t)((align)-1)) + +#endif // VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ diff --git a/media/video/libvpx/include/vpx_misc.h b/media/video/libvpx/include/vpx_misc.h new file mode 100644 index 00000000..62c4212b --- /dev/null +++ b/media/video/libvpx/include/vpx_misc.h @@ -0,0 +1,14 @@ +#ifndef VPX_VPX_PORTS_MISC_H_ +#define VPX_VPX_PORTS_MISC_H_ + +/*!\brief Bit depth for codec + * * + * This enumeration determines the bit depth of the codec. + */ +typedef enum vpx_bit_depth { + VPX_BITS_8 = 8, /**< 8 bits */ + VPX_BITS_10 = 10, /**< 10 bits */ + VPX_BITS_12 = 12, /**< 12 bits */ +} vpx_bit_depth_t; + +#endif diff --git a/media/video/libvpx/include/vpx_timer.h b/media/video/libvpx/include/vpx_timer.h new file mode 100644 index 00000000..518de18d --- /dev/null +++ b/media/video/libvpx/include/vpx_timer.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VPX_VPX_PORTS_VPX_TIMER_H_ +#define VPX_VPX_PORTS_VPX_TIMER_H_ + +//#include "./vpx_config.h" + +#include "vpx_integer.h" + +#if CONFIG_OS_SUPPORT + +#if defined(_WIN32) +/* + * Win32 specific includes + */ +#undef NOMINMAX +#define NOMINMAX +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include +#else +/* + * POSIX specific includes + */ +#include + +/* timersub is not provided by msys at this time. */ +#ifndef timersub +#define timersub(a, b, result) \ + do { \ + (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ + (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ + if ((result)->tv_usec < 0) { \ + --(result)->tv_sec; \ + (result)->tv_usec += 1000000; \ + } \ + } while (0) +#endif +#endif + +struct vpx_usec_timer { +#if defined(_WIN32) + LARGE_INTEGER begin, end; +#else + struct timeval begin, end; +#endif +}; + +static INLINE void vpx_usec_timer_start(struct vpx_usec_timer *t) { +#if defined(_WIN32) + QueryPerformanceCounter(&t->begin); +#else + gettimeofday(&t->begin, NULL); +#endif +} + +static INLINE void vpx_usec_timer_mark(struct vpx_usec_timer *t) { +#if defined(_WIN32) + QueryPerformanceCounter(&t->end); +#else + gettimeofday(&t->end, NULL); +#endif +} + +static INLINE int64_t vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { +#if defined(_WIN32) + LARGE_INTEGER freq, diff; + + diff.QuadPart = t->end.QuadPart - t->begin.QuadPart; + + QueryPerformanceFrequency(&freq); + return diff.QuadPart * 1000000 / freq.QuadPart; +#else + struct timeval diff; + + timersub(&t->end, &t->begin, &diff); + return (int64_t)diff.tv_sec * 1000000 + diff.tv_usec; +#endif +} + +#else /* CONFIG_OS_SUPPORT = 0*/ + +/* Empty timer functions if CONFIG_OS_SUPPORT = 0 */ +#ifndef timersub +#define timersub(a, b, result) +#endif + +struct vpx_usec_timer { + void *dummy; +}; + +static INLINE void vpx_usec_timer_start(struct vpx_usec_timer *t) {} + +static INLINE void vpx_usec_timer_mark(struct vpx_usec_timer *t) {} + +static INLINE int vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { return 0; } + +#endif /* CONFIG_OS_SUPPORT */ + +#endif // VPX_VPX_PORTS_VPX_TIMER_H_ diff --git a/media/video/libvpx/test_libvpx.cc b/media/video/libvpx/test_libvpx.cc new file mode 100644 index 00000000..8fd34246 --- /dev/null +++ b/media/video/libvpx/test_libvpx.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include + +#include + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} diff --git a/media/video/libvpx/variance_ref.c b/media/video/libvpx/variance_ref.c new file mode 100644 index 00000000..78a6dbfa --- /dev/null +++ b/media/video/libvpx/variance_ref.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include + +#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n))) + +#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n)) + +#define FILTER_BITS 7 + +static const uint8_t bilinear_filters[8][2] = { + { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 }, + { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 }, +}; + +uint32_t vpx_get4x4sse_cs_c(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride) { + int distortion = 0; + int r, c; + + for (r = 0; r < 4; ++r) { + for (c = 0; c < 4; ++c) { + int diff = src_ptr[c] - ref_ptr[c]; + distortion += diff * diff; + } + + src_ptr += src_stride; + ref_ptr += ref_stride; + } + + return distortion; +} + +uint32_t vpx_get_mb_ss_c(const int16_t *src_ptr) { + unsigned int i, sum = 0; + + for (i = 0; i < 256; ++i) { + sum += src_ptr[i] * src_ptr[i]; + } + + return sum; +} + +static void variance(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, int w, int h, + uint32_t *sse, int *sum) { + int i, j; + + *sum = 0; + *sse = 0; + + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { + const int diff = src_ptr[j] - ref_ptr[j]; + *sum += diff; + *sse += diff * diff; + } + + src_ptr += src_stride; + ref_ptr += ref_stride; + } +} + +// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal +// or vertical direction to produce the filtered output block. Used to implement +// the first-pass of 2-D separable filter. +// +// Produces int16_t output to retain precision for the next pass. Two filter +// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is +// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride). +// It defines the offset required to move from one input to the next. +static void var_filter_block2d_bil_first_pass( + const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line, + int pixel_step, unsigned int output_height, unsigned int output_width, + const uint8_t *filter) { + unsigned int i, j; + + for (i = 0; i < output_height; ++i) { + for (j = 0; j < output_width; ++j) { + ref_ptr[j] = ROUND_POWER_OF_TWO( + (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], + FILTER_BITS); + + ++src_ptr; + } + + src_ptr += src_pixels_per_line - output_width; + ref_ptr += output_width; + } +} + +// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal +// or vertical direction to produce the filtered output block. Used to implement +// the second-pass of 2-D separable filter. +// +// Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two +// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the +// filter is applied horizontally (pixel_step = 1) or vertically +// (pixel_step = stride). It defines the offset required to move from one input +// to the next. Output is 8-bit. 
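+//
+// As a rough sketch of how the two passes combine for an 8x8 block with
+// x_offset = 1 and y_offset = 4 (filters { 112, 16 } and { 64, 64 }):
+//
+//   // first pass: horizontal, pixel_step = 1, writes (H + 1) x W uint16_t
+//   fdata3[j] = ROUND_POWER_OF_TWO(112 * src[j] + 16 * src[j + 1], FILTER_BITS);
+//   // second pass: vertical, pixel_step = W = 8, writes H x W uint8_t
+//   temp2[j] = ROUND_POWER_OF_TWO(64 * fdata3[j] + 64 * fdata3[j + 8], FILTER_BITS);
+//
+// fdata3 and temp2 here are the scratch buffers declared in the SUBPIX_VAR
+// macro further down in this file.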
+static void var_filter_block2d_bil_second_pass( + const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line, + unsigned int pixel_step, unsigned int output_height, + unsigned int output_width, const uint8_t *filter) { + unsigned int i, j; + + for (i = 0; i < output_height; ++i) { + for (j = 0; j < output_width; ++j) { + ref_ptr[j] = ROUND_POWER_OF_TWO( + (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], + FILTER_BITS); + ++src_ptr; + } + + src_ptr += src_pixels_per_line - output_width; + ref_ptr += output_width; + } +} + +void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride) { + int i, j; + + for (i = 0; i < height; ++i) { + for (j = 0; j < width; ++j) { + const int tmp = pred[j] + ref[j]; + comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} + +#define VAR(W, H) \ + uint32_t vpx_variance##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + int sum; \ + variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \ + return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \ + } + +#define SUBPIX_VAR(W, H) \ + uint32_t vpx_sub_pixel_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint8_t temp2[H * W]; \ + \ + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters[x_offset]); \ + var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + return vpx_variance##W##x##H##_c(temp2, W, ref_ptr, ref_stride, sse); \ + } + +#define SUBPIX_AVG_VAR(W, H) \ + uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint8_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ + \ + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters[x_offset]); \ + var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W); \ + \ + return vpx_variance##W##x##H##_c(temp3, W, ref_ptr, ref_stride, sse); \ + } + +/* Identical to the variance call except it takes an additional parameter, sum, + * and returns that value using pass-by-reference instead of returning + * sse - sum^2 / w*h + */ +#define GET_VAR(W, H) \ + void vpx_get##W##x##H##var_c(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse, int *sum) { \ + variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \ + } + +/* Identical to the variance call except it does not calculate the + * sse - sum^2 / w*h and returns sse in addtion to modifying the passed in + * variable. + */ +#define MSE(W, H) \ + uint32_t vpx_mse##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + int sum; \ + variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \ + return *sse; \ + } + +/* All three forms of the variance are available in the same sizes. 
*/ +#define VARIANCES(W, H) \ + VAR(W, H) \ + SUBPIX_VAR(W, H) \ + SUBPIX_AVG_VAR(W, H) + +VARIANCES(64, 64) +VARIANCES(64, 32) +VARIANCES(32, 64) +VARIANCES(32, 32) +VARIANCES(32, 16) +VARIANCES(16, 32) +VARIANCES(16, 16) +VARIANCES(16, 8) +VARIANCES(8, 16) +VARIANCES(8, 8) +VARIANCES(8, 4) +VARIANCES(4, 8) +VARIANCES(4, 4) + +GET_VAR(16, 16) +GET_VAR(8, 8) + +MSE(16, 16) +MSE(16, 8) +MSE(8, 16) +MSE(8, 8) + + diff --git a/media/video/libvpx/variance_svp64.c b/media/video/libvpx/variance_svp64.c new file mode 100644 index 00000000..55e3f199 --- /dev/null +++ b/media/video/libvpx/variance_svp64.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n))) + +#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n)) + +#define FILTER_BITS 7 + +static const uint8_t bilinear_filters[8][2] = { + { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 }, + { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 }, +}; + +uint32_t vpx_get4x4sse_cs_svp64(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride) { + int distortion = 0; + int r, c; + + for (r = 0; r < 4; ++r) { + for (c = 0; c < 4; ++c) { + int diff = src_ptr[c] - ref_ptr[c]; + distortion += diff * diff; + } + + src_ptr += src_stride; + ref_ptr += ref_stride; + } + + return distortion; +} + +/* +uint32_t vpx_get_mb_ss_svp64(const int16_t *src_ptr) { + unsigned int i, sum = 0; + + for (i = 0; i < 256; ++i) { + sum += src_ptr[i] * src_ptr[i]; + } + + return sum; +}*/ + +static void variance_svp64(const uint8_t *src_ptr, int src_stride, + const uint8_t *ref_ptr, int ref_stride, int w, int h, + uint32_t *sse, int *sum) { + int i, j; + + *sum = 0; + *sse = 0; + + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { + const int diff = src_ptr[j] - ref_ptr[j]; + *sum += diff; + *sse += diff * diff; + } + + src_ptr += src_stride; + ref_ptr += ref_stride; + } +} + +// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal +// or vertical direction to produce the filtered output block. Used to implement +// the first-pass of 2-D separable filter. +// +// Produces int16_t output to retain precision for the next pass. Two filter +// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is +// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride). +// It defines the offset required to move from one input to the next. +static void var_filter_block2d_bil_first_pass_svp64( + const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line, + int pixel_step, unsigned int output_height, unsigned int output_width, + const uint8_t *filter) { + unsigned int i, j; + + for (i = 0; i < output_height; ++i) { + for (j = 0; j < output_width; ++j) { + ref_ptr[j] = ROUND_POWER_OF_TWO( + (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], + FILTER_BITS); + + ++src_ptr; + } + + src_ptr += src_pixels_per_line - output_width; + ref_ptr += output_width; + } +} + +// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal +// or vertical direction to produce the filtered output block. 
Used to implement +// the second-pass of 2-D separable filter. +// +// Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two +// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the +// filter is applied horizontally (pixel_step = 1) or vertically +// (pixel_step = stride). It defines the offset required to move from one input +// to the next. Output is 8-bit. +static void var_filter_block2d_bil_second_pass_svp64( + const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line, + unsigned int pixel_step, unsigned int output_height, + unsigned int output_width, const uint8_t *filter) { + unsigned int i, j; + + for (i = 0; i < output_height; ++i) { + for (j = 0; j < output_width; ++j) { + ref_ptr[j] = ROUND_POWER_OF_TWO( + (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], + FILTER_BITS); + ++src_ptr; + } + + src_ptr += src_pixels_per_line - output_width; + ref_ptr += output_width; + } +} + +void vpx_comp_avg_pred_svp64(uint8_t *comp_pred, const uint8_t *pred, int width, + int height, const uint8_t *ref, int ref_stride) { + int i, j; + + for (i = 0; i < height; ++i) { + for (j = 0; j < width; ++j) { + const int tmp = pred[j] + ref[j]; + comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); + } + comp_pred += width; + pred += width; + ref += ref_stride; + } +} + +#define VAR(W, H) \ + uint32_t vpx_variance##W##x##H##_svp64(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + int sum; \ + variance_svp64(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \ + return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \ + } + +#define SUBPIX_VAR(W, H) \ + uint32_t vpx_sub_pixel_variance##W##x##H##_svp64( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint8_t temp2[H * W]; \ + \ + var_filter_block2d_bil_first_pass_svp64(src_ptr, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters[x_offset]); \ + var_filter_block2d_bil_second_pass_svp64(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + return vpx_variance##W##x##H##_svp64(temp2, W, ref_ptr, ref_stride, sse);\ + } + +#define SUBPIX_AVG_VAR(W, H) \ + uint32_t vpx_sub_pixel_avg_variance##W##x##H##_svp64( \ + const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ + const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint8_t temp2[H * W]; \ + DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ + \ + var_filter_block2d_bil_first_pass_svp64(src_ptr, fdata3, src_stride, 1, H + 1, \ + W, bilinear_filters[x_offset]); \ + var_filter_block2d_bil_second_pass_svp64(fdata3, temp2, W, W, H, W, \ + bilinear_filters[y_offset]); \ + \ + vpx_comp_avg_pred_svp64(temp3, second_pred, W, H, temp2, W); \ + \ + return vpx_variance##W##x##H##_svp64(temp3, W, ref_ptr, ref_stride, sse);\ + } + +/* Identical to the variance call except it takes an additional parameter, sum, + * and returns that value using pass-by-reference instead of returning + * sse - sum^2 / w*h + */ +#define GET_VAR(W, H) \ + void vpx_get##W##x##H##var_svp64(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse, int *sum) { \ + variance_svp64(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \ + } + +/* Identical to the variance call except it does not calculate the + * sse - sum^2 / w*h and returns sse in 
addtion to modifying the passed in + * variable. + */ +#define MSE(W, H) \ + uint32_t vpx_mse##W##x##H##_svp64(const uint8_t *src_ptr, int src_stride, \ + const uint8_t *ref_ptr, int ref_stride, \ + uint32_t *sse) { \ + int sum; \ + variance_svp64(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \ + return *sse; \ + } + +/* All three forms of the variance are available in the same sizes. */ +#define VARIANCES(W, H) \ + VAR(W, H) \ + SUBPIX_VAR(W, H) \ + SUBPIX_AVG_VAR(W, H) + +VARIANCES(64, 64) +VARIANCES(64, 32) +VARIANCES(32, 64) +VARIANCES(32, 32) +VARIANCES(32, 16) +VARIANCES(16, 32) +VARIANCES(16, 16) +VARIANCES(16, 8) +VARIANCES(8, 16) +VARIANCES(8, 8) +VARIANCES(8, 4) +VARIANCES(4, 8) +VARIANCES(4, 4) + +GET_VAR(16, 16) +GET_VAR(8, 8) + +MSE(16, 16) +MSE(16, 8) +MSE(8, 16) +MSE(8, 8) + + diff --git a/media/video/libvpx/variance_svp64_wrappers.c b/media/video/libvpx/variance_svp64_wrappers.c new file mode 100644 index 00000000..c6e8431b --- /dev/null +++ b/media/video/libvpx/variance_svp64_wrappers.c @@ -0,0 +1,59 @@ +#include +#include +#include + +#include "pypowersim_wrapper_common.h" +#include "variance_svp64_wrappers.h" + +uint32_t vpx_get_mb_ss_svp64(const int16_t *src_ptr) { + // It cannot be the same pointer as the original function, as it is really a separate CPU/RAM + // we have to memcpy from src_ptr to this pointer, the address was chosen arbitrarily + const uint64_t src_ptr_svp64 = 0x100000; + const uint64_t *src_ptr64 = (const uint64_t *) src_ptr; + + // Create the pypowersim_state + pypowersim_state_t *state = pypowersim_prepare(); + + // Change the relevant elements, mandatory: body + // + state->binary = PyBytes_FromStringAndSize((const char *)&vpx_get_mb_ss_svp64_real, 1000); + // Set GPR #3 to the pointer + PyObject *address = PyLong_FromLongLong(src_ptr_svp64); + PyList_SetItem(state->initial_regs, 3, address); + // Load data into buffer from real memory + size_t size = 256*sizeof(uint16_t)/sizeof(uint64_t); + for (int i=0; i < size; i++) { + PyObject *address = PyLong_FromLongLong(src_ptr_svp64 + i*8); + PyObject *word = PyLong_FromLongLong(*(src_ptr64 + i)); + PyDict_SetItem(state->initial_mem, address, word); + } + + // Prepare the arguments object for the call + pypowersim_prepareargs(state); + + // Call the function and get the resulting object + state->result_obj = PyObject_CallObject(state->simulator, state->args); + Py_DECREF(state->simulator); + Py_DECREF(state->args); + if (!state->result_obj) { + PyErr_Print(); + printf("Error invoking 'run_a_simulation'\n"); + } + + // Get the GPRs from the result_obj + PyObject *final_regs = PyObject_GetAttrString(state->result_obj, "gpr"); + if (!final_regs) { + PyErr_Print(); + Py_DECREF(state->result_obj); + printf("Error getting final GPRs\n"); + } + + // GPR #3 holds the return value as an integer + PyObject *key = PyLong_FromLong(3); + PyObject *itm = PyDict_GetItem(final_regs, key); + PyObject *value = PyObject_GetAttrString(itm, "value"); + uint64_t val = PyLong_AsLongLong(value); + + // Return value + return (uint32_t) val; +} diff --git a/media/video/libvpx/variance_svp64_wrappers.h b/media/video/libvpx/variance_svp64_wrappers.h new file mode 100644 index 00000000..d38ecff0 --- /dev/null +++ b/media/video/libvpx/variance_svp64_wrappers.h @@ -0,0 +1,3 @@ +#include + +uint32_t vpx_get_mb_ss_svp64_real(const int16_t *src_ptr); diff --git a/media/video/libvpx/variance_test.cc b/media/video/libvpx/variance_test.cc new file mode 100644 index 00000000..2ea74a7b --- /dev/null +++ 
b/media/video/libvpx/variance_test.cc @@ -0,0 +1,786 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include + +#include "vpx_misc.h" +#include "vpx_dsp_rtcd.h" +#include "acm_random.h" +#include "clear_system_state.h" +#include "register_state_check.h" +#include "vpx_integer.h" +#include "variance.h" +#include "vpx_mem.h" +#include "mem.h" +#include "vpx_timer.h" + +namespace { + +typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride); +typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src); + +using libvpx_test::ACMRandom; + +// Truncate high bit depth results by downshifting (with rounding) by: +// 2 * (bit_depth - 8) for sse +// (bit_depth - 8) for se +static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) { + switch (bit_depth) { + case VPX_BITS_12: + *sse = (*sse + 128) >> 8; + *se = (*se + 8) >> 4; + break; + case VPX_BITS_10: + *sse = (*sse + 8) >> 4; + *se = (*se + 2) >> 2; + break; + case VPX_BITS_8: + default: break; + } +} + +static unsigned int mb_ss_ref(const int16_t *src) { + unsigned int res = 0; + for (int i = 0; i < 256; ++i) { + res += src[i] * src[i]; + } + return res; +} + +/* Note: + * Our codebase calculates the "diff" value in the variance algorithm by + * (src - ref). + */ +static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w, + int l2h, int src_stride, int ref_stride, + uint32_t *sse_ptr, bool use_high_bit_depth_, + vpx_bit_depth_t bit_depth) { + int64_t se = 0; + uint64_t sse = 0; + const int w = 1 << l2w; + const int h = 1 << l2h; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + int diff; + if (!use_high_bit_depth_) { + diff = src[y * src_stride + x] - ref[y * ref_stride + x]; + se += diff; + sse += diff * diff; + } + } + } + RoundHighBitDepth(bit_depth, &se, &sse); + *sse_ptr = static_cast(sse); + return static_cast( + sse - ((static_cast(se) * se) >> (l2w + l2h))); +} + +/* The subpel reference functions differ from the codec version in one aspect: + * they calculate the bilinear factors directly instead of using a lookup table + * and therefore upshift xoff and yoff by 1. Only every other calculated value + * is used so the codec version shrinks the table to save space and maintain + * compatibility with vp8. + */ +static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src, + int l2w, int l2h, int xoff, int yoff, + uint32_t *sse_ptr, bool use_high_bit_depth_, + vpx_bit_depth_t bit_depth) { + int64_t se = 0; + uint64_t sse = 0; + const int w = 1 << l2w; + const int h = 1 << l2h; + + xoff <<= 1; + yoff <<= 1; + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + // Bilinear interpolation at a 16th pel step. 
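+      // Illustrative worked example (arbitrary values): with xoff = 2 on
+      // entry (so 4 here after the <<= 1 upshift above) and neighbouring
+      // reference pixels a1 = 100, a2 = 116, the horizontal step below gives
+      //   a = a1 + (((a2 - a1) * xoff + 8) >> 4) = 100 + ((64 + 8) >> 4) = 104,
+      // i.e. a quarter of the way from a1 towards a2; the "+ 8" and ">> 4"
+      // round the 1/16-pel weighting to the nearest integer.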
+ if (!use_high_bit_depth_) { + const int a1 = ref[(w + 1) * (y + 0) + x + 0]; + const int a2 = ref[(w + 1) * (y + 0) + x + 1]; + const int b1 = ref[(w + 1) * (y + 1) + x + 0]; + const int b2 = ref[(w + 1) * (y + 1) + x + 1]; + const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); + const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); + const int r = a + (((b - a) * yoff + 8) >> 4); + const int diff = r - src[w * y + x]; + se += diff; + sse += diff * diff; + } + } + } + RoundHighBitDepth(bit_depth, &se, &sse); + *sse_ptr = static_cast(sse); + return static_cast( + sse - ((static_cast(se) * se) >> (l2w + l2h))); +} + +static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src, + const uint8_t *second_pred, int l2w, + int l2h, int xoff, int yoff, + uint32_t *sse_ptr, + bool use_high_bit_depth, + vpx_bit_depth_t bit_depth) { + int64_t se = 0; + uint64_t sse = 0; + const int w = 1 << l2w; + const int h = 1 << l2h; + + xoff <<= 1; + yoff <<= 1; + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + // bilinear interpolation at a 16th pel step + if (!use_high_bit_depth) { + const int a1 = ref[(w + 1) * (y + 0) + x + 0]; + const int a2 = ref[(w + 1) * (y + 0) + x + 1]; + const int b1 = ref[(w + 1) * (y + 1) + x + 0]; + const int b2 = ref[(w + 1) * (y + 1) + x + 1]; + const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); + const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); + const int r = a + (((b - a) * yoff + 8) >> 4); + const int diff = + ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x]; + se += diff; + sse += diff * diff; + } + } + } + RoundHighBitDepth(bit_depth, &se, &sse); + *sse_ptr = static_cast(sse); + return static_cast( + sse - ((static_cast(se) * se) >> (l2w + l2h))); +} + +//////////////////////////////////////////////////////////////////////////////// + +class SumOfSquaresTest : public ::testing::TestWithParam { + public: + SumOfSquaresTest() : func_(GetParam()) {} + + virtual ~SumOfSquaresTest() { libvpx_test::ClearSystemState(); } + + protected: + void ConstTest(); + void RefTest(); + + SumOfSquaresFunction func_; + ACMRandom rnd_; +}; + +void SumOfSquaresTest::ConstTest() { + int16_t mem[256]; + unsigned int res; + for (int v = 0; v < 20; ++v) { + for (int i = 0; i < 256; ++i) { + mem[i] = v; + } + ASM_REGISTER_STATE_CHECK(res = func_(mem)); + EXPECT_EQ(256u * (v * v), res); + } +} + +void SumOfSquaresTest::RefTest() { + int16_t mem[256]; + for (int i = 0; i < 20; ++i) { + for (int j = 0; j < 256; ++j) { + mem[j] = rnd_.Rand8() - rnd_.Rand8(); + } + + const unsigned int expected = mb_ss_ref(mem); + unsigned int res; + ASM_REGISTER_STATE_CHECK(res = func_(mem)); + EXPECT_EQ(expected, res); + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Encapsulating struct to store the function to test along with +// some testing context. +// Can be used for MSE, SSE, Variance, etc. 
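+// For example, a parameter set built as TestParams(4, 4, &vpx_variance16x16_c, 0)
+// describes a 16x16 block: width = height = 1 << 4 = 16, block_size = 256,
+// bit_depth = VPX_BITS_8 (numeric value 8) and mask = (1u << 8) - 1 = 255.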
+ +template +struct TestParams { + TestParams(int log2w = 0, int log2h = 0, Func function = nullptr, + int bit_depth_value = 0) + : log2width(log2w), log2height(log2h), func(function) { + use_high_bit_depth = (bit_depth_value > 0); + if (use_high_bit_depth) { + bit_depth = static_cast(bit_depth_value); + } else { + bit_depth = VPX_BITS_8; + } + width = 1 << log2width; + height = 1 << log2height; + block_size = width * height; + mask = (1u << bit_depth) - 1; + } + + int log2width, log2height; + int width, height; + int block_size; + Func func; + vpx_bit_depth_t bit_depth; + bool use_high_bit_depth; + uint32_t mask; +}; + +template +std::ostream &operator<<(std::ostream &os, const TestParams &p) { + return os << "log2width/height:" << p.log2width << "/" << p.log2height + << " function:" << reinterpret_cast(p.func) + << " bit-depth:" << p.bit_depth; +} + +// Main class for testing a function type +template +class MainTestClass + : public ::testing::TestWithParam > { + public: + virtual void SetUp() { + params_ = this->GetParam(); + + rnd_.Reset(ACMRandom::DeterministicSeed()); + const size_t unit = + use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t); + src_ = reinterpret_cast(vpx_memalign(16, block_size() * unit)); + ref_ = new uint8_t[block_size() * unit]; + ASSERT_NE(src_, nullptr); + ASSERT_NE(ref_, nullptr); + } + + virtual void TearDown() { + + vpx_free(src_); + delete[] ref_; + src_ = nullptr; + ref_ = nullptr; + libvpx_test::ClearSystemState(); + } + + protected: + // We could sub-class MainTestClass into dedicated class for Variance + // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing + // to access top class fields xxx. That's cumbersome, so for now we'll just + // implement the testing methods here: + + // Variance tests + void ZeroTest(); + void RefTest(); + void RefStrideTest(); + void OneQuarterTest(); + void SpeedTest(); + + // MSE/SSE tests + void RefTestMse(); + void RefTestSse(); + void MaxTestMse(); + void MaxTestSse(); + + protected: + ACMRandom rnd_; + uint8_t *src_; + uint8_t *ref_; + TestParams params_; + + // some relay helpers + bool use_high_bit_depth() const { return params_.use_high_bit_depth; } + int byte_shift() const { return params_.bit_depth - 8; } + int block_size() const { return params_.block_size; } + int width() const { return params_.width; } + int height() const { return params_.height; } + uint32_t mask() const { return params_.mask; } +}; + +//////////////////////////////////////////////////////////////////////////////// +// Tests related to variance. 
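+// As a sanity check on the formula var = sse - sum^2 / (w * h): in
+// OneQuarterTest below, src is all 255 while ref is 255 for the first half of
+// the block and 0 for the second half, so sum = (N/2) * 255 and
+// sse = (N/2) * 255^2 with N = block_size.  The expected variance is then
+//   (N/2) * 255^2 - ((N/2) * 255)^2 / N = (N/4) * 255^2,
+// which matches the block_size() * 255 * 255 / 4 value asserted there.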
+ +template +void MainTestClass::ZeroTest() { + for (int i = 0; i <= 255; ++i) { + if (!use_high_bit_depth()) { + memset(src_, i, block_size()); + } else { + uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_); + for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift(); + } + for (int j = 0; j <= 255; ++j) { + if (!use_high_bit_depth()) { + memset(ref_, j, block_size()); + } else { + uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_); + for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift(); + } + unsigned int sse, var; + ASM_REGISTER_STATE_CHECK( + var = params_.func(src_, width(), ref_, width(), &sse)); + EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j; + } + } +} + +template +void MainTestClass::RefTest() { + for (int i = 0; i < 10; ++i) { + for (int j = 0; j < block_size(); j++) { + if (!use_high_bit_depth()) { + src_[j] = rnd_.Rand8(); + ref_[j] = rnd_.Rand8(); + } + } + unsigned int sse1, sse2, var1, var2; + const int stride = width(); + ASM_REGISTER_STATE_CHECK( + var1 = params_.func(src_, stride, ref_, stride, &sse1)); + var2 = + variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, + stride, &sse2, use_high_bit_depth(), params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "Error at test index: " << i; + EXPECT_EQ(var1, var2) << "Error at test index: " << i; + } +} + +template +void MainTestClass::RefStrideTest() { + for (int i = 0; i < 10; ++i) { + const int ref_stride = (i & 1) * width(); + const int src_stride = ((i >> 1) & 1) * width(); + for (int j = 0; j < block_size(); j++) { + const int ref_ind = (j / width()) * ref_stride + j % width(); + const int src_ind = (j / width()) * src_stride + j % width(); + if (!use_high_bit_depth()) { + src_[src_ind] = rnd_.Rand8(); + ref_[ref_ind] = rnd_.Rand8(); + } + } + unsigned int sse1, sse2; + unsigned int var1, var2; + + ASM_REGISTER_STATE_CHECK( + var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1)); + var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height, + src_stride, ref_stride, &sse2, use_high_bit_depth(), + params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "Error at test index: " << i; + EXPECT_EQ(var1, var2) << "Error at test index: " << i; + } +} + +template +void MainTestClass::OneQuarterTest() { + const int half = block_size() / 2; + if (!use_high_bit_depth()) { + memset(src_, 255, block_size()); + memset(ref_, 255, half); + memset(ref_ + half, 0, half); + } + unsigned int sse, var, expected; + ASM_REGISTER_STATE_CHECK( + var = params_.func(src_, width(), ref_, width(), &sse)); + expected = block_size() * 255 * 255 / 4; + EXPECT_EQ(expected, var); +} + +template +void MainTestClass::SpeedTest() { + const int half = block_size() / 2; + if (!use_high_bit_depth()) { + memset(src_, 255, block_size()); + memset(ref_, 255, half); + memset(ref_ + half, 0, half); + } + unsigned int sse; + + vpx_usec_timer timer; + vpx_usec_timer_start(&timer); + for (int i = 0; i < (1 << 30) / block_size(); ++i) { + const uint32_t variance = params_.func(src_, width(), ref_, width(), &sse); + // Ignore return value. + (void)variance; + } + vpx_usec_timer_mark(&timer); + const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); + printf("Variance %dx%d time: %5d ms\n", width(), height(), + elapsed_time / 1000); +} + +//////////////////////////////////////////////////////////////////////////////// +// Tests related to MSE / SSE. 
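+// Unlike the variance functions, the MSE/SSE functions return the raw sum of
+// squared differences without subtracting sum^2 / (w * h).  For example, if
+// every pixel of src exceeds ref by the same constant d, the variance is
+// N*d^2 - (N*d)^2 / N = 0 while the MSE is N*d^2 (N being the block size),
+// which is why MaxTestMse below expects block_size() * 255 * 255.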
+ +template +void MainTestClass::RefTestMse() { + for (int i = 0; i < 10; ++i) { + for (int j = 0; j < block_size(); ++j) { + src_[j] = rnd_.Rand8(); + ref_[j] = rnd_.Rand8(); + } + unsigned int sse1, sse2; + const int stride = width(); + ASM_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1)); + variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, + stride, &sse2, false, VPX_BITS_8); + EXPECT_EQ(sse1, sse2); + } +} + +template +void MainTestClass::RefTestSse() { + for (int i = 0; i < 10; ++i) { + for (int j = 0; j < block_size(); ++j) { + src_[j] = rnd_.Rand8(); + ref_[j] = rnd_.Rand8(); + } + unsigned int sse2; + unsigned int var1; + const int stride = width(); + ASM_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride)); + variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, + stride, &sse2, false, VPX_BITS_8); + EXPECT_EQ(var1, sse2); + } +} + +template +void MainTestClass::MaxTestMse() { + memset(src_, 255, block_size()); + memset(ref_, 0, block_size()); + unsigned int sse; + ASM_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse)); + const unsigned int expected = block_size() * 255 * 255; + EXPECT_EQ(expected, sse); +} + +template +void MainTestClass::MaxTestSse() { + memset(src_, 255, block_size()); + memset(ref_, 0, block_size()); + unsigned int var; + ASM_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width())); + const unsigned int expected = block_size() * 255 * 255; + EXPECT_EQ(expected, var); +} + +//////////////////////////////////////////////////////////////////////////////// + +template +class SubpelVarianceTest + : public ::testing::TestWithParam > { + public: + virtual void SetUp() { + params_ = this->GetParam(); + + rnd_.Reset(ACMRandom::DeterministicSeed()); + if (!use_high_bit_depth()) { + src_ = reinterpret_cast(vpx_memalign(16, block_size())); + sec_ = reinterpret_cast(vpx_memalign(16, block_size())); + ref_ = reinterpret_cast( + vpx_malloc(block_size() + width() + height() + 1)); + } + ASSERT_NE(src_, nullptr); + ASSERT_NE(sec_, nullptr); + ASSERT_NE(ref_, nullptr); + } + + virtual void TearDown() { + if (!use_high_bit_depth()) { + vpx_free(src_); + vpx_free(sec_); + vpx_free(ref_); + } + libvpx_test::ClearSystemState(); + } + + protected: + void RefTest(); + void ExtremeRefTest(); + void SpeedTest(); + + ACMRandom rnd_; + uint8_t *src_; + uint8_t *ref_; + uint8_t *sec_; + TestParams params_; + + // some relay helpers + bool use_high_bit_depth() const { return params_.use_high_bit_depth; } + int byte_shift() const { return params_.bit_depth - 8; } + int block_size() const { return params_.block_size; } + int width() const { return params_.width; } + int height() const { return params_.height; } + uint32_t mask() const { return params_.mask; } +}; + +template +void SubpelVarianceTest::RefTest() { + for (int x = 0; x < 8; ++x) { + for (int y = 0; y < 8; ++y) { + if (!use_high_bit_depth()) { + for (int j = 0; j < block_size(); j++) { + src_[j] = rnd_.Rand8(); + } + for (int j = 0; j < block_size() + width() + height() + 1; j++) { + ref_[j] = rnd_.Rand8(); + } + } + unsigned int sse1, sse2; + unsigned int var1; + ASM_REGISTER_STATE_CHECK( + var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1)); + const unsigned int var2 = subpel_variance_ref( + ref_, src_, params_.log2width, params_.log2height, x, y, &sse2, + use_high_bit_depth(), params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; + EXPECT_EQ(var1, var2) << "at position " << 
x << ", " << y; + } + } +} + +template +void SubpelVarianceTest::ExtremeRefTest() { + // Compare against reference. + // Src: Set the first half of values to 0, the second half to the maximum. + // Ref: Set the first half of values to the maximum, the second half to 0. + for (int x = 0; x < 8; ++x) { + for (int y = 0; y < 8; ++y) { + const int half = block_size() / 2; + if (!use_high_bit_depth()) { + memset(src_, 0, half); + memset(src_ + half, 255, half); + memset(ref_, 255, half); + memset(ref_ + half, 0, half + width() + height() + 1); + } + unsigned int sse1, sse2; + unsigned int var1; + ASM_REGISTER_STATE_CHECK( + var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1)); + const unsigned int var2 = subpel_variance_ref( + ref_, src_, params_.log2width, params_.log2height, x, y, &sse2, + use_high_bit_depth(), params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y; + EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y; + } + } +} + +template +void SubpelVarianceTest::SpeedTest() { + // The only interesting points are 0, 4, and anything else. To make the loops + // simple we will use 0, 2 and 4. + for (int x = 0; x <= 4; x += 2) { + for (int y = 0; y <= 4; y += 2) { + if (!use_high_bit_depth()) { + memset(src_, 25, block_size()); + memset(ref_, 50, block_size()); + } + unsigned int sse; + vpx_usec_timer timer; + vpx_usec_timer_start(&timer); + for (int i = 0; i < 1000000000 / block_size(); ++i) { + const uint32_t variance = + params_.func(ref_, width() + 1, x, y, src_, width(), &sse); + (void)variance; + } + vpx_usec_timer_mark(&timer); + const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); + printf("SubpelVariance %dx%d xoffset: %d yoffset: %d time: %5d ms\n", + width(), height(), x, y, elapsed_time / 1000); + } + } +} + +template <> +void SubpelVarianceTest::RefTest() { + for (int x = 0; x < 8; ++x) { + for (int y = 0; y < 8; ++y) { + if (!use_high_bit_depth()) { + for (int j = 0; j < block_size(); j++) { + src_[j] = rnd_.Rand8(); + sec_[j] = rnd_.Rand8(); + } + for (int j = 0; j < block_size() + width() + height() + 1; j++) { + ref_[j] = rnd_.Rand8(); + } + } + uint32_t sse1, sse2; + uint32_t var1, var2; + ASM_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y, + src_, width(), &sse1, sec_)); + var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width, + params_.log2height, x, y, &sse2, + use_high_bit_depth(), params_.bit_depth); + EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; + EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; + } + } +} + +typedef MainTestClass VpxSseTest; +typedef MainTestClass VpxMseTest; +typedef MainTestClass VpxVarianceTest; +typedef SubpelVarianceTest VpxSubpelVarianceTest; +typedef SubpelVarianceTest VpxSubpelAvgVarianceTest; + +TEST_P(VpxSseTest, RefSse) { RefTestSse(); } +TEST_P(VpxSseTest, MaxSse) { MaxTestSse(); } +TEST_P(VpxMseTest, RefMse) { RefTestMse(); } +TEST_P(VpxMseTest, MaxMse) { MaxTestMse(); } +TEST_P(VpxVarianceTest, Zero) { ZeroTest(); } +TEST_P(VpxVarianceTest, Ref) { RefTest(); } +TEST_P(VpxVarianceTest, RefStride) { RefStrideTest(); } +TEST_P(VpxVarianceTest, OneQuarter) { OneQuarterTest(); } +TEST_P(SumOfSquaresTest, Const) { ConstTest(); } +TEST_P(SumOfSquaresTest, Ref) { RefTest(); } +TEST_P(VpxSubpelVarianceTest, Ref) { RefTest(); } +TEST_P(VpxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } +TEST_P(VpxSubpelAvgVarianceTest, Ref) { RefTest(); } + +INSTANTIATE_TEST_SUITE_P(C, SumOfSquaresTest, + 
::testing::Values(vpx_get_mb_ss_c)); + +typedef TestParams SseParams; +INSTANTIATE_TEST_SUITE_P(C, VpxSseTest, + ::testing::Values(SseParams(2, 2, + &vpx_get4x4sse_cs_c))); + +typedef TestParams MseParams; +INSTANTIATE_TEST_SUITE_P(C, VpxMseTest, + ::testing::Values(MseParams(4, 4, &vpx_mse16x16_c), + MseParams(4, 3, &vpx_mse16x8_c), + MseParams(3, 4, &vpx_mse8x16_c), + MseParams(3, 3, &vpx_mse8x8_c))); + +typedef TestParams VarianceParams; +INSTANTIATE_TEST_SUITE_P( + C, VpxVarianceTest, + ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_c), + VarianceParams(6, 5, &vpx_variance64x32_c), + VarianceParams(5, 6, &vpx_variance32x64_c), + VarianceParams(5, 5, &vpx_variance32x32_c), + VarianceParams(5, 4, &vpx_variance32x16_c), + VarianceParams(4, 5, &vpx_variance16x32_c), + VarianceParams(4, 4, &vpx_variance16x16_c), + VarianceParams(4, 3, &vpx_variance16x8_c), + VarianceParams(3, 4, &vpx_variance8x16_c), + VarianceParams(3, 3, &vpx_variance8x8_c), + VarianceParams(3, 2, &vpx_variance8x4_c), + VarianceParams(2, 3, &vpx_variance4x8_c), + VarianceParams(2, 2, &vpx_variance4x4_c))); + +typedef TestParams SubpelVarianceParams; +INSTANTIATE_TEST_SUITE_P( + C, VpxSubpelVarianceTest, + ::testing::Values( + SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_c, 0), + SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_c, 0), + SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_c, 0), + SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_c, 0), + SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_c, 0), + SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_c, 0), + SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_c, 0), + SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_c, 0), + SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_c, 0), + SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_c, 0), + SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_c, 0), + SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_c, 0), + SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_c, 0))); + +typedef TestParams SubpelAvgVarianceParams; +INSTANTIATE_TEST_SUITE_P( + C, VpxSubpelAvgVarianceTest, + ::testing::Values( + SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_c, 0), + SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_c, 0), + SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_c, 0), + SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_c, 0), + SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_c, 0), + SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_c, 0), + SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_c, 0), + SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_c, 0), + SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_c, 0), + SubpelAvgVarianceParams(3, 3, &vpx_sub_pixel_avg_variance8x8_c, 0), + SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_c, 0), + SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_c, 0), + SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_c, 0))); + +INSTANTIATE_TEST_SUITE_P(SVP64, SumOfSquaresTest, + ::testing::Values(vpx_get_mb_ss_svp64)); + +typedef TestParams SseParams; +INSTANTIATE_TEST_SUITE_P(SVP64, VpxSseTest, + ::testing::Values(SseParams(2, 2, + &vpx_get4x4sse_cs_svp64))); + +typedef TestParams MseParams; +INSTANTIATE_TEST_SUITE_P(SVP64, VpxMseTest, + ::testing::Values(MseParams(4, 4, &vpx_mse16x16_svp64), + MseParams(4, 3, &vpx_mse16x8_svp64), + MseParams(3, 4, 
&vpx_mse8x16_svp64), + MseParams(3, 3, &vpx_mse8x8_svp64))); + +typedef TestParams VarianceParams; +INSTANTIATE_TEST_SUITE_P( + SVP64, VpxVarianceTest, + ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_svp64), + VarianceParams(6, 5, &vpx_variance64x32_svp64), + VarianceParams(5, 6, &vpx_variance32x64_svp64), + VarianceParams(5, 5, &vpx_variance32x32_svp64), + VarianceParams(5, 4, &vpx_variance32x16_svp64), + VarianceParams(4, 5, &vpx_variance16x32_svp64), + VarianceParams(4, 4, &vpx_variance16x16_svp64), + VarianceParams(4, 3, &vpx_variance16x8_svp64), + VarianceParams(3, 4, &vpx_variance8x16_svp64), + VarianceParams(3, 3, &vpx_variance8x8_svp64), + VarianceParams(3, 2, &vpx_variance8x4_svp64), + VarianceParams(2, 3, &vpx_variance4x8_svp64), + VarianceParams(2, 2, &vpx_variance4x4_svp64))); + +typedef TestParams SubpelVarianceParams; +INSTANTIATE_TEST_SUITE_P( + SVP64, VpxSubpelVarianceTest, + ::testing::Values( + SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_svp64, 0), + SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_svp64, 0), + SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_svp64, 0), + SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_svp64, 0), + SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_svp64, 0), + SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_svp64, 0), + SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_svp64, 0), + SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_svp64, 0), + SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_svp64, 0), + SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_svp64, 0), + SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_svp64, 0), + SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_svp64, 0), + SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_svp64, 0))); + +typedef TestParams SubpelAvgVarianceParams; +INSTANTIATE_TEST_SUITE_P( + SVP64, VpxSubpelAvgVarianceTest, + ::testing::Values( + SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_svp64, 0), + SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_svp64, 0), + SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_svp64, 0), + SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_svp64, 0), + SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_svp64, 0), + SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_svp64, 0), + SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_svp64, 0), + SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_svp64, 0), + SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_svp64, 0), + SubpelAvgVarianceParams(3, 3, &vpx_sub_pixel_avg_variance8x8_svp64, 0), + SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_svp64, 0), + SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_svp64, 0), + SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_svp64, 0))); + +} // namespace diff --git a/media/video/libvpx/variancefuncs_svp64.c.in b/media/video/libvpx/variancefuncs_svp64.c.in new file mode 100644 index 00000000..94ecf4db --- /dev/null +++ b/media/video/libvpx/variancefuncs_svp64.c.in @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdint.h>
+
+uint32_t vpx_get_mb_ss_svp64_real(const int16_t *src_ptr) {
+  unsigned int i, sum = 0;
+
+  for (i = 0; i < 256; ++i) {
+    sum += src_ptr[i] * src_ptr[i];
+  }
+
+  return sum;
+}
+
diff --git a/media/video/libvpx/variancefuncs_svp64.s b/media/video/libvpx/variancefuncs_svp64.s
new file mode 100644
index 00000000..6d33d6b9
--- /dev/null
+++ b/media/video/libvpx/variancefuncs_svp64.s
@@ -0,0 +1,27 @@
+    .file   "variancefuncs_svp64.c"
+    .abiversion 2
+    .section    ".text"
+    .align 2
+    .globl vpx_get_mb_ss_svp64_real
+    .type   vpx_get_mb_ss_svp64_real, @function
+vpx_get_mb_ss_svp64_real:
+.LFB0:
+    .cfi_startproc
+    addi 10,3,-2
+    li 3,0
+    li 9,256
+    mtctr 9
+.L2:
+    lhau 9,2(10)
+    mullw 9,9,9
+    add 9,9,3
+    rldicl 3,9,0,32
+    bdnz .L2
+    blr
+    .long 0
+    .byte 0,0,0,0,0,0,0,0
+    .cfi_endproc
+.LFE0:
+    .size   vpx_get_mb_ss_svp64_real,.-vpx_get_mb_ss_svp64_real
+    .ident  "GCC: (Debian 8.3.0-6) 8.3.0"
+    .section    .note.GNU-stack,"",@progbits
diff --git a/media/video/libvpx/vpx_mem.c b/media/video/libvpx/vpx_mem.c
new file mode 100644
index 00000000..27f978ce
--- /dev/null
+++ b/media/video/libvpx/vpx_mem.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "vpx_mem.h"
+#include "vpx_mem_intrnl.h"
+#include "vpx_integer.h"
+
+#if !defined(VPX_MAX_ALLOCABLE_MEMORY)
+#if SIZE_MAX > (1ULL << 40)
+#define VPX_MAX_ALLOCABLE_MEMORY (1ULL << 40)
+#else
+// For 32-bit targets keep this below INT_MAX to avoid valgrind warnings.
+#define VPX_MAX_ALLOCABLE_MEMORY ((1ULL << 31) - (1 << 16))
+#endif
+#endif
+
+// Returns 0 in case of overflow of nmemb * size.
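+// For instance (illustrative values), on a 64-bit target nmemb = 1 << 32 and
+// size = 1 << 32 would wrap the 64-bit product to 0; the division-based test
+// size > VPX_MAX_ALLOCABLE_MEMORY / nmemb rejects such a request before the
+// multiplication can overflow.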
+static int check_size_argument_overflow(uint64_t nmemb, uint64_t size) {
+  const uint64_t total_size = nmemb * size;
+  if (nmemb == 0) return 1;
+  if (size > VPX_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
+  if (total_size != (size_t)total_size) return 0;
+
+  return 1;
+}
+
+static size_t *get_malloc_address_location(void *const mem) {
+  return ((size_t *)mem) - 1;
+}
+
+static uint64_t get_aligned_malloc_size(size_t size, size_t align) {
+  return (uint64_t)size + align - 1 + ADDRESS_STORAGE_SIZE;
+}
+
+static void set_actual_malloc_address(void *const mem,
+                                      const void *const malloc_addr) {
+  size_t *const malloc_addr_location = get_malloc_address_location(mem);
+  *malloc_addr_location = (size_t)malloc_addr;
+}
+
+static void *get_actual_malloc_address(void *const mem) {
+  size_t *const malloc_addr_location = get_malloc_address_location(mem);
+  return (void *)(*malloc_addr_location);
+}
+
+void *vpx_memalign(size_t align, size_t size) {
+  void *x = NULL, *addr;
+  const uint64_t aligned_size = get_aligned_malloc_size(size, align);
+  if (!check_size_argument_overflow(1, aligned_size)) return NULL;
+
+  addr = malloc((size_t)aligned_size);
+  if (addr) {
+    x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, align);
+    set_actual_malloc_address(x, addr);
+  }
+  return x;
+}
+
+void *vpx_malloc(size_t size) { return vpx_memalign(DEFAULT_ALIGNMENT, size); }
+
+void *vpx_calloc(size_t num, size_t size) {
+  void *x;
+  if (!check_size_argument_overflow(num, size)) return NULL;
+
+  x = vpx_malloc(num * size);
+  if (x) memset(x, 0, num * size);
+  return x;
+}
+
+void vpx_free(void *memblk) {
+  if (memblk) {
+    void *addr = get_actual_malloc_address(memblk);
+    free(addr);
+  }
+}
-- 
2.30.2
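For reference, a minimal usage sketch of the aligned-allocation scheme in vpx_mem.c above: vpx_memalign() over-allocates, stores the raw malloc() pointer in the size_t slot just below the aligned address it hands back, and vpx_free() reads that slot to release the original block. The alignment and buffer size below are arbitrary illustrative values, not part of the patch.

#include <assert.h>
#include <stdint.h>

#include "vpx_mem.h"

int main(void) {
  /* Request a 32-byte-aligned buffer of 1000 bytes. */
  uint8_t *buf = (uint8_t *)vpx_memalign(32, 1000);
  assert(buf != NULL);
  /* The returned pointer is aligned; the raw malloc() address is stashed
   * immediately before it (see set_actual_malloc_address above). */
  assert(((uintptr_t)buf & 31) == 0);

  buf[0] = 0xff; /* use the buffer as usual */

  /* vpx_free() recovers the stashed pointer and frees the whole block. */
  vpx_free(buf);
  return 0;
}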