--- /dev/null
+TARGET=dav1d_svp64_test
+
+CC=gcc
+CXX=g++
+AS=powerpc64le-linux-gnu-as
+OBJCOPY=powerpc64le-linux-gnu-objcopy
+CFLAGS= -O -g3 -std=c99 -I../../pypowersim_wrapper -I. -Iinclude -I/usr/include/python3.7m -DHAVE_SVP64 -D_GNU_SOURCE -DNDEBUG -D_FILE_OFFSET_BITS=64 -DBITDEPTH=16 -Wundef -Werror=vla -Wno-maybe-uninitialized -Wno-missing-field-initializers -Wno-unused-parameter -Wstrict-prototypes -Werror=missing-prototypes -fomit-frame-pointer
+CXXFLAGS= -Iinclude -O -g3
+ASFLAGS= -mlibresoc -mregnames
+LDFLAGS=-lgtest -pthread -lpython3.7m
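+# Links googletest and an embedded Python interpreter; the latter is
+# presumably needed by the pypowersim_wrapper Power ISA simulator that
+# executes the SVP64 routines.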
+
+BINFILES =
+ASFILES = src/ppc/cdef_tmpl_svp64_real.s
+CFILES = cdef.c checkasm.c src/cpu.c src/ppc/cpu.c src/ppc/cdef_tmpl_svp64_wrapper.c src/cdef_tmpl.c src/tables.c src/log.c
+CPPFILES =
+OBJFILES = $(ASFILES:.s=.o) $(CFILES:.c=.o) $(CPPFILES:.cc=.o)
+
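+# Raw-binary rule: strip the ELF container from assembled objects (BINFILES
+# is empty here; presumably for feeding SVP64 code to the simulator).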
+%.bin: %.o
+ ${OBJCOPY} -I elf64-little -O binary $< $@
+
+${TARGET}: ${OBJFILES}
+ ${CXX} -o ${TARGET} ${OBJFILES} ${LDFLAGS}
+
+all: ${TARGET} ${BINFILES}
+
+.PHONY: all clean
+clean:
+ rm -f ${TARGET} ${OBJFILES} ${BINFILES}
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "checkasm.h"
+
+#include <string.h>
+#include <stdio.h>
+
+#include "common/dump.h"
+
+#include "src/levels.h"
+#include "src/cdef.h"
+
+static int to_binary(int x) { /* 0-15 -> 0000-1111 */
+ return (x & 1) + 5 * (x & 2) + 25 * (x & 4) + 125 * (x & 8);
+}
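+/* e.g. to_binary(0x5) == 101, which "%04d" then prints as "0101" */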
+
+static void init_tmp(pixel *buf, int n, const int bitdepth_max) {
+ const int fill_type = rnd() & 7;
+ if (fill_type == 0)
+ while (n--) /* check for cdef_filter underflows */
+ *buf++ = rnd() & 1;
+ else if (fill_type == 1)
+ while (n--) /* check for cdef_filter overflows */
+ *buf++ = bitdepth_max - (rnd() & 1);
+ else
+ while (n--)
+ *buf++ = rnd() & bitdepth_max;
+}
+
+static void check_cdef_filter(const cdef_fn fn, const int w, const int h) {
+ ALIGN_STK_64(pixel, c_src, 16 * 10 + 16, ), *const c_dst = c_src + 8;
+ ALIGN_STK_64(pixel, a_src, 16 * 10 + 16, ), *const a_dst = a_src + 8;
+ ALIGN_STK_64(pixel, top_buf, 16 * 2 + 16, ), *const top = top_buf + 8;
+ ALIGN_STK_64(pixel, bot_buf, 16 * 2 + 16, ), *const bot = bot_buf + 8;
+ ALIGN_STK_16(pixel, left, 8,[2]);
+ const ptrdiff_t stride = 16 * sizeof(pixel);
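+    /* rows are 16 pixels wide; dst starts 8 pixels into each row so the
+     * filter has a border of valid pixels on either side of the block */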
+
+ declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel (*left)[2],
+ const pixel *top, const pixel *bot, int pri_strength,
+ int sec_strength, int dir, int damping,
+ enum CdefEdgeFlags edges HIGHBD_DECL_SUFFIX);
+
+ for (int s = 0x1; s <= 0x3; s++) {
+ if (check_func(fn, "cdef_filter_%dx%d_%02d_%dbpc", w, h, to_binary(s), BITDEPTH)) {
+ for (int dir = 0; dir < 8; dir++) {
+ for (enum CdefEdgeFlags edges = 0x0; edges <= 0xf; edges++) {
+#if BITDEPTH == 16
+ const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
+
+ init_tmp(c_src, 16 * 10 + 16, bitdepth_max);
+ init_tmp(top_buf, 16 * 2 + 16, bitdepth_max);
+ init_tmp(bot_buf, 16 * 2 + 16, bitdepth_max);
+ init_tmp((pixel *) left, 8 * 2, bitdepth_max);
+ memcpy(a_src, c_src, (16 * 10 + 16) * sizeof(pixel));
+
+ const int pri_strength = s & 2 ? (1 + (rnd() % 15)) << bitdepth_min_8 : 0;
+ const int sec_strength = s & 1 ? 1 << ((rnd() % 3) + bitdepth_min_8) : 0;
+ const int damping = 3 + (rnd() & 3) + bitdepth_min_8 - (w == 4 || (rnd() & 1));
+ call_ref(c_dst, stride, left, top, bot, pri_strength, sec_strength,
+ dir, damping, edges HIGHBD_TAIL_SUFFIX);
+ call_new(a_dst, stride, left, top, bot, pri_strength, sec_strength,
+ dir, damping, edges HIGHBD_TAIL_SUFFIX);
+ if (checkasm_check_pixel(c_dst, stride, a_dst, stride, w, h, "dst")) {
+ fprintf(stderr, "strength = %d:%d, dir = %d, damping = %d, edges = %04d\n",
+ pri_strength, sec_strength, dir, damping, to_binary(edges));
+ return;
+ }
+ if (dir == 7 && (edges == 0x5 || edges == 0xa || edges == 0xf))
+ bench_new(alternate(c_dst, a_dst), stride, left, top, bot, pri_strength,
+ sec_strength, dir, damping, edges HIGHBD_TAIL_SUFFIX);
+ }
+ }
+ }
+ }
+}
+
+static void check_cdef_direction(const cdef_dir_fn fn) {
+ ALIGN_STK_64(pixel, src, 8 * 8,);
+
+ declare_func(int, pixel *src, ptrdiff_t dst_stride, unsigned *var
+ HIGHBD_DECL_SUFFIX);
+
+ if (check_func(fn, "cdef_dir_%dbpc", BITDEPTH)) {
+ unsigned c_var, a_var;
+#if BITDEPTH == 16
+ const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
+#else
+ const int bitdepth_max = 0xff;
+#endif
+ init_tmp(src, 64, bitdepth_max);
+
+ const int c_dir = call_ref(src, 8 * sizeof(pixel), &c_var HIGHBD_TAIL_SUFFIX);
+ const int a_dir = call_new(src, 8 * sizeof(pixel), &a_var HIGHBD_TAIL_SUFFIX);
+ if (c_var != a_var || c_dir != a_dir) {
+ if (fail()) {
+ hex_fdump(stderr, src, 8 * sizeof(pixel), 8, 8, "src");
+ fprintf(stderr, "c_dir %d a_dir %d\n", c_dir, a_dir);
+ }
+ }
+ bench_new(src, 8 * sizeof(pixel), &a_var HIGHBD_TAIL_SUFFIX);
+ }
+ report("cdef_dir");
+}
+
+void bitfn(checkasm_check_cdef)(void) {
+ Dav1dCdefDSPContext c;
+ bitfn(dav1d_cdef_dsp_init)(&c);
+
+ check_cdef_direction(c.dir);
+
+ check_cdef_filter(c.fb[0], 8, 8);
+ check_cdef_filter(c.fb[1], 4, 8);
+ check_cdef_filter(c.fb[2], 4, 4);
+ report("cdef_filter");
+}
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "checkasm.h"
+
+#include <math.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "src/cpu.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#define COLOR_RED FOREGROUND_RED
+#define COLOR_GREEN FOREGROUND_GREEN
+#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
+#else
+#include <unistd.h>
+#include <signal.h>
+#include <time.h>
+#ifdef __APPLE__
+#include <mach/mach_time.h>
+#endif
+#define COLOR_RED 1
+#define COLOR_GREEN 2
+#define COLOR_YELLOW 3
+#endif
+
+/* List of tests to invoke */
+static const struct {
+ const char *name;
+ void (*func)(void);
+} tests[] = {
+#if CONFIG_16BPC
+ { "cdef_16bpc", checkasm_check_cdef_16bpc },
+#endif
+ { 0 }
+};
+
+/* List of cpu flags to check */
+static const struct {
+ const char *name;
+ const char *suffix;
+ unsigned flag;
+} cpus[] = {
+#if ARCH_X86
+ { "SSE2", "sse2", DAV1D_X86_CPU_FLAG_SSE2 },
+ { "SSSE3", "ssse3", DAV1D_X86_CPU_FLAG_SSSE3 },
+ { "SSE4.1", "sse4", DAV1D_X86_CPU_FLAG_SSE41 },
+ { "AVX2", "avx2", DAV1D_X86_CPU_FLAG_AVX2 },
+ { "AVX-512 (Ice Lake)", "avx512icl", DAV1D_X86_CPU_FLAG_AVX512ICL },
+#elif ARCH_AARCH64 || ARCH_ARM
+ { "NEON", "neon", DAV1D_ARM_CPU_FLAG_NEON },
+#elif ARCH_PPC64LE
+ { "VSX", "vsx", DAV1D_PPC_CPU_FLAG_VSX },
+ { "SVP64", "svp64", DAV1D_PPC_CPU_FLAG_SVP64 },
+#endif
+ { 0 }
+};
+
+typedef struct CheckasmFuncVersion {
+ struct CheckasmFuncVersion *next;
+ void *func;
+ int ok;
+ unsigned cpu;
+ int iterations;
+ uint64_t cycles;
+} CheckasmFuncVersion;
+
+/* Binary search tree node */
+typedef struct CheckasmFunc {
+ struct CheckasmFunc *child[2];
+ CheckasmFuncVersion versions;
+ uint8_t color; /* 0 = red, 1 = black */
+ char name[];
+} CheckasmFunc;
+
+/* Internal state */
+static struct {
+ CheckasmFunc *funcs;
+ CheckasmFunc *current_func;
+ CheckasmFuncVersion *current_func_ver;
+ const char *current_test_name;
+ int num_checked;
+ int num_failed;
+ int nop_time;
+ unsigned cpu_flag;
+ const char *cpu_flag_name;
+ const char *test_pattern;
+ const char *function_pattern;
+ unsigned seed;
+ int bench;
+ int bench_c;
+ int verbose;
+ int function_listing;
+ int catch_signals;
+#if ARCH_X86_64
+ void (*simd_warmup)(void);
+#endif
+} state;
+
+/* float compare support code */
+typedef union {
+ float f;
+ uint32_t i;
+} intfloat;
+
+static uint32_t xs_state[4];
+
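+/* Mix the seed into all four state words so the xorshift state can never
+ * be all zero (an all-zero state would remain zero forever). */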
+static void xor128_srand(unsigned seed) {
+ xs_state[0] = seed;
+ xs_state[1] = ( seed & 0xffff0000) | (~seed & 0x0000ffff);
+ xs_state[2] = (~seed & 0xffff0000) | ( seed & 0x0000ffff);
+ xs_state[3] = ~seed;
+}
+
+// xor128 from Marsaglia, George (July 2003). "Xorshift RNGs".
+// Journal of Statistical Software. 8 (14).
+// doi:10.18637/jss.v008.i14.
+int xor128_rand(void) {
+ const uint32_t x = xs_state[0];
+ const uint32_t t = x ^ (x << 11);
+
+ xs_state[0] = xs_state[1];
+ xs_state[1] = xs_state[2];
+ xs_state[2] = xs_state[3];
+ uint32_t w = xs_state[3];
+
+ w = (w ^ (w >> 19)) ^ (t ^ (t >> 8));
+ xs_state[3] = w;
+
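+    /* return 31 bits so the value is always non-negative as an int */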
+ return w >> 1;
+}
+
+static int is_negative(const intfloat u) {
+ return u.i >> 31;
+}
+
+int float_near_ulp(const float a, const float b, const unsigned max_ulp) {
+ intfloat x, y;
+
+ x.f = a;
+ y.f = b;
+
+ if (is_negative(x) != is_negative(y)) {
+ // handle -0.0 == +0.0
+ return a == b;
+ }
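+    /* for same-sign IEEE-754 values the bit patterns are ordered like the
+     * magnitudes, so the ULP distance is just the integer difference */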
+
+ if (llabs((int64_t)x.i - y.i) <= max_ulp)
+ return 1;
+
+ return 0;
+}
+
+int float_near_ulp_array(const float *const a, const float *const b,
+ const unsigned max_ulp, const int len)
+{
+ for (int i = 0; i < len; i++)
+ if (!float_near_ulp(a[i], b[i], max_ulp))
+ return 0;
+
+ return 1;
+}
+
+int float_near_abs_eps(const float a, const float b, const float eps) {
+ return fabsf(a - b) < eps;
+}
+
+int float_near_abs_eps_array(const float *const a, const float *const b,
+ const float eps, const int len)
+{
+ for (int i = 0; i < len; i++)
+ if (!float_near_abs_eps(a[i], b[i], eps))
+ return 0;
+
+ return 1;
+}
+
+int float_near_abs_eps_ulp(const float a, const float b, const float eps,
+ const unsigned max_ulp)
+{
+ return float_near_ulp(a, b, max_ulp) || float_near_abs_eps(a, b, eps);
+}
+
+int float_near_abs_eps_array_ulp(const float *const a, const float *const b,
+ const float eps, const unsigned max_ulp,
+ const int len)
+{
+ for (int i = 0; i < len; i++)
+ if (!float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp))
+ return 0;
+
+ return 1;
+}
+
+/* Print colored text to stderr if the terminal supports it */
+static void color_printf(const int color, const char *const fmt, ...) {
+ static int8_t use_color = -1;
+ va_list arg;
+
+#ifdef _WIN32
+ static HANDLE con;
+ static WORD org_attributes;
+
+ if (use_color < 0) {
+ CONSOLE_SCREEN_BUFFER_INFO con_info;
+ con = GetStdHandle(STD_ERROR_HANDLE);
+ if (con && con != INVALID_HANDLE_VALUE &&
+ GetConsoleScreenBufferInfo(con, &con_info))
+ {
+ org_attributes = con_info.wAttributes;
+ use_color = 1;
+ } else
+ use_color = 0;
+ }
+ if (use_color)
+ SetConsoleTextAttribute(con, (org_attributes & 0xfff0) |
+ (color & 0x0f));
+#else
+ if (use_color < 0) {
+ const char *const term = getenv("TERM");
+ use_color = term && strcmp(term, "dumb") && isatty(2);
+ }
+ if (use_color)
+ fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
+#endif
+
+ va_start(arg, fmt);
+ vfprintf(stderr, fmt, arg);
+ va_end(arg);
+
+ if (use_color) {
+#ifdef _WIN32
+ SetConsoleTextAttribute(con, org_attributes);
+#else
+ fprintf(stderr, "\x1b[0m");
+#endif
+ }
+}
+
+/* Deallocate a tree */
+static void destroy_func_tree(CheckasmFunc *const f) {
+ if (f) {
+ CheckasmFuncVersion *v = f->versions.next;
+ while (v) {
+ CheckasmFuncVersion *next = v->next;
+ free(v);
+ v = next;
+ }
+
+ destroy_func_tree(f->child[0]);
+ destroy_func_tree(f->child[1]);
+ free(f);
+ }
+}
+
+/* Allocate a zero-initialized block, clean up and exit on failure */
+static void *checkasm_malloc(const size_t size) {
+ void *const ptr = calloc(1, size);
+ if (!ptr) {
+ fprintf(stderr, "checkasm: malloc failed\n");
+ destroy_func_tree(state.funcs);
+ exit(1);
+ }
+ return ptr;
+}
+
+/* Get the suffix of the specified cpu flag */
+static const char *cpu_suffix(const unsigned cpu) {
+ for (int i = (int)(sizeof(cpus) / sizeof(*cpus)) - 2; i >= 0; i--)
+ if (cpu & cpus[i].flag)
+ return cpus[i].suffix;
+
+ return "c";
+}
+
+#ifdef readtime
+static int cmp_nop(const void *a, const void *b) {
+ return *(const uint16_t*)a - *(const uint16_t*)b;
+}
+
+/* Measure the overhead of the timing code (in decicycles) */
+static int measure_nop_time(void) {
+ uint16_t nops[10000];
+ int nop_sum = 0;
+
+ for (int i = 0; i < 10000; i++) {
+ uint64_t t = readtime();
+ nops[i] = (uint16_t) (readtime() - t);
+ }
+
+ qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
+ for (int i = 2500; i < 7500; i++)
+ nop_sum += nops[i];
+
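+    /* mean of the middle 5000 samples, scaled by 10 (decicycles) */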
+ return nop_sum / 500;
+}
+
+/* Print benchmark results */
+static void print_benchs(const CheckasmFunc *const f) {
+ if (f) {
+ print_benchs(f->child[0]);
+
+ /* Only print functions with at least one assembly version */
+ if (state.bench_c || f->versions.cpu || f->versions.next) {
+ const CheckasmFuncVersion *v = &f->versions;
+ do {
+ if (v->iterations) {
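+                    /* each timed iteration in bench_new() makes 4 calls,
+                     * hence the division by 4 */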
+ const int decicycles = (int) (10*v->cycles/v->iterations -
+ state.nop_time) / 4;
+ printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu),
+ decicycles/10, decicycles%10);
+ }
+ } while ((v = v->next));
+ }
+
+ print_benchs(f->child[1]);
+ }
+}
+#endif
+
+static void print_functions(const CheckasmFunc *const f) {
+ if (f) {
+ print_functions(f->child[0]);
+ printf("%s\n", f->name);
+ print_functions(f->child[1]);
+ }
+}
+
+#define is_digit(x) ((x) >= '0' && (x) <= '9')
+
+/* ASCIIbetical sort except preserving natural order for numbers */
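+/* (e.g. "cdef_filter_4x4" sorts before "cdef_filter_16x16") */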
+static int cmp_func_names(const char *a, const char *b) {
+ const char *const start = a;
+ int ascii_diff, digit_diff;
+
+ for (; !(ascii_diff = *(const unsigned char*)a -
+ *(const unsigned char*)b) && *a; a++, b++);
+ for (; is_digit(*a) && is_digit(*b); a++, b++);
+
+ if (a > start && is_digit(a[-1]) &&
+ (digit_diff = is_digit(*a) - is_digit(*b)))
+ {
+ return digit_diff;
+ }
+
+ return ascii_diff;
+}
+
+/* Perform a tree rotation in the specified direction and return the new root */
+static CheckasmFunc *rotate_tree(CheckasmFunc *const f, const int dir) {
+ CheckasmFunc *const r = f->child[dir^1];
+ f->child[dir^1] = r->child[dir];
+ r->child[dir] = f;
+ r->color = f->color;
+ f->color = 0;
+ return r;
+}
+
+#define is_red(f) ((f) && !(f)->color)
+
+/* Balance a left-leaning red-black tree at the specified node */
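+/* (a color flip for two red children, a left rotation for a right-leaning
+ * red link, a right rotation for two consecutive left red links) */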
+static void balance_tree(CheckasmFunc **const root) {
+ CheckasmFunc *const f = *root;
+
+ if (is_red(f->child[0]) && is_red(f->child[1])) {
+ f->color ^= 1;
+ f->child[0]->color = f->child[1]->color = 1;
+ }
+ else if (!is_red(f->child[0]) && is_red(f->child[1]))
+ *root = rotate_tree(f, 0); /* Rotate left */
+ else if (is_red(f->child[0]) && is_red(f->child[0]->child[0]))
+ *root = rotate_tree(f, 1); /* Rotate right */
+}
+
+/* Get a node with the specified name, creating it if it doesn't exist */
+static CheckasmFunc *get_func(CheckasmFunc **const root, const char *const name) {
+ CheckasmFunc *f = *root;
+
+ if (f) {
+ /* Search the tree for a matching node */
+ const int cmp = cmp_func_names(name, f->name);
+ if (cmp) {
+ f = get_func(&f->child[cmp > 0], name);
+
+ /* Rebalance the tree on the way up if a new node was inserted */
+ if (!f->versions.func)
+ balance_tree(root);
+ }
+ } else {
+ /* Allocate and insert a new node into the tree */
+ const size_t name_length = strlen(name) + 1;
+ f = *root = checkasm_malloc(offsetof(CheckasmFunc, name) + name_length);
+ memcpy(f->name, name, name_length);
+ }
+
+ return f;
+}
+
+checkasm_context checkasm_context_buf;
+
+/* Crash handling: attempt to catch crashes and handle them
+ * gracefully instead of just aborting abruptly. */
+#ifdef _WIN32
+static LONG NTAPI signal_handler(EXCEPTION_POINTERS *const e) {
+ if (!state.catch_signals)
+ return EXCEPTION_CONTINUE_SEARCH;
+
+ const char *err;
+ switch (e->ExceptionRecord->ExceptionCode) {
+ case EXCEPTION_FLT_DIVIDE_BY_ZERO:
+ case EXCEPTION_INT_DIVIDE_BY_ZERO:
+ err = "fatal arithmetic error";
+ break;
+ case EXCEPTION_ILLEGAL_INSTRUCTION:
+ case EXCEPTION_PRIV_INSTRUCTION:
+ err = "illegal instruction";
+ break;
+ case EXCEPTION_ACCESS_VIOLATION:
+ case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
+ case EXCEPTION_DATATYPE_MISALIGNMENT:
+ case EXCEPTION_IN_PAGE_ERROR:
+ case EXCEPTION_STACK_OVERFLOW:
+ err = "segmentation fault";
+ break;
+ default:
+ return EXCEPTION_CONTINUE_SEARCH;
+ }
+ state.catch_signals = 0;
+ checkasm_fail_func(err);
+ checkasm_load_context();
+ return EXCEPTION_CONTINUE_EXECUTION; /* never reached, but shuts up gcc */
+}
+#else
+static void signal_handler(const int s) {
+ if (state.catch_signals) {
+ state.catch_signals = 0;
+ checkasm_fail_func(s == SIGFPE ? "fatal arithmetic error" :
+ s == SIGILL ? "illegal instruction" :
+ "segmentation fault");
+ checkasm_load_context();
+ } else {
+ /* fall back to the default signal handler */
+ static const struct sigaction default_sa = { .sa_handler = SIG_DFL };
+ sigaction(s, &default_sa, NULL);
+ raise(s);
+ }
+}
+#endif
+
+/* Compares a string with a wildcard pattern. */
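+/* Returns 0 on match, e.g. wildstrcmp("cdef_dir_8bpc", "cdef_*8bpc") == 0. */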
+static int wildstrcmp(const char *str, const char *pattern) {
+ const char *wild = strchr(pattern, '*');
+ if (wild) {
+ const size_t len = wild - pattern;
+ if (strncmp(str, pattern, len)) return 1;
+ while (*++wild == '*');
+ if (!*wild) return 0;
+ str += len;
+ while (*str && wildstrcmp(str, wild)) str++;
+ return !*str;
+ }
+ return strcmp(str, pattern);
+}
+
+/* Perform tests and benchmarks for the specified
+ * cpu flag if supported by the host */
+static void check_cpu_flag(const char *const name, unsigned flag) {
+ const unsigned old_cpu_flag = state.cpu_flag;
+
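+    /* cpu flags are cumulative: each flag is tested with all previously
+     * enabled flags still set */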
+ flag |= old_cpu_flag;
+ dav1d_set_cpu_flags_mask(flag);
+ state.cpu_flag = dav1d_get_cpu_flags();
+
+ if (!flag || state.cpu_flag != old_cpu_flag) {
+ state.cpu_flag_name = name;
+ for (int i = 0; tests[i].func; i++) {
+ if (state.test_pattern && wildstrcmp(tests[i].name, state.test_pattern))
+ continue;
+ xor128_srand(state.seed);
+ state.current_test_name = tests[i].name;
+ tests[i].func();
+ }
+ }
+}
+
+/* Print the name of the current CPU flag, but only do it once */
+static void print_cpu_name(void) {
+ if (state.cpu_flag_name) {
+ color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name);
+ state.cpu_flag_name = NULL;
+ }
+}
+
+static unsigned get_seed(void) {
+#ifdef _WIN32
+ LARGE_INTEGER i;
+ QueryPerformanceCounter(&i);
+ return i.LowPart;
+#elif defined(__APPLE__)
+ return (unsigned) mach_absolute_time();
+#else
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return (unsigned) (1000000000ULL * ts.tv_sec + ts.tv_nsec);
+#endif
+}
+
+int main(int argc, char *argv[]) {
+ state.seed = get_seed();
+
+ while (argc > 1) {
+ if (!strncmp(argv[1], "--help", 6) || !strcmp(argv[1], "-h")) {
+ fprintf(stderr,
+ "checkasm [options] <random seed>\n"
+ " <random seed> Numeric value to seed the rng\n"
+ "Options:\n"
+ " --test=<pattern> Test only <pattern>\n"
+ " --function=<pattern> -f Test only the functions matching <pattern>\n"
+ " --bench -b Benchmark the tested functions\n"
+ " --list-functions List available functions\n"
+ " --list-tests List available tests\n"
+ " --bench-c -c Benchmark the C-only functions\n"
+ " --verbose -v Print failures verbosely\n");
+ return 0;
+ } else if (!strcmp(argv[1], "--bench-c") || !strcmp(argv[1], "-c")) {
+ state.bench_c = 1;
+ } else if (!strcmp(argv[1], "--bench") || !strcmp(argv[1], "-b")) {
+#ifndef readtime
+ fprintf(stderr,
+ "checkasm: --bench is not supported on your system\n");
+ return 1;
+#endif
+ state.bench = 1;
+ } else if (!strncmp(argv[1], "--test=", 7)) {
+ state.test_pattern = argv[1] + 7;
+ } else if (!strcmp(argv[1], "-t")) {
+            state.test_pattern = argc > 2 ? argv[2] : "";
+ argc--;
+ argv++;
+ } else if (!strncmp(argv[1], "--function=", 11)) {
+ state.function_pattern = argv[1] + 11;
+ } else if (!strcmp(argv[1], "-f")) {
+            state.function_pattern = argc > 2 ? argv[2] : "";
+ argc--;
+ argv++;
+ } else if (!strcmp(argv[1], "--list-functions")) {
+ state.function_listing = 1;
+ } else if (!strcmp(argv[1], "--list-tests")) {
+ for (int i = 0; tests[i].name; i++)
+ printf("%s\n", tests[i].name);
+ return 0;
+ } else if (!strcmp(argv[1], "--verbose") || !strcmp(argv[1], "-v")) {
+ state.verbose = 1;
+ } else {
+ state.seed = (unsigned) strtoul(argv[1], NULL, 10);
+ }
+
+ argc--;
+ argv++;
+ }
+
+#if TRIM_DSP_FUNCTIONS
+ fprintf(stderr, "checkasm: reference functions unavailable\n");
+ return 0;
+#endif
+
+ dav1d_init_cpu();
+
+#ifdef _WIN32
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+ AddVectoredExceptionHandler(0, signal_handler);
+#endif
+#else
+ const struct sigaction sa = {
+ .sa_handler = signal_handler,
+ .sa_flags = SA_NODEFER,
+ };
+ sigaction(SIGBUS, &sa, NULL);
+ sigaction(SIGFPE, &sa, NULL);
+ sigaction(SIGILL, &sa, NULL);
+ sigaction(SIGSEGV, &sa, NULL);
+#endif
+
+#ifdef readtime
+ if (state.bench) {
+ static int testing = 0;
+ checkasm_save_context();
+ if (!testing) {
+ checkasm_set_signal_handler_state(1);
+ testing = 1;
+ readtime();
+ checkasm_set_signal_handler_state(0);
+ } else {
+ fprintf(stderr, "checkasm: unable to access cycle counter\n");
+ return 1;
+ }
+ }
+#endif
+
+ int ret = 0;
+
+ if (!state.function_listing) {
+#if ARCH_X86_64
+ void checkasm_warmup_avx2(void);
+ void checkasm_warmup_avx512(void);
+ const unsigned cpu_flags = dav1d_get_cpu_flags();
+ if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX512ICL)
+ state.simd_warmup = checkasm_warmup_avx512;
+ else if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX2)
+ state.simd_warmup = checkasm_warmup_avx2;
+ checkasm_simd_warmup();
+#endif
+#if ARCH_X86
+ unsigned checkasm_init_x86(char *name);
+ char name[48];
+ const unsigned cpuid = checkasm_init_x86(name);
+ for (size_t len = strlen(name); len && name[len-1] == ' '; len--)
+ name[len-1] = '\0'; /* trim trailing whitespace */
+ fprintf(stderr, "checkasm: %s (%08X) using random seed %u\n", name, cpuid, state.seed);
+#else
+ fprintf(stderr, "checkasm: using random seed %u\n", state.seed);
+#endif
+ }
+
+ check_cpu_flag(NULL, 0);
+
+ if (state.function_listing) {
+ print_functions(state.funcs);
+ } else {
+ for (int i = 0; cpus[i].flag; i++)
+ check_cpu_flag(cpus[i].name, cpus[i].flag);
+ if (!state.num_checked) {
+ fprintf(stderr, "checkasm: no tests to perform\n");
+ } else if (state.num_failed) {
+ fprintf(stderr, "checkasm: %d of %d tests have failed\n",
+ state.num_failed, state.num_checked);
+ ret = 1;
+ } else {
+ fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
+#ifdef readtime
+ if (state.bench) {
+ state.nop_time = measure_nop_time();
+ printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
+ print_benchs(state.funcs);
+ }
+#endif
+ }
+ }
+
+ destroy_func_tree(state.funcs);
+ return ret;
+}
+
+/* Decide whether or not the specified function needs to be tested and
+ * allocate/initialize data structures if needed. Returns a pointer to a
+ * reference function if the function should be tested, otherwise NULL */
+void *checkasm_check_func(void *const func, const char *const name, ...) {
+ char name_buf[256];
+ va_list arg;
+
+ va_start(arg, name);
+ const int name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
+ va_end(arg);
+
+ if (!func || name_length <= 0 || (size_t)name_length >= sizeof(name_buf) ||
+ (state.function_pattern && wildstrcmp(name_buf, state.function_pattern)))
+ {
+ return NULL;
+ }
+
+ state.current_func = get_func(&state.funcs, name_buf);
+
+ if (state.function_listing) /* Save function names without running tests */
+ return NULL;
+
+ state.funcs->color = 1;
+ CheckasmFuncVersion *v = &state.current_func->versions;
+ void *ref = func;
+
+ if (v->func) {
+ CheckasmFuncVersion *prev;
+ do {
+ /* Only test functions that haven't already been tested */
+ if (v->func == func)
+ return NULL;
+
+ if (v->ok)
+ ref = v->func;
+
+ prev = v;
+ } while ((v = v->next));
+
+ v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
+ }
+
+ v->func = func;
+ v->ok = 1;
+ v->cpu = state.cpu_flag;
+ state.current_func_ver = v;
+ xor128_srand(state.seed);
+
+ if (state.cpu_flag || state.bench_c)
+ state.num_checked++;
+
+ return ref;
+}
+
+/* Decide whether or not the current function needs to be benchmarked */
+int checkasm_bench_func(void) {
+ return !state.num_failed && state.bench;
+}
+
+/* Indicate that the current test has failed, return whether verbose printing
+ * is requested. */
+int checkasm_fail_func(const char *const msg, ...) {
+ if (state.current_func_ver && state.current_func_ver->cpu &&
+ state.current_func_ver->ok)
+ {
+ va_list arg;
+
+ print_cpu_name();
+ fprintf(stderr, " %s_%s (", state.current_func->name,
+ cpu_suffix(state.current_func_ver->cpu));
+ va_start(arg, msg);
+ vfprintf(stderr, msg, arg);
+ va_end(arg);
+ fprintf(stderr, ")\n");
+
+ state.current_func_ver->ok = 0;
+ state.num_failed++;
+ }
+ return state.verbose;
+}
+
+/* Update benchmark results of the current function */
+void checkasm_update_bench(const int iterations, const uint64_t cycles) {
+ state.current_func_ver->iterations += iterations;
+ state.current_func_ver->cycles += cycles;
+}
+
+/* Print the outcome of all tests performed since
+ * the last time this function was called */
+void checkasm_report(const char *const name, ...) {
+ static int prev_checked, prev_failed;
+ static size_t max_length;
+
+ if (state.num_checked > prev_checked) {
+ int pad_length = (int) max_length + 4;
+ va_list arg;
+
+ print_cpu_name();
+ pad_length -= fprintf(stderr, " - %s.", state.current_test_name);
+ va_start(arg, name);
+ pad_length -= vfprintf(stderr, name, arg);
+ va_end(arg);
+ fprintf(stderr, "%*c", imax(pad_length, 0) + 2, '[');
+
+ if (state.num_failed == prev_failed)
+ color_printf(COLOR_GREEN, "OK");
+ else
+ color_printf(COLOR_RED, "FAILED");
+ fprintf(stderr, "]\n");
+
+ prev_checked = state.num_checked;
+ prev_failed = state.num_failed;
+ } else if (!state.cpu_flag) {
+ /* Calculate the amount of padding required
+ * to make the output vertically aligned */
+ size_t length = strlen(state.current_test_name);
+ va_list arg;
+
+ va_start(arg, name);
+ length += vsnprintf(NULL, 0, name, arg);
+ va_end(arg);
+
+ if (length > max_length)
+ max_length = length;
+ }
+}
+
+void checkasm_set_signal_handler_state(const int enabled) {
+ state.catch_signals = enabled;
+}
+
+static int check_err(const char *const file, const int line,
+ const char *const name, const int w, const int h,
+ int *const err)
+{
+ if (*err)
+ return 0;
+ if (!checkasm_fail_func("%s:%d", file, line))
+ return 1;
+ *err = 1;
+ fprintf(stderr, "%s (%dx%d):\n", name, w, h);
+ return 0;
+}
+
+#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \
+int checkasm_check_##type(const char *const file, const int line, \
+ const type *buf1, ptrdiff_t stride1, \
+ const type *buf2, ptrdiff_t stride2, \
+ const int w, int h, const char *const name, \
+ const int align_w, const int align_h, \
+ const int padding) \
+{ \
+ int aligned_w = (w + align_w - 1) & ~(align_w - 1); \
+ int aligned_h = (h + align_h - 1) & ~(align_h - 1); \
+ int err = 0; \
+ stride1 /= sizeof(*buf1); \
+ stride2 /= sizeof(*buf2); \
+ int y = 0; \
+ for (y = 0; y < h; y++) \
+ if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \
+ break; \
+ if (y != h) { \
+ if (check_err(file, line, name, w, h, &err)) \
+ return 1; \
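+        /* dump rows of buf1 and buf2 side by side, marking mismatches */ \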
+ for (y = 0; y < h; y++) { \
+ for (int x = 0; x < w; x++) \
+ fprintf(stderr, " " fmt, buf1[x]); \
+ fprintf(stderr, " "); \
+ for (int x = 0; x < w; x++) \
+ fprintf(stderr, " " fmt, buf2[x]); \
+ fprintf(stderr, " "); \
+ for (int x = 0; x < w; x++) \
+ fprintf(stderr, "%c", buf1[x] != buf2[x] ? 'x' : '.'); \
+ buf1 += stride1; \
+ buf2 += stride2; \
+ fprintf(stderr, "\n"); \
+ } \
+ buf1 -= h*stride1; \
+ buf2 -= h*stride2; \
+ } \
+ for (y = -padding; y < 0; y++) \
+ if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
+ (w + 2*padding)*sizeof(*buf1))) { \
+ if (check_err(file, line, name, w, h, &err)) \
+ return 1; \
+ fprintf(stderr, " overwrite above\n"); \
+ break; \
+ } \
+ for (y = aligned_h; y < aligned_h + padding; y++) \
+ if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
+ (w + 2*padding)*sizeof(*buf1))) { \
+ if (check_err(file, line, name, w, h, &err)) \
+ return 1; \
+ fprintf(stderr, " overwrite below\n"); \
+ break; \
+ } \
+ for (y = 0; y < h; y++) \
+ if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \
+ padding*sizeof(*buf1))) { \
+ if (check_err(file, line, name, w, h, &err)) \
+ return 1; \
+ fprintf(stderr, " overwrite left\n"); \
+ break; \
+ } \
+ for (y = 0; y < h; y++) \
+ if (memcmp(&buf1[y*stride1 + aligned_w], &buf2[y*stride2 + aligned_w], \
+ padding*sizeof(*buf1))) { \
+ if (check_err(file, line, name, w, h, &err)) \
+ return 1; \
+ fprintf(stderr, " overwrite right\n"); \
+ break; \
+ } \
+ return err; \
+}
+
+DEF_CHECKASM_CHECK_FUNC(int8_t, "%4d")
+DEF_CHECKASM_CHECK_FUNC(int16_t, "%6d")
+DEF_CHECKASM_CHECK_FUNC(int32_t, "%9d")
+DEF_CHECKASM_CHECK_FUNC(uint8_t, "%02x")
+DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
+DEF_CHECKASM_CHECK_FUNC(uint32_t, "%08x")
+
+#if ARCH_X86_64
+void checkasm_simd_warmup(void)
+{
+ if (state.simd_warmup)
+ state.simd_warmup();
+}
+#endif
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_TESTS_CHECKASM_CHECKASM_H
+#define DAV1D_TESTS_CHECKASM_CHECKASM_H
+
+#include "config.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#if ARCH_X86_64 && defined(_WIN32)
+/* setjmp/longjmp on 64-bit Windows will try to use SEH to unwind the stack,
+ * which doesn't work for assembly functions without unwind information. */
+#include <windows.h>
+#define checkasm_context CONTEXT
+#define checkasm_save_context() RtlCaptureContext(&checkasm_context_buf)
+#define checkasm_load_context() RtlRestoreContext(&checkasm_context_buf, NULL)
+#else
+#include <setjmp.h>
+#define checkasm_context jmp_buf
+#define checkasm_save_context() setjmp(checkasm_context_buf)
+#define checkasm_load_context() longjmp(checkasm_context_buf, 1)
+#endif
+
+#include "include/common/attributes.h"
+#include "include/common/bitdepth.h"
+#include "include/common/intops.h"
+
+int xor128_rand(void);
+#define rnd xor128_rand
+
+#define decl_check_bitfns(name) \
+name##_8bpc(void); \
+name##_16bpc(void)
+
+void checkasm_check_msac(void);
+void checkasm_check_refmvs(void);
+decl_check_bitfns(void checkasm_check_cdef);
+decl_check_bitfns(void checkasm_check_filmgrain);
+decl_check_bitfns(void checkasm_check_ipred);
+decl_check_bitfns(void checkasm_check_itx);
+decl_check_bitfns(void checkasm_check_loopfilter);
+decl_check_bitfns(void checkasm_check_looprestoration);
+decl_check_bitfns(void checkasm_check_mc);
+
+void *checkasm_check_func(void *func, const char *name, ...);
+int checkasm_bench_func(void);
+int checkasm_fail_func(const char *msg, ...);
+void checkasm_update_bench(int iterations, uint64_t cycles);
+void checkasm_report(const char *name, ...);
+void checkasm_set_signal_handler_state(int enabled);
+extern checkasm_context checkasm_context_buf;
+
+/* float compare utilities */
+int float_near_ulp(float a, float b, unsigned max_ulp);
+int float_near_abs_eps(float a, float b, float eps);
+int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp);
+int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
+ int len);
+int float_near_abs_eps_array(const float *a, const float *b, float eps,
+ int len);
+int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
+ unsigned max_ulp, int len);
+
+#define BENCH_RUNS (1 << 12) /* Trade-off between accuracy and speed */
+
+/* Decide whether or not the specified function needs to be tested */
+#define check_func(func, ...)\
+ (func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))
+
+/* Declare the function prototype. The first argument is the return value,
+ * the remaining arguments are the function parameters. Naming parameters
+ * is optional. */
+#define declare_func(ret, ...)\
+ declare_new(ret, __VA_ARGS__)\
+ void *func_ref, *func_new;\
+ typedef ret func_type(__VA_ARGS__);\
+ checkasm_save_context()
+
+/* Indicate that the current test has failed */
+#define fail() checkasm_fail_func("%s:%d", __FILE__, __LINE__)
+
+/* Print the test outcome */
+#define report checkasm_report
+
+/* Call the reference function */
+#define call_ref(...)\
+ (checkasm_set_signal_handler_state(1),\
+ ((func_type *)func_ref)(__VA_ARGS__));\
+ checkasm_set_signal_handler_state(0)
+
+#if HAVE_ASM
+#if ARCH_X86
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <intrin.h>
+#define readtime() (_mm_lfence(), __rdtsc())
+#else
+static inline uint64_t readtime(void) {
+ uint32_t eax, edx;
+ __asm__ __volatile__("lfence\nrdtsc" : "=a"(eax), "=d"(edx));
+ return (((uint64_t)edx) << 32) | eax;
+}
+#define readtime readtime
+#endif
+#elif (ARCH_AARCH64 || ARCH_ARM) && defined(__APPLE__)
+#include <mach/mach_time.h>
+#define readtime() mach_absolute_time()
+#elif ARCH_AARCH64
+#ifdef _MSC_VER
+#include <windows.h>
+#define readtime() (_InstructionSynchronizationBarrier(), ReadTimeStampCounter())
+#else
+static inline uint64_t readtime(void) {
+ uint64_t cycle_counter;
+ /* This requires enabling user mode access to the cycle counter (which
+ * can only be done from kernel space).
+ * This could also read cntvct_el0 instead of pmccntr_el0; that register
+ * might also be readable (depending on kernel version), but it has much
+ * worse precision (it's a fixed 50 MHz timer). */
+ __asm__ __volatile__("isb\nmrs %0, pmccntr_el0"
+ : "=r"(cycle_counter)
+ :: "memory");
+ return cycle_counter;
+}
+#define readtime readtime
+#endif
+#elif ARCH_ARM && !defined(_MSC_VER) && __ARM_ARCH >= 7
+static inline uint64_t readtime(void) {
+ uint32_t cycle_counter;
+ /* This requires enabling user mode access to the cycle counter (which
+ * can only be done from kernel space). */
+ __asm__ __volatile__("isb\nmrc p15, 0, %0, c9, c13, 0"
+ : "=r"(cycle_counter)
+ :: "memory");
+ return cycle_counter;
+}
+#define readtime readtime
+#elif ARCH_PPC64LE
+static inline uint64_t readtime(void) {
+ uint32_t tbu, tbl, temp;
+
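+    /* read the 64-bit time base (TBU/TBL, SPRs 269/268), retrying if the
+     * upper half ticked over between the two reads */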
+ __asm__ __volatile__(
+ "1:\n"
+ "mfspr %2,269\n"
+ "mfspr %0,268\n"
+ "mfspr %1,269\n"
+ "cmpw %2,%1\n"
+ "bne 1b\n"
+ : "=r"(tbl), "=r"(tbu), "=r"(temp)
+ :
+ : "cc");
+
+ return (((uint64_t)tbu) << 32) | (uint64_t)tbl;
+}
+#define readtime readtime
+#endif
+
+/* Verifies that clobbered callee-saved registers
+ * are properly saved and restored */
+void checkasm_checked_call(void *func, ...);
+
+#if ARCH_X86_64
+/* YMM and ZMM registers on x86 are turned off to save power when they haven't
+ * been used for some period of time. When they are used there will be a
+ * "warmup" period during which performance will be reduced and inconsistent
+ * which is problematic when trying to benchmark individual functions. We can
+ * work around this by periodically issuing "dummy" instructions that uses
+ * those registers to keep them powered on. */
+void checkasm_simd_warmup(void);
+
+/* The upper 32 bits of 32-bit data types are undefined when passed as function
+ * parameters. In practice those bits usually end up being zero which may hide
+ * certain bugs, such as using a register containing undefined bits as a pointer
+ * offset, so we want to intentionally clobber those bits with junk to expose
+ * any issues. The following set of macros automatically calculates a bitmask
+ * specifying which parameters should have their upper halves clobbered. */
+#ifdef _WIN32
+/* Integer and floating-point parameters share "register slots". */
+#define IGNORED_FP_ARGS 0
+#else
+/* Up to 8 floating-point parameters are passed in XMM registers, which are
+ * handled orthogonally from integer parameters passed in GPR registers. */
+#define IGNORED_FP_ARGS 8
+#endif
+#ifdef HAVE_C11_GENERIC
+#define clobber_type(arg) _Generic((void (*)(void*, arg))NULL,\
+ void (*)(void*, int32_t ): clobber_mask |= 1 << mpos++,\
+ void (*)(void*, uint32_t): clobber_mask |= 1 << mpos++,\
+ void (*)(void*, float ): mpos += (fp_args++ >= IGNORED_FP_ARGS),\
+ void (*)(void*, double ): mpos += (fp_args++ >= IGNORED_FP_ARGS),\
+ default: mpos++)
+#define init_clobber_mask(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, ...)\
+ unsigned clobber_mask = 0;\
+ {\
+ int mpos = 0, fp_args = 0;\
+ clobber_type(a); clobber_type(b); clobber_type(c); clobber_type(d);\
+ clobber_type(e); clobber_type(f); clobber_type(g); clobber_type(h);\
+ clobber_type(i); clobber_type(j); clobber_type(k); clobber_type(l);\
+ clobber_type(m); clobber_type(n); clobber_type(o); clobber_type(p);\
+ }
+#else
+/* Skip parameter clobbering on compilers without support for _Generic() */
+#define init_clobber_mask(...) unsigned clobber_mask = 0
+#endif
+#define declare_new(ret, ...)\
+ ret (*checked_call)(__VA_ARGS__, int, int, int, int, int, int, int,\
+ int, int, int, int, int, int, int, int, int,\
+ void*, unsigned) =\
+ (void*)checkasm_checked_call;\
+ init_clobber_mask(__VA_ARGS__, void*, void*, void*, void*,\
+ void*, void*, void*, void*, void*, void*,\
+ void*, void*, void*, void*, void*);
+#define call_new(...)\
+ (checkasm_set_signal_handler_state(1),\
+ checkasm_simd_warmup(),\
+ checked_call(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8,\
+ 7, 6, 5, 4, 3, 2, 1, func_new, clobber_mask));\
+ checkasm_set_signal_handler_state(0)
+#elif ARCH_X86_32
+#define declare_new(ret, ...)\
+ ret (*checked_call)(void *, __VA_ARGS__, int, int, int, int, int, int,\
+ int, int, int, int, int, int, int, int, int) =\
+ (void *)checkasm_checked_call;
+#define call_new(...)\
+ (checkasm_set_signal_handler_state(1),\
+ checked_call(func_new, __VA_ARGS__, 15, 14, 13, 12,\
+ 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1));\
+ checkasm_set_signal_handler_state(0)
+#elif ARCH_ARM
+/* Use a dummy argument to offset the real parameters by 2 rather than 1.
+ * This makes sure that potential 8-byte-alignment of parameters is kept
+ * the same even when the extra parameters have been removed. */
+void checkasm_checked_call_vfp(void *func, int dummy, ...);
+#define declare_new(ret, ...)\
+ ret (*checked_call)(void *, int dummy, __VA_ARGS__,\
+ int, int, int, int, int, int, int, int,\
+ int, int, int, int, int, int, int) =\
+ (void *)checkasm_checked_call_vfp;
+#define call_new(...)\
+ (checkasm_set_signal_handler_state(1),\
+ checked_call(func_new, 0, __VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0));\
+ checkasm_set_signal_handler_state(0)
+#elif ARCH_AARCH64 && !defined(__APPLE__)
+void checkasm_stack_clobber(uint64_t clobber, ...);
+#define declare_new(ret, ...)\
+ ret (*checked_call)(void *, int, int, int, int, int, int, int,\
+ __VA_ARGS__, int, int, int, int, int, int, int, int,\
+ int, int, int, int, int, int, int) =\
+ (void *)checkasm_checked_call;
+#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
+#define call_new(...)\
+ (checkasm_set_signal_handler_state(1),\
+ checkasm_stack_clobber(CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+ CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+ CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+ CLOB, CLOB, CLOB, CLOB, CLOB),\
+ checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\
+ 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0));\
+ checkasm_set_signal_handler_state(0)
+#else
+#define declare_new(ret, ...)
+#define call_new(...)\
+ (checkasm_set_signal_handler_state(1),\
+ ((func_type *)func_new)(__VA_ARGS__));\
+ checkasm_set_signal_handler_state(0)
+#endif
+#else /* HAVE_ASM */
+#define declare_new(ret, ...)
+/* Call the function */
+#define call_new(...)\
+ (checkasm_set_signal_handler_state(1),\
+ ((func_type *)func_new)(__VA_ARGS__));\
+ checkasm_set_signal_handler_state(0)
+#endif /* HAVE_ASM */
+
+/* Benchmark the function */
+#ifdef readtime
+#define bench_new(...)\
+ do {\
+ if (checkasm_bench_func()) {\
+ func_type *const tfunc = func_new;\
+ checkasm_set_signal_handler_state(1);\
+ uint64_t tsum = 0;\
+ int tcount = 0;\
+ for (int ti = 0; ti < BENCH_RUNS; ti++) {\
+ uint64_t t = readtime();\
+ int talt = 0; (void)talt;\
+ tfunc(__VA_ARGS__);\
+ talt = 1;\
+ tfunc(__VA_ARGS__);\
+ talt = 0;\
+ tfunc(__VA_ARGS__);\
+ talt = 1;\
+ tfunc(__VA_ARGS__);\
+ t = readtime() - t;\
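+            /* skip the first run and outliers over 4x the running mean */\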
+ if (t*tcount <= tsum*4 && ti > 0) {\
+ tsum += t;\
+ tcount++;\
+ }\
+ }\
+ checkasm_set_signal_handler_state(0);\
+ checkasm_update_bench(tcount, tsum);\
+ } else {\
+ const int talt = 0; (void)talt;\
+ call_new(__VA_ARGS__);\
+ }\
+ } while (0)
+#else
+#define bench_new(...) do {} while (0)
+#endif
+
+/* Alternates between two pointers. Intended to be used within bench_new()
+ * calls for functions that modify their input buffer(s) to ensure that
+ * throughput, and not latency, is measured. */
+#define alternate(a, b) (talt ? (b) : (a))
+
+#define ROUND_UP(x,a) (((x)+((a)-1)) & ~((a)-1))
+#define PIXEL_RECT(name, w, h) \
+ ALIGN_STK_64(pixel, name##_buf, ((h)+32)*(ROUND_UP(w,64)+64) + 64,); \
+ ptrdiff_t name##_stride = sizeof(pixel)*(ROUND_UP(w,64)+64); \
+ (void)name##_stride; \
+ pixel *name = name##_buf + (ROUND_UP(w,64)+64)*16 + 64
+
+#define CLEAR_PIXEL_RECT(name) \
+ memset(name##_buf, 0x99, sizeof(name##_buf)) \
+
+#define DECL_CHECKASM_CHECK_FUNC(type) \
+int checkasm_check_##type(const char *const file, const int line, \
+ const type *const buf1, const ptrdiff_t stride1, \
+ const type *const buf2, const ptrdiff_t stride2, \
+ const int w, const int h, const char *const name, \
+ const int align_w, const int align_h, \
+ const int padding)
+
+DECL_CHECKASM_CHECK_FUNC(int8_t);
+DECL_CHECKASM_CHECK_FUNC(int16_t);
+DECL_CHECKASM_CHECK_FUNC(int32_t);
+DECL_CHECKASM_CHECK_FUNC(uint8_t);
+DECL_CHECKASM_CHECK_FUNC(uint16_t);
+DECL_CHECKASM_CHECK_FUNC(uint32_t);
+
+#define CONCAT(a,b) a ## b
+
+#define checkasm_check2(prefix, ...) CONCAT(checkasm_check_, prefix)(__FILE__, __LINE__, __VA_ARGS__)
+#define checkasm_check(prefix, ...) checkasm_check2(prefix, __VA_ARGS__, 0, 0, 0)
+
+#ifdef BITDEPTH
+#define checkasm_check_pixel(...) checkasm_check(PIXEL_TYPE, __VA_ARGS__)
+#define checkasm_check_pixel_padded(...) checkasm_check2(PIXEL_TYPE, __VA_ARGS__, 1, 1, 8)
+#define checkasm_check_pixel_padded_align(...) checkasm_check2(PIXEL_TYPE, __VA_ARGS__, 8)
+#define checkasm_check_coef(...) checkasm_check(COEF_TYPE, __VA_ARGS__)
+#endif
+
+#endif /* DAV1D_TESTS_CHECKASM_CHECKASM_H */
--- /dev/null
+/*
+ * Autogenerated by the Meson build system.
+ * Do not edit, your changes will be lost.
+ */
+
+#pragma once
+
+#define ARCH_AARCH64 0
+
+#define ARCH_ARM 0
+
+#define ARCH_PPC64LE 1
+
+#define ARCH_X86 0
+
+#define ARCH_X86_32 0
+
+#define ARCH_X86_64 0
+
+#define CONFIG_16BPC 1
+
+#define CONFIG_8BPC 1
+
+#define CONFIG_LOG 1
+
+#define ENDIANNESS_BIG 0
+
+#define HAVE_ASM 1
+
+#define HAVE_C11_GENERIC 1
+
+#define HAVE_CLOCK_GETTIME 1
+
+#define HAVE_DLSYM 1
+
+#define HAVE_GETAUXVAL 1
+
+#define HAVE_POSIX_MEMALIGN 1
+
+#define HAVE_PTHREAD_GETAFFINITY_NP 1
+
+#define HAVE_UNISTD_H 1
+
+#define TRIM_DSP_FUNCTIONS 0
+
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_COMMON_ATTRIBUTES_H
+#define DAV1D_COMMON_ATTRIBUTES_H
+
+#include "config.h"
+
+#include <stddef.h>
+#include <assert.h>
+
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#ifdef __GNUC__
+#define ATTR_ALIAS __attribute__((may_alias))
+#define ATTR_FORMAT_PRINTF(fmt, attr) __attribute__((__format__(__printf__, fmt, attr)))
+#define COLD __attribute__((cold))
+#else
+#define ATTR_ALIAS
+#define ATTR_FORMAT_PRINTF(fmt, attr)
+#define COLD
+#endif
+
+#if ARCH_X86_64
+/* x86-64 needs 32- and 64-byte alignment for AVX2 and AVX-512. */
+#define ALIGN_64_VAL 64
+#define ALIGN_32_VAL 32
+#define ALIGN_16_VAL 16
+#elif ARCH_X86_32 || ARCH_ARM || ARCH_AARCH64 || ARCH_PPC64LE
+/* None of these architectures benefit from more than 16-byte alignment. */
+#define ALIGN_64_VAL 16
+#define ALIGN_32_VAL 16
+#define ALIGN_16_VAL 16
+#else
+/* No need for extra alignment on platforms without assembly. */
+#define ALIGN_64_VAL 8
+#define ALIGN_32_VAL 8
+#define ALIGN_16_VAL 8
+#endif
+
+/*
+ * API for aligning variables and struct members (ALIGN()):
+ *   uint8_t var[1][2][3][4]
+ * becomes:
+ *   ALIGN(uint8_t var[1][2][3][4], alignment).
+ */
+#ifdef _MSC_VER
+#define ALIGN(ll, a) \
+ __declspec(align(a)) ll
+#else
+#define ALIGN(line, align) \
+ line __attribute__((aligned(align)))
+#endif
+
+/*
+ * API for stack alignment (ALIGN_STK_$align()) of variables like:
+ * uint8_t var[1][2][3][4]
+ * becomes:
+ * ALIGN_STK_$align(uint8_t, var, 1, [2][3][4])
+ */
+#define ALIGN_STK_64(type, var, sz1d, sznd) \
+ ALIGN(type var[sz1d]sznd, ALIGN_64_VAL)
+#define ALIGN_STK_32(type, var, sz1d, sznd) \
+ ALIGN(type var[sz1d]sznd, ALIGN_32_VAL)
+#define ALIGN_STK_16(type, var, sz1d, sznd) \
+ ALIGN(type var[sz1d]sznd, ALIGN_16_VAL)
+
+/*
+ * Forbid inlining of a function:
+ * static NOINLINE void func() {}
+ */
+#ifdef _MSC_VER
+#define NOINLINE __declspec(noinline)
+#elif __has_attribute(noclone)
+#define NOINLINE __attribute__((noinline, noclone))
+#else
+#define NOINLINE __attribute__((noinline))
+#endif
+
+#ifdef _MSC_VER
+#define ALWAYS_INLINE __forceinline
+#else
+#define ALWAYS_INLINE __attribute__((always_inline)) inline
+#endif
+/* The hidden-visibility EXTERN from upstream is disabled in this build so
+ * that symbols remain visible to the test harness. */
+#define EXTERN extern
+
+#ifdef __clang__
+#define NO_SANITIZE(x) __attribute__((no_sanitize(x)))
+#else
+#define NO_SANITIZE(x)
+#endif
+
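+/* Under NDEBUG, assert() becomes an optimizer hint rather than a runtime check. */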
+#if defined(NDEBUG) && (defined(__GNUC__) || defined(__clang__))
+#undef assert
+#define assert(x) do { if (!(x)) __builtin_unreachable(); } while (0)
+#elif defined(NDEBUG) && defined(_MSC_VER)
+#undef assert
+#define assert __assume
+#endif
+
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__)
+# define dav1d_uninit(x) x=x
+#else
+# define dav1d_uninit(x) x
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#include <intrin.h>
+
+static inline int ctz(const unsigned int mask) {
+ unsigned long idx;
+ _BitScanForward(&idx, mask);
+ return idx;
+}
+
+static inline int clz(const unsigned int mask) {
+ unsigned long leading_zero = 0;
+ _BitScanReverse(&leading_zero, mask);
+ return (31 - leading_zero);
+}
+
+#ifdef _WIN64
+static inline int clzll(const unsigned long long mask) {
+ unsigned long leading_zero = 0;
+ _BitScanReverse64(&leading_zero, mask);
+ return (63 - leading_zero);
+}
+#else /* _WIN64 */
+static inline int clzll(const unsigned long long mask) {
+ if (mask >> 32)
+ return clz((unsigned)(mask >> 32));
+ else
+ return clz((unsigned)mask) + 32;
+}
+#endif /* _WIN64 */
+#else /* !_MSC_VER */
+static inline int ctz(const unsigned int mask) {
+ return __builtin_ctz(mask);
+}
+
+static inline int clz(const unsigned int mask) {
+ return __builtin_clz(mask);
+}
+
+static inline int clzll(const unsigned long long mask) {
+ return __builtin_clzll(mask);
+}
+#endif /* !_MSC_VER */
+
+#ifndef static_assert
+#define CHECK_OFFSET(type, field, name) \
+ struct check_##type##_##field { int x[(name == offsetof(type, field)) ? 1 : -1]; }
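+/* (the array size turns negative, and fails to compile, if the offset differs) */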
+#else
+#define CHECK_OFFSET(type, field, name) \
+ static_assert(name == offsetof(type, field), #field)
+#endif
+
+#ifdef _MSC_VER
+#define PACKED(...) __pragma(pack(push, 1)) __VA_ARGS__ __pragma(pack(pop))
+#else
+#define PACKED(...) __VA_ARGS__ __attribute__((__packed__))
+#endif
+
+#endif /* DAV1D_COMMON_ATTRIBUTES_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_COMMON_BITDEPTH_H
+#define DAV1D_COMMON_BITDEPTH_H
+
+#include <stdint.h>
+#include <string.h>
+
+#include "common/attributes.h"
+
+#if !defined(BITDEPTH)
+typedef void pixel;
+typedef void coef;
+#define HIGHBD_DECL_SUFFIX /* nothing */
+#define HIGHBD_CALL_SUFFIX /* nothing */
+#define HIGHBD_TAIL_SUFFIX /* nothing */
+#elif BITDEPTH == 8
+typedef uint8_t pixel;
+typedef int16_t coef;
+#define PIXEL_TYPE uint8_t
+#define COEF_TYPE int16_t
+#define pixel_copy memcpy
+#define pixel_set memset
+#define iclip_pixel iclip_u8
+#define PIX_HEX_FMT "%02x"
+#define bitfn(x) x##_8bpc
+#define BF(x, suffix) x##_8bpc_##suffix
+#define PXSTRIDE(x) (x)
+#define highbd_only(x)
+#define HIGHBD_DECL_SUFFIX /* nothing */
+#define HIGHBD_CALL_SUFFIX /* nothing */
+#define HIGHBD_TAIL_SUFFIX /* nothing */
+#define bitdepth_from_max(x) 8
+#define BITDEPTH_MAX 0xff
+#elif BITDEPTH == 16
+typedef uint16_t pixel;
+typedef int32_t coef;
+#define PIXEL_TYPE uint16_t
+#define COEF_TYPE int32_t
+#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
+static inline void pixel_set(pixel *const dst, const int val, const int num) {
+ for (int n = 0; n < num; n++)
+ dst[n] = val;
+}
+#define PIX_HEX_FMT "%03x"
+#define iclip_pixel(x) iclip(x, 0, bitdepth_max)
+#define HIGHBD_DECL_SUFFIX , const int bitdepth_max
+#define HIGHBD_CALL_SUFFIX , f->bitdepth_max
+#define HIGHBD_TAIL_SUFFIX , bitdepth_max
+#define bitdepth_from_max(bitdepth_max) (32 - clz(bitdepth_max))
+#define BITDEPTH_MAX bitdepth_max
+#define bitfn(x) x##_16bpc
+#define BF(x, suffix) x##_16bpc_##suffix
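+/* strides are in bytes; at 16 bpc each pixel occupies 2 bytes */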
+static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) {
+ assert(!(x & 1));
+ return x >> 1;
+}
+#define highbd_only(x) x
+#else
+#error invalid value for bitdepth
+#endif
+#define bytefn(x) bitfn(x)
+
+#define bitfn_decls(name, ...) \
+name##_8bpc(__VA_ARGS__); \
+name##_16bpc(__VA_ARGS__)
+
+#endif /* DAV1D_COMMON_BITDEPTH_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_COMMON_DUMP_H
+#define DAV1D_COMMON_DUMP_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "common/bitdepth.h"
+
+static inline void append_plane_to_file(const pixel *buf, ptrdiff_t stride,
+ int w, int h, const char *const file)
+{
+    FILE *const f = fopen(file, "ab");
+    if (!f) return; /* best-effort debug helper: silently skip on failure */
+    while (h--) {
+ fwrite(buf, w * sizeof(pixel), 1, f);
+ buf += PXSTRIDE(stride);
+ }
+ fclose(f);
+}
+
+static inline void hex_fdump(FILE *out, const pixel *buf, ptrdiff_t stride,
+ int w, int h, const char *what)
+{
+ fprintf(out, "%s\n", what);
+ while (h--) {
+ int x;
+ for (x = 0; x < w; x++)
+ fprintf(out, " " PIX_HEX_FMT, buf[x]);
+ buf += PXSTRIDE(stride);
+ fprintf(out, "\n");
+ }
+}
+
+static inline void hex_dump(const pixel *buf, ptrdiff_t stride,
+ int w, int h, const char *what)
+{
+ hex_fdump(stdout, buf, stride, w, h, what);
+}
+
+static inline void coef_dump(const coef *buf, const int w, const int h,
+ const int len, const char *what)
+{
+ int y;
+ printf("%s\n", what);
+ for (y = 0; y < h; y++) {
+ int x;
+ for (x = 0; x < w; x++)
+ printf(" %*d", len, buf[x]);
+ buf += w;
+ printf("\n");
+ }
+}
+
+static inline void ac_dump(const int16_t *buf, int w, int h, const char *what)
+{
+ printf("%s\n", what);
+ while (h--) {
+ for (int x = 0; x < w; x++)
+ printf(" %03d", buf[x]);
+ buf += w;
+ printf("\n");
+ }
+}
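+
+/* Example (sketch): print an 8x8 block of pixels labeled "src", using the
+ * bitdepth-appropriate PIX_HEX_FMT:
+ *
+ *   hex_dump(buf, stride, 8, 8, "src");
+ */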
+
+#endif /* DAV1D_COMMON_DUMP_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_COMMON_INTOPS_H
+#define DAV1D_COMMON_INTOPS_H
+
+#include <stdint.h>
+
+#include "common/attributes.h"
+
+static inline int imax(const int a, const int b) {
+ return a > b ? a : b;
+}
+
+static inline int imin(const int a, const int b) {
+ return a < b ? a : b;
+}
+
+static inline unsigned umax(const unsigned a, const unsigned b) {
+ return a > b ? a : b;
+}
+
+static inline unsigned umin(const unsigned a, const unsigned b) {
+ return a < b ? a : b;
+}
+
+static inline int iclip(const int v, const int min, const int max) {
+ return v < min ? min : v > max ? max : v;
+}
+
+static inline int iclip_u8(const int v) {
+ return iclip(v, 0, 255);
+}
+
+static inline int apply_sign(const int v, const int s) {
+ return s < 0 ? -v : v;
+}
+
+static inline int apply_sign64(const int v, const int64_t s) {
+ return s < 0 ? -v : v;
+}
+
+static inline int ulog2(const unsigned v) {
+ return 31 - clz(v);
+}
+
+static inline int u64log2(const uint64_t v) {
+ return 63 - clzll(v);
+}
+
+static inline unsigned inv_recenter(const unsigned r, const unsigned v) {
+ if (v > (r << 1))
+ return v;
+ else if ((v & 1) == 0)
+ return (v >> 1) + r;
+ else
+ return r - ((v + 1) >> 1);
+}
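+
+/* Worked example: with r = 5, the inputs v = 0, 1, 2, 3, 4 decode to
+ * 5, 4, 6, 3, 7: v indexes outward from the reference value r,
+ * alternating below and above it, until v > 2 * r, after which v is
+ * returned unchanged. */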
+
+#endif /* DAV1D_COMMON_INTOPS_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_COMMON_VALIDATE_H
+#define DAV1D_COMMON_VALIDATE_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(NDEBUG)
+#define debug_abort()
+#else
+#define debug_abort abort
+#endif
+
+#define validate_input_or_ret_with_msg(x, r, ...) \
+ if (!(x)) { \
+ fprintf(stderr, "Input validation check \'%s\' failed in %s!\n", \
+ #x, __func__); \
+ fprintf(stderr, __VA_ARGS__); \
+ debug_abort(); \
+ return r; \
+ }
+
+#define validate_input_or_ret(x, r) \
+ if (!(x)) { \
+ fprintf(stderr, "Input validation check \'%s\' failed in %s!\n", \
+ #x, __func__); \
+ debug_abort(); \
+ return r; \
+ }
+
+#define validate_input(x) validate_input_or_ret(x, )
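+
+/* Usage sketch (dav1d_do_thing is a hypothetical entry point): validate
+ * arguments at the top of public functions and return a POSIX-style error
+ * code on failure:
+ *
+ *   int dav1d_do_thing(Dav1dContext *const c) {
+ *       validate_input_or_ret(c != NULL, DAV1D_ERR(EINVAL));
+ *       ...
+ *   }
+ */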
+
+#endif /* DAV1D_COMMON_VALIDATE_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_COMMON_H
+#define DAV1D_COMMON_H
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#ifndef DAV1D_API
+ #if defined _WIN32
+ #if defined DAV1D_BUILDING_DLL
+ #define DAV1D_API __declspec(dllexport)
+ #else
+ #define DAV1D_API
+ #endif
+ #else
+ #if __GNUC__ >= 4
+ #define DAV1D_API __attribute__ ((visibility ("default")))
+ #else
+ #define DAV1D_API
+ #endif
+ #endif
+#endif
+
+#if EPERM > 0
+#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code.
+#else
+#define DAV1D_ERR(e) (e)
+#endif
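+
+/* For example, DAV1D_ERR(EAGAIN) evaluates to -EAGAIN on platforms with
+ * positive POSIX error codes, so callers can portably test API results
+ * with res == DAV1D_ERR(EAGAIN). */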
+
+/**
+ * A reference-counted object wrapper for a user-configurable pointer.
+ */
+typedef struct Dav1dUserData {
+ const uint8_t *data; ///< data pointer
+ struct Dav1dRef *ref; ///< allocation origin
+} Dav1dUserData;
+
+/**
+ * Input packet metadata, copied from the input data used to decode each
+ * image into the matching structure of the output image returned to the
+ * user. Since these are metadata fields, they may be used for purposes
+ * other than the documented ones; they are passed from input data to
+ * output picture without being used internally.
+ */
+typedef struct Dav1dDataProps {
+ int64_t timestamp; ///< container timestamp of input data, INT64_MIN if unknown (default)
+ int64_t duration; ///< container duration of input data, 0 if unknown (default)
+ int64_t offset; ///< stream offset of input data, -1 if unknown (default)
+ size_t size; ///< packet size, default Dav1dData.sz
+ struct Dav1dUserData user_data; ///< user-configurable data, default NULL members
+} Dav1dDataProps;
+
+/**
+ * Release reference to a Dav1dDataProps.
+ */
+DAV1D_API void dav1d_data_props_unref(Dav1dDataProps *props);
+
+#endif /* DAV1D_COMMON_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_DATA_H
+#define DAV1D_DATA_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "common.h"
+
+typedef struct Dav1dData {
+ const uint8_t *data; ///< data pointer
+ size_t sz; ///< data size
+ struct Dav1dRef *ref; ///< allocation origin
+ Dav1dDataProps m; ///< user provided metadata passed to the output picture
+} Dav1dData;
+
+/**
+ * Allocate data.
+ *
+ * @param data Input context.
+ * @param sz Size of the data that should be allocated.
+ *
+ * @return Pointer to the allocated buffer on success. NULL on error.
+ */
+DAV1D_API uint8_t * dav1d_data_create(Dav1dData *data, size_t sz);
+
+/**
+ * Wrap an existing data array.
+ *
+ * @param data Input context.
+ * @param buf The data to be wrapped.
+ * @param sz Size of the data.
+ * @param free_callback Function to be called when we release our last
+ * reference to this data. In this callback, $buf will be
+ * the $buf argument to this function, and $cookie will
+ * be the $cookie input argument to this function.
+ * @param cookie Opaque parameter passed to free_callback().
+ *
+ * @return 0 on success. A negative DAV1D_ERR value on error.
+ */
+DAV1D_API int dav1d_data_wrap(Dav1dData *data, const uint8_t *buf, size_t sz,
+ void (*free_callback)(const uint8_t *buf, void *cookie),
+ void *cookie);
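+
+/* Usage sketch (free_cb is a hypothetical callback): wrap a malloc'ed
+ * buffer so the library frees it when the last reference is released:
+ *
+ *   static void free_cb(const uint8_t *buf, void *cookie) {
+ *       free((void *) buf);
+ *   }
+ *   ...
+ *   Dav1dData data;
+ *   int res = dav1d_data_wrap(&data, buf, sz, free_cb, NULL);
+ */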
+
+/**
+ * Wrap a user-provided data pointer into a reference counted object.
+ *
+ * The data->m.user_data field will be initialized to wrap the provided
+ * $user_data pointer.
+ *
+ * $free_callback will be called on the same thread that released the last
+ * reference. If frame threading is used, make sure $free_callback is
+ * thread-safe.
+ *
+ * @param data Input context.
+ * @param user_data The user data to be wrapped.
+ * @param free_callback Function to be called when we release our last
+ * reference to this data. In this callback, $user_data
+ * will be the $user_data argument to this function, and
+ * $cookie will be the $cookie input argument to this
+ * function.
+ * @param cookie Opaque parameter passed to $free_callback.
+ *
+ * @return 0 on success. A negative DAV1D_ERR value on error.
+ */
+DAV1D_API int dav1d_data_wrap_user_data(Dav1dData *data,
+ const uint8_t *user_data,
+ void (*free_callback)(const uint8_t *user_data,
+ void *cookie),
+ void *cookie);
+
+/**
+ * Free the data reference.
+ *
+ * The reference count for data->m.user_data will be decremented (if it has been
+ * initialized with dav1d_data_wrap_user_data). The $data object will be memset
+ * to 0.
+ *
+ * @param data Input context.
+ */
+DAV1D_API void dav1d_data_unref(Dav1dData *data);
+
+#endif /* DAV1D_DATA_H */
--- /dev/null
+/*
+ * Copyright © 2018-2021, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_H
+#define DAV1D_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <errno.h>
+#include <stdarg.h>
+
+#include "common.h"
+#include "picture.h"
+#include "data.h"
+#include "version.h"
+
+typedef struct Dav1dContext Dav1dContext;
+typedef struct Dav1dRef Dav1dRef;
+
+#define DAV1D_MAX_THREADS 256
+#define DAV1D_MAX_FRAME_DELAY 256
+
+typedef struct Dav1dLogger {
+ void *cookie; ///< Custom data to pass to the callback.
+ /**
+ * Logger callback. May be NULL to disable logging.
+ *
+ * @param cookie Custom pointer passed to all calls.
+ * @param format The vprintf compatible format string.
+ * @param ap List of arguments referenced by the format string.
+ */
+ void (*callback)(void *cookie, const char *format, va_list ap);
+} Dav1dLogger;
+
+enum Dav1dInloopFilterType {
+ DAV1D_INLOOPFILTER_NONE = 0,
+ DAV1D_INLOOPFILTER_DEBLOCK = 1 << 0,
+ DAV1D_INLOOPFILTER_CDEF = 1 << 1,
+ DAV1D_INLOOPFILTER_RESTORATION = 1 << 2,
+ DAV1D_INLOOPFILTER_ALL = DAV1D_INLOOPFILTER_DEBLOCK |
+ DAV1D_INLOOPFILTER_CDEF |
+ DAV1D_INLOOPFILTER_RESTORATION,
+};
+
+typedef struct Dav1dSettings {
+ int n_threads; ///< number of threads (0 = number of logical cores in host system, default 0)
+ int max_frame_delay; ///< Set to 1 for low-latency decoding (0 = ceil(sqrt(n_threads)), default 0)
+ int apply_grain; ///< whether to apply film grain on output frames (default 1)
+ int operating_point; ///< select an operating point for scalable AV1 bitstreams (0 - 31, default 0)
+    int all_layers; ///< output all spatial layers of a scalable AV1 bitstream (default 1)
+ unsigned frame_size_limit; ///< maximum frame size, in pixels (0 = unlimited, default 0)
+ Dav1dPicAllocator allocator; ///< Picture allocator callback.
+ Dav1dLogger logger; ///< Logger callback.
+ int strict_std_compliance; ///< whether to abort decoding on standard compliance violations
+ ///< that don't affect actual bitstream decoding (e.g. inconsistent
+ ///< or invalid metadata, default 0)
+ int output_invisible_frames; ///< output invisibly coded frames (in coding order) in addition
+ ///< to all visible frames. Because of show-existing-frame, this
+ ///< means some frames may appear twice (once when coded,
+ ///< once when shown, default 0)
+ enum Dav1dInloopFilterType inloop_filters; ///< postfilters to enable during decoding (default
+ ///< DAV1D_INLOOPFILTER_ALL)
+ uint8_t reserved[20]; ///< reserved for future use
+} Dav1dSettings;
+
+/**
+ * Get library version.
+ */
+DAV1D_API const char *dav1d_version(void);
+
+/**
+ * Initialize settings to default values.
+ *
+ * @param s Input settings context.
+ */
+DAV1D_API void dav1d_default_settings(Dav1dSettings *s);
+
+/**
+ * Allocate and open a decoder instance.
+ *
+ * @param c_out The decoder instance to open. *c_out will be set to the
+ * allocated context.
+ * @param s Input settings context.
+ *
+ * @note The context must be freed using dav1d_close() when decoding is
+ * finished.
+ *
+ * @return 0 on success, or < 0 (a negative DAV1D_ERR code) on error.
+ */
+DAV1D_API int dav1d_open(Dav1dContext **c_out, const Dav1dSettings *s);
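+
+/* Typical open/close sequence (sketch; handle_error is hypothetical):
+ *
+ *   Dav1dSettings s;
+ *   dav1d_default_settings(&s);
+ *   Dav1dContext *c;
+ *   if (dav1d_open(&c, &s) < 0)
+ *       handle_error();
+ *   ...
+ *   dav1d_close(&c); // also sets c to NULL
+ */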
+
+/**
+ * Parse a Sequence Header OBU from bitstream data.
+ *
+ * @param out Output Sequence Header.
+ * @param buf The data to be parsed.
+ * @param sz Size of the data.
+ *
+ * @return
+ * 0: Success, and out is filled with the parsed Sequence Header
+ * OBU parameters.
+ * DAV1D_ERR(ENOENT): No Sequence Header OBUs were found in the buffer.
+ * other negative DAV1D_ERR codes: Invalid data in the buffer, invalid passed-in
+ * arguments, and other errors during parsing.
+ *
+ * @note It is safe to feed this function data containing other OBUs than a
+ * Sequence Header, as they will simply be ignored. If there is more than
+ * one Sequence Header OBU present, only the last will be returned.
+ */
+DAV1D_API int dav1d_parse_sequence_header(Dav1dSequenceHeader *out,
+ const uint8_t *buf, const size_t sz);
+
+/**
+ * Feed bitstream data to the decoder.
+ *
+ * @param c Input decoder instance.
+ * @param in Input bitstream data. On success, ownership of the reference is
+ * passed to the library.
+ *
+ * @return
+ * 0: Success, and the data was consumed.
+ * DAV1D_ERR(EAGAIN): The data can't be consumed. dav1d_get_picture() should
+ * be called to get one or more frames before the function
+ * can consume new data.
+ * other negative DAV1D_ERR codes: Error during decoding or because of invalid
+ * passed-in arguments.
+ */
+DAV1D_API int dav1d_send_data(Dav1dContext *c, Dav1dData *in);
+
+/**
+ * Return a decoded picture.
+ *
+ * @param c Input decoder instance.
+ * @param out Output frame. The caller assumes ownership of the returned
+ * reference.
+ *
+ * @return
+ * 0: Success, and a frame is returned.
+ * DAV1D_ERR(EAGAIN): Not enough data to output a frame. dav1d_send_data()
+ * should be called with new input.
+ * other negative DAV1D_ERR codes: Error during decoding or because of invalid
+ * passed-in arguments.
+ *
+ * @note To drain buffered frames from the decoder (i.e. on end of stream),
+ * call this function until it returns DAV1D_ERR(EAGAIN).
+ *
+ * @code{.c}
+ * Dav1dData data = { 0 };
+ * Dav1dPicture p = { 0 };
+ * int res;
+ *
+ * read_data(&data);
+ * do {
+ * res = dav1d_send_data(c, &data);
+ *         // Keep going even if the function can't consume the current
+ *         // data packet. It eventually will after one or more frames
+ *         // have been returned in this loop.
+ * if (res < 0 && res != DAV1D_ERR(EAGAIN))
+ * free_and_abort();
+ * res = dav1d_get_picture(c, &p);
+ * if (res < 0) {
+ * if (res != DAV1D_ERR(EAGAIN))
+ * free_and_abort();
+ * } else
+ * output_and_unref_picture(&p);
+ * // Stay in the loop as long as there's data to consume.
+ * } while (data.sz || read_data(&data) == SUCCESS);
+ *
+ * // Handle EOS by draining all buffered frames.
+ * do {
+ * res = dav1d_get_picture(c, &p);
+ * if (res < 0) {
+ * if (res != DAV1D_ERR(EAGAIN))
+ * free_and_abort();
+ * } else
+ * output_and_unref_picture(&p);
+ * } while (res == 0);
+ * @endcode
+ */
+DAV1D_API int dav1d_get_picture(Dav1dContext *c, Dav1dPicture *out);
+
+/**
+ * Apply film grain to a previously decoded picture. If the picture contains no
+ * film grain metadata, then this function merely returns a new reference.
+ *
+ * @param c Input decoder instance.
+ * @param out Output frame. The caller assumes ownership of the returned
+ * reference.
+ * @param in Input frame. No ownership is transferred.
+ *
+ * @return
+ * 0: Success, and a frame is returned.
+ * other negative DAV1D_ERR codes: Error due to lack of memory or because of
+ * invalid passed-in arguments.
+ *
+ * @note If `Dav1dSettings.apply_grain` is true, film grain was already applied
+ * by `dav1d_get_picture`, and so calling this function leads to double
+ * application of film grain. Users should only call this when needed.
+ */
+DAV1D_API int dav1d_apply_grain(Dav1dContext *c, Dav1dPicture *out,
+ const Dav1dPicture *in);
+
+/**
+ * Close a decoder instance and free all associated memory.
+ *
+ * @param c_out The decoder instance to close. *c_out will be set to NULL.
+ */
+DAV1D_API void dav1d_close(Dav1dContext **c_out);
+
+/**
+ * Flush all delayed frames in decoder and clear internal decoder state,
+ * to be used when seeking.
+ *
+ * @param c Input decoder instance.
+ *
+ * @note Decoding will start only after a valid sequence header OBU is
+ * delivered to dav1d_send_data().
+ *
+ */
+DAV1D_API void dav1d_flush(Dav1dContext *c);
+
+enum Dav1dEventFlags {
+ /**
+ * The last returned picture contains a reference to a new Sequence Header,
+ * either because it's the start of a new coded sequence, or the decoder was
+ * flushed before it was generated.
+ */
+ DAV1D_EVENT_FLAG_NEW_SEQUENCE = 1 << 0,
+ /**
+ * The last returned picture contains a reference to a Sequence Header with
+ * new operating parameters information for the current coded sequence.
+ */
+ DAV1D_EVENT_FLAG_NEW_OP_PARAMS_INFO = 1 << 1,
+};
+
+/**
+ * Fetch a combination of DAV1D_EVENT_FLAG_* event flags generated by the decoding
+ * process.
+ *
+ * @param c Input decoder instance.
+ * @param flags Where to write the flags.
+ *
+ * @return 0 on success, or < 0 (a negative DAV1D_ERR code) on error.
+ *
+ * @note Calling this function will clear all the event flags currently stored in
+ * the decoder.
+ *
+ */
+DAV1D_API int dav1d_get_event_flags(Dav1dContext *c, enum Dav1dEventFlags *flags);
+
+/**
+ * Retrieve the user-provided metadata associated with the input data packet
+ * for the last decoding error reported to the user, i.e. a negative return
+ * value (not EAGAIN) from dav1d_send_data() or dav1d_get_picture().
+ *
+ * @param c Input decoder instance.
+ * @param out Output Dav1dDataProps. On success, the caller assumes ownership of
+ * the returned reference.
+ *
+ * @return 0 on success, or < 0 (a negative DAV1D_ERR code) on error.
+ */
+DAV1D_API int dav1d_get_decode_error_data_props(Dav1dContext *c, Dav1dDataProps *out);
+
+/**
+ * Get the decoder delay, which is the number of internally buffered frames, not
+ * including reference frames.
+ * This value is guaranteed to be >= 1 and <= max_frame_delay.
+ *
+ * @param s Input settings context.
+ *
+ * @return Decoder frame delay on success, or < 0 (a negative DAV1D_ERR code) on
+ * error.
+ *
+ * @note The returned delay is valid only for a Dav1dContext initialized with the
+ * provided Dav1dSettings.
+ */
+DAV1D_API int dav1d_get_frame_delay(const Dav1dSettings *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* DAV1D_H */
--- /dev/null
+/*
+ * Copyright © 2018-2020, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_HEADERS_H
+#define DAV1D_HEADERS_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+// Constants from Section 3. "Symbols and abbreviated terms"
+#define DAV1D_MAX_CDEF_STRENGTHS 8
+#define DAV1D_MAX_OPERATING_POINTS 32
+#define DAV1D_MAX_TILE_COLS 64
+#define DAV1D_MAX_TILE_ROWS 64
+#define DAV1D_MAX_SEGMENTS 8
+#define DAV1D_NUM_REF_FRAMES 8
+#define DAV1D_PRIMARY_REF_NONE 7
+#define DAV1D_REFS_PER_FRAME 7
+#define DAV1D_TOTAL_REFS_PER_FRAME (DAV1D_REFS_PER_FRAME + 1)
+
+enum Dav1dObuType {
+ DAV1D_OBU_SEQ_HDR = 1,
+ DAV1D_OBU_TD = 2,
+ DAV1D_OBU_FRAME_HDR = 3,
+ DAV1D_OBU_TILE_GRP = 4,
+ DAV1D_OBU_METADATA = 5,
+ DAV1D_OBU_FRAME = 6,
+ DAV1D_OBU_REDUNDANT_FRAME_HDR = 7,
+ DAV1D_OBU_PADDING = 15,
+};
+
+enum Dav1dTxfmMode {
+ DAV1D_TX_4X4_ONLY,
+ DAV1D_TX_LARGEST,
+ DAV1D_TX_SWITCHABLE,
+ DAV1D_N_TX_MODES,
+};
+
+enum Dav1dFilterMode {
+ DAV1D_FILTER_8TAP_REGULAR,
+ DAV1D_FILTER_8TAP_SMOOTH,
+ DAV1D_FILTER_8TAP_SHARP,
+ DAV1D_N_SWITCHABLE_FILTERS,
+ DAV1D_FILTER_BILINEAR = DAV1D_N_SWITCHABLE_FILTERS,
+ DAV1D_N_FILTERS,
+ DAV1D_FILTER_SWITCHABLE = DAV1D_N_FILTERS,
+};
+
+enum Dav1dAdaptiveBoolean {
+ DAV1D_OFF = 0,
+ DAV1D_ON = 1,
+ DAV1D_ADAPTIVE = 2,
+};
+
+enum Dav1dRestorationType {
+ DAV1D_RESTORATION_NONE,
+ DAV1D_RESTORATION_SWITCHABLE,
+ DAV1D_RESTORATION_WIENER,
+ DAV1D_RESTORATION_SGRPROJ,
+};
+
+enum Dav1dWarpedMotionType {
+ DAV1D_WM_TYPE_IDENTITY,
+ DAV1D_WM_TYPE_TRANSLATION,
+ DAV1D_WM_TYPE_ROT_ZOOM,
+ DAV1D_WM_TYPE_AFFINE,
+};
+
+typedef struct Dav1dWarpedMotionParams {
+ enum Dav1dWarpedMotionType type;
+ int32_t matrix[6];
+ union {
+ struct {
+ int16_t alpha, beta, gamma, delta;
+ } p;
+ int16_t abcd[4];
+ } u;
+} Dav1dWarpedMotionParams;
+
+enum Dav1dPixelLayout {
+ DAV1D_PIXEL_LAYOUT_I400, ///< monochrome
+ DAV1D_PIXEL_LAYOUT_I420, ///< 4:2:0 planar
+ DAV1D_PIXEL_LAYOUT_I422, ///< 4:2:2 planar
+ DAV1D_PIXEL_LAYOUT_I444, ///< 4:4:4 planar
+};
+
+enum Dav1dFrameType {
+ DAV1D_FRAME_TYPE_KEY = 0, ///< Key Intra frame
+ DAV1D_FRAME_TYPE_INTER = 1, ///< Inter frame
+ DAV1D_FRAME_TYPE_INTRA = 2, ///< Non key Intra frame
+ DAV1D_FRAME_TYPE_SWITCH = 3, ///< Switch Inter frame
+};
+
+enum Dav1dColorPrimaries {
+ DAV1D_COLOR_PRI_BT709 = 1,
+ DAV1D_COLOR_PRI_UNKNOWN = 2,
+ DAV1D_COLOR_PRI_BT470M = 4,
+ DAV1D_COLOR_PRI_BT470BG = 5,
+ DAV1D_COLOR_PRI_BT601 = 6,
+ DAV1D_COLOR_PRI_SMPTE240 = 7,
+ DAV1D_COLOR_PRI_FILM = 8,
+ DAV1D_COLOR_PRI_BT2020 = 9,
+ DAV1D_COLOR_PRI_XYZ = 10,
+ DAV1D_COLOR_PRI_SMPTE431 = 11,
+ DAV1D_COLOR_PRI_SMPTE432 = 12,
+ DAV1D_COLOR_PRI_EBU3213 = 22,
+ DAV1D_COLOR_PRI_RESERVED = 255,
+};
+
+enum Dav1dTransferCharacteristics {
+ DAV1D_TRC_BT709 = 1,
+ DAV1D_TRC_UNKNOWN = 2,
+ DAV1D_TRC_BT470M = 4,
+ DAV1D_TRC_BT470BG = 5,
+ DAV1D_TRC_BT601 = 6,
+ DAV1D_TRC_SMPTE240 = 7,
+ DAV1D_TRC_LINEAR = 8,
+ DAV1D_TRC_LOG100 = 9, ///< logarithmic (100:1 range)
+    DAV1D_TRC_LOG100_SQRT10 = 10, ///< logarithmic (100*sqrt(10):1 range)
+ DAV1D_TRC_IEC61966 = 11,
+ DAV1D_TRC_BT1361 = 12,
+ DAV1D_TRC_SRGB = 13,
+ DAV1D_TRC_BT2020_10BIT = 14,
+ DAV1D_TRC_BT2020_12BIT = 15,
+ DAV1D_TRC_SMPTE2084 = 16, ///< PQ
+ DAV1D_TRC_SMPTE428 = 17,
+ DAV1D_TRC_HLG = 18, ///< hybrid log/gamma (BT.2100 / ARIB STD-B67)
+ DAV1D_TRC_RESERVED = 255,
+};
+
+enum Dav1dMatrixCoefficients {
+ DAV1D_MC_IDENTITY = 0,
+ DAV1D_MC_BT709 = 1,
+ DAV1D_MC_UNKNOWN = 2,
+ DAV1D_MC_FCC = 4,
+ DAV1D_MC_BT470BG = 5,
+ DAV1D_MC_BT601 = 6,
+ DAV1D_MC_SMPTE240 = 7,
+ DAV1D_MC_SMPTE_YCGCO = 8,
+ DAV1D_MC_BT2020_NCL = 9,
+ DAV1D_MC_BT2020_CL = 10,
+ DAV1D_MC_SMPTE2085 = 11,
+ DAV1D_MC_CHROMAT_NCL = 12, ///< Chromaticity-derived
+ DAV1D_MC_CHROMAT_CL = 13,
+ DAV1D_MC_ICTCP = 14,
+ DAV1D_MC_RESERVED = 255,
+};
+
+enum Dav1dChromaSamplePosition {
+ DAV1D_CHR_UNKNOWN = 0,
+ DAV1D_CHR_VERTICAL = 1, ///< Horizontally co-located with luma(0, 0)
+ ///< sample, between two vertical samples
+ DAV1D_CHR_COLOCATED = 2, ///< Co-located with luma(0, 0) sample
+};
+
+typedef struct Dav1dContentLightLevel {
+ int max_content_light_level;
+ int max_frame_average_light_level;
+} Dav1dContentLightLevel;
+
+typedef struct Dav1dMasteringDisplay {
+    /// 0.16 fixed point
+    uint16_t primaries[3][2];
+    /// 0.16 fixed point
+    uint16_t white_point[2];
+    /// 24.8 fixed point
+    uint32_t max_luminance;
+    /// 18.14 fixed point
+    uint32_t min_luminance;
+} Dav1dMasteringDisplay;
+
+typedef struct Dav1dITUTT35 {
+ uint8_t country_code;
+ uint8_t country_code_extension_byte;
+ size_t payload_size;
+ uint8_t *payload;
+} Dav1dITUTT35;
+
+typedef struct Dav1dSequenceHeader {
+ /**
+ * Stream profile, 0 for 8-10 bits/component 4:2:0 or monochrome;
+ * 1 for 8-10 bits/component 4:4:4; 2 for 4:2:2 at any bits/component,
+ * or 12 bits/component at any chroma subsampling.
+ */
+ int profile;
+ /**
+ * Maximum dimensions for this stream. In non-scalable streams, these
+ * are often the actual dimensions of the stream, although that is not
+ * a normative requirement.
+ */
+ int max_width, max_height;
+ enum Dav1dPixelLayout layout; ///< format of the picture
+ enum Dav1dColorPrimaries pri; ///< color primaries (av1)
+ enum Dav1dTransferCharacteristics trc; ///< transfer characteristics (av1)
+ enum Dav1dMatrixCoefficients mtrx; ///< matrix coefficients (av1)
+ enum Dav1dChromaSamplePosition chr; ///< chroma sample position (av1)
+ /**
+ * 0, 1 and 2 mean 8, 10 or 12 bits/component, respectively. This is not
+ * exactly the same as 'hbd' from the spec; the spec's hbd distinguishes
+ * between 8 (0) and 10-12 (1) bits/component, and another element
+ * (twelve_bit) to distinguish between 10 and 12 bits/component. To get
+ * the spec's hbd, use !!our_hbd, and to get twelve_bit, use hbd == 2.
+ */
+ int hbd;
+ /**
+ * Pixel data uses JPEG pixel range ([0,255] for 8bits) instead of
+ * MPEG pixel range ([16,235] for 8bits luma, [16,240] for 8bits chroma).
+ */
+ int color_range;
+
+ int num_operating_points;
+ struct Dav1dSequenceHeaderOperatingPoint {
+ int major_level, minor_level;
+ int initial_display_delay;
+ int idc;
+ int tier;
+ int decoder_model_param_present;
+ int display_model_param_present;
+ } operating_points[DAV1D_MAX_OPERATING_POINTS];
+
+ int still_picture;
+ int reduced_still_picture_header;
+ int timing_info_present;
+ int num_units_in_tick;
+ int time_scale;
+ int equal_picture_interval;
+ unsigned num_ticks_per_picture;
+ int decoder_model_info_present;
+ int encoder_decoder_buffer_delay_length;
+ int num_units_in_decoding_tick;
+ int buffer_removal_delay_length;
+ int frame_presentation_delay_length;
+ int display_model_info_present;
+ int width_n_bits, height_n_bits;
+ int frame_id_numbers_present;
+ int delta_frame_id_n_bits;
+ int frame_id_n_bits;
+ int sb128;
+ int filter_intra;
+ int intra_edge_filter;
+ int inter_intra;
+ int masked_compound;
+ int warped_motion;
+ int dual_filter;
+ int order_hint;
+ int jnt_comp;
+ int ref_frame_mvs;
+ enum Dav1dAdaptiveBoolean screen_content_tools;
+ enum Dav1dAdaptiveBoolean force_integer_mv;
+ int order_hint_n_bits;
+ int super_res;
+ int cdef;
+ int restoration;
+ int ss_hor, ss_ver, monochrome;
+ int color_description_present;
+ int separate_uv_delta_q;
+ int film_grain_present;
+
+ // Dav1dSequenceHeaders of the same sequence are required to be
+ // bit-identical until this offset. See 7.5 "Ordering of OBUs":
+ // Within a particular coded video sequence, the contents of
+ // sequence_header_obu must be bit-identical each time the
+ // sequence header appears except for the contents of
+ // operating_parameters_info.
+ struct Dav1dSequenceHeaderOperatingParameterInfo {
+ int decoder_buffer_delay;
+ int encoder_buffer_delay;
+ int low_delay_mode;
+ } operating_parameter_info[DAV1D_MAX_OPERATING_POINTS];
+} Dav1dSequenceHeader;
+
+typedef struct Dav1dSegmentationData {
+ int delta_q;
+ int delta_lf_y_v, delta_lf_y_h, delta_lf_u, delta_lf_v;
+ int ref;
+ int skip;
+ int globalmv;
+} Dav1dSegmentationData;
+
+typedef struct Dav1dSegmentationDataSet {
+ Dav1dSegmentationData d[DAV1D_MAX_SEGMENTS];
+ int preskip;
+ int last_active_segid;
+} Dav1dSegmentationDataSet;
+
+typedef struct Dav1dLoopfilterModeRefDeltas {
+ int mode_delta[2 /* is_zeromv */];
+ int ref_delta[DAV1D_TOTAL_REFS_PER_FRAME];
+} Dav1dLoopfilterModeRefDeltas;
+
+typedef struct Dav1dFilmGrainData {
+ unsigned seed;
+ int num_y_points;
+ uint8_t y_points[14][2 /* value, scaling */];
+ int chroma_scaling_from_luma;
+ int num_uv_points[2];
+ uint8_t uv_points[2][10][2 /* value, scaling */];
+ int scaling_shift;
+ int ar_coeff_lag;
+ int8_t ar_coeffs_y[24];
+ int8_t ar_coeffs_uv[2][25 + 3 /* padding for alignment purposes */];
+ uint64_t ar_coeff_shift;
+ int grain_scale_shift;
+ int uv_mult[2];
+ int uv_luma_mult[2];
+ int uv_offset[2];
+ int overlap_flag;
+ int clip_to_restricted_range;
+} Dav1dFilmGrainData;
+
+typedef struct Dav1dFrameHeader {
+ struct {
+ Dav1dFilmGrainData data;
+ int present, update;
+ } film_grain; ///< film grain parameters
+ enum Dav1dFrameType frame_type; ///< type of the picture
+ int width[2 /* { coded_width, superresolution_upscaled_width } */], height;
+ int frame_offset; ///< frame number
+ int temporal_id; ///< temporal id of the frame for SVC
+ int spatial_id; ///< spatial id of the frame for SVC
+
+ int show_existing_frame;
+ int existing_frame_idx;
+ int frame_id;
+ int frame_presentation_delay;
+ int show_frame;
+ int showable_frame;
+ int error_resilient_mode;
+ int disable_cdf_update;
+ int allow_screen_content_tools;
+ int force_integer_mv;
+ int frame_size_override;
+ int primary_ref_frame;
+ int buffer_removal_time_present;
+ struct Dav1dFrameHeaderOperatingPoint {
+ int buffer_removal_time;
+ } operating_points[DAV1D_MAX_OPERATING_POINTS];
+ int refresh_frame_flags;
+ int render_width, render_height;
+ struct {
+ int width_scale_denominator;
+ int enabled;
+ } super_res;
+ int have_render_size;
+ int allow_intrabc;
+ int frame_ref_short_signaling;
+ int refidx[DAV1D_REFS_PER_FRAME];
+ int hp;
+ enum Dav1dFilterMode subpel_filter_mode;
+ int switchable_motion_mode;
+ int use_ref_frame_mvs;
+ int refresh_context;
+ struct {
+ int uniform;
+ unsigned n_bytes;
+ int min_log2_cols, max_log2_cols, log2_cols, cols;
+ int min_log2_rows, max_log2_rows, log2_rows, rows;
+ uint16_t col_start_sb[DAV1D_MAX_TILE_COLS + 1];
+ uint16_t row_start_sb[DAV1D_MAX_TILE_ROWS + 1];
+ int update;
+ } tiling;
+ struct {
+ int yac;
+ int ydc_delta;
+ int udc_delta, uac_delta, vdc_delta, vac_delta;
+ int qm, qm_y, qm_u, qm_v;
+ } quant;
+ struct {
+ int enabled, update_map, temporal, update_data;
+ Dav1dSegmentationDataSet seg_data;
+ int lossless[DAV1D_MAX_SEGMENTS], qidx[DAV1D_MAX_SEGMENTS];
+ } segmentation;
+ struct {
+ struct {
+ int present;
+ int res_log2;
+ } q;
+ struct {
+ int present;
+ int res_log2;
+ int multi;
+ } lf;
+ } delta;
+ int all_lossless;
+ struct {
+ int level_y[2 /* dir */];
+ int level_u, level_v;
+ int mode_ref_delta_enabled;
+ int mode_ref_delta_update;
+ Dav1dLoopfilterModeRefDeltas mode_ref_deltas;
+ int sharpness;
+ } loopfilter;
+ struct {
+ int damping;
+ int n_bits;
+ int y_strength[DAV1D_MAX_CDEF_STRENGTHS];
+ int uv_strength[DAV1D_MAX_CDEF_STRENGTHS];
+ } cdef;
+ struct {
+ enum Dav1dRestorationType type[3 /* plane */];
+ int unit_size[2 /* y, uv */];
+ } restoration;
+ enum Dav1dTxfmMode txfm_mode;
+ int switchable_comp_refs;
+ int skip_mode_allowed, skip_mode_enabled, skip_mode_refs[2];
+ int warp_motion;
+ int reduced_txtp_set;
+ Dav1dWarpedMotionParams gmv[DAV1D_REFS_PER_FRAME];
+} Dav1dFrameHeader;
+
+#endif /* DAV1D_HEADERS_H */
--- /dev/null
+/*
+ * Copyright © 2018-2020, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_PICTURE_H
+#define DAV1D_PICTURE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "common.h"
+#include "headers.h"
+
+/* Number of bytes to align AND pad picture memory buffers by, so that SIMD
+ * implementations can over-read by a few bytes, and use aligned read/write
+ * instructions. */
+#define DAV1D_PICTURE_ALIGNMENT 64
+
+typedef struct Dav1dPictureParameters {
+ int w; ///< width (in pixels)
+ int h; ///< height (in pixels)
+ enum Dav1dPixelLayout layout; ///< format of the picture
+ int bpc; ///< bits per pixel component (8 or 10)
+} Dav1dPictureParameters;
+
+typedef struct Dav1dPicture {
+ Dav1dSequenceHeader *seq_hdr;
+ Dav1dFrameHeader *frame_hdr;
+
+ /**
+ * Pointers to planar image data (Y is [0], U is [1], V is [2]). The data
+ * should be bytes (for 8 bpc) or words (for 10 bpc). In case of words
+ * containing 10 bpc image data, the pixels should be located in the LSB
+ * bits, so that values range between [0, 1023]; the upper bits should be
+     * zeroed out.
+ */
+ void *data[3];
+
+ /**
+ * Number of bytes between 2 lines in data[] for luma [0] or chroma [1].
+ */
+ ptrdiff_t stride[2];
+
+ Dav1dPictureParameters p;
+ Dav1dDataProps m;
+
+ /**
+ * High Dynamic Range Content Light Level metadata applying to this picture,
+ * as defined in section 5.8.3 and 6.7.3
+ */
+ Dav1dContentLightLevel *content_light;
+ /**
+ * High Dynamic Range Mastering Display Color Volume metadata applying to
+ * this picture, as defined in section 5.8.4 and 6.7.4
+ */
+ Dav1dMasteringDisplay *mastering_display;
+ /**
+ * ITU-T T.35 metadata as defined in section 5.8.2 and 6.7.2
+ */
+ Dav1dITUTT35 *itut_t35;
+
+ uintptr_t reserved[4]; ///< reserved for future use
+
+ struct Dav1dRef *frame_hdr_ref; ///< Dav1dFrameHeader allocation origin
+ struct Dav1dRef *seq_hdr_ref; ///< Dav1dSequenceHeader allocation origin
+ struct Dav1dRef *content_light_ref; ///< Dav1dContentLightLevel allocation origin
+ struct Dav1dRef *mastering_display_ref; ///< Dav1dMasteringDisplay allocation origin
+ struct Dav1dRef *itut_t35_ref; ///< Dav1dITUTT35 allocation origin
+ uintptr_t reserved_ref[4]; ///< reserved for future use
+ struct Dav1dRef *ref; ///< Frame data allocation origin
+
+ void *allocator_data; ///< pointer managed by the allocator
+} Dav1dPicture;
+
+typedef struct Dav1dPicAllocator {
+ void *cookie; ///< custom data to pass to the allocator callbacks.
+ /**
+ * Allocate the picture buffer based on the Dav1dPictureParameters.
+ *
+     * data[0], data[1] and data[2] must be DAV1D_PICTURE_ALIGNMENT-byte
+     * aligned, and the buffers must be sized for a pixel width/height
+     * rounded up to a multiple of 128. Any allocated memory area should
+     * also be padded by DAV1D_PICTURE_ALIGNMENT bytes.
+     * data[1] and data[2] must share the same stride[1].
+ *
+ * This function will be called on the main thread (the thread which calls
+ * dav1d_get_picture()).
+ *
+ * @param pic The picture to allocate the buffer for. The callback needs to
+ * fill the picture data[0], data[1], data[2], stride[0] and
+ * stride[1].
+ * The allocator can fill the pic allocator_data pointer with
+ * a custom pointer that will be passed to
+ * release_picture_callback().
+ * @param cookie Custom pointer passed to all calls.
+ *
+     * @note No fields other than data, stride and allocator_data may be
+     *       filled by this callback.
+ * @return 0 on success. A negative DAV1D_ERR value on error.
+ */
+ int (*alloc_picture_callback)(Dav1dPicture *pic, void *cookie);
+ /**
+ * Release the picture buffer.
+ *
+ * If frame threading is used, this function may be called by the main
+ * thread (the thread which calls dav1d_get_picture()) or any of the frame
+ * threads and thus must be thread-safe. If frame threading is not used,
+ * this function will only be called on the main thread.
+ *
+ * @param pic The picture that was filled by alloc_picture_callback().
+ * @param cookie Custom pointer passed to all calls.
+ */
+ void (*release_picture_callback)(Dav1dPicture *pic, void *cookie);
+} Dav1dPicAllocator;
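+
+/* Sketch of a conforming alloc callback (my_alloc is hypothetical). It
+ * follows the alignment, 128-pixel padding and shared-chroma-stride rules
+ * documented above:
+ *
+ *   static int my_alloc(Dav1dPicture *const p, void *const cookie) {
+ *       const int hbd = p->p.bpc > 8;
+ *       const ptrdiff_t y_stride = ((p->p.w + 127) & ~127) << hbd;
+ *       // ...derive the chroma stride from p->p.layout, make one
+ *       // DAV1D_PICTURE_ALIGNMENT-aligned allocation padded by
+ *       // DAV1D_PICTURE_ALIGNMENT bytes, then fill p->data[0..2],
+ *       // p->stride[0..1] and p->allocator_data...
+ *       return 0;
+ *   }
+ */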
+
+/**
+ * Release reference to a picture.
+ */
+DAV1D_API void dav1d_picture_unref(Dav1dPicture *p);
+
+#endif /* DAV1D_PICTURE_H */
--- /dev/null
+/*
+ * Copyright © 2019, VideoLAN and dav1d authors
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_VERSION_H
+#define DAV1D_VERSION_H
+
+#define DAV1D_API_VERSION_MAJOR 6
+#define DAV1D_API_VERSION_MINOR 7
+#define DAV1D_API_VERSION_PATCH 0
+
+#endif /* DAV1D_VERSION_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_CDEF_H
+#define DAV1D_SRC_CDEF_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "common/bitdepth.h"
+
+enum CdefEdgeFlags {
+ CDEF_HAVE_LEFT = 1 << 0,
+ CDEF_HAVE_RIGHT = 1 << 1,
+ CDEF_HAVE_TOP = 1 << 2,
+ CDEF_HAVE_BOTTOM = 1 << 3,
+};
+
+#ifdef BITDEPTH
+typedef const pixel (*const_left_pixel_row_2px)[2];
+#else
+typedef const void *const_left_pixel_row_2px;
+#endif
+
+// CDEF operates entirely on pre-filter data; if bottom/right edges are
+// present (according to $edges), then the pre-filter data is located in
+// $dst. However, the edge pixels above $dst may be post-filter, so in
+// order to get access to pre-filter top pixels, use $top.
+#define decl_cdef_fn(name) \
+void (name)(pixel *dst, ptrdiff_t stride, const_left_pixel_row_2px left, \
+ const pixel *top, const pixel *bottom, \
+ int pri_strength, int sec_strength, \
+ int dir, int damping, enum CdefEdgeFlags edges HIGHBD_DECL_SUFFIX)
+typedef decl_cdef_fn(*cdef_fn);
+
+#define decl_cdef_dir_fn(name) \
+int (name)(const pixel *dst, ptrdiff_t dst_stride, unsigned *var HIGHBD_DECL_SUFFIX)
+typedef decl_cdef_dir_fn(*cdef_dir_fn);
+
+typedef struct Dav1dCdefDSPContext {
+ cdef_dir_fn dir;
+ cdef_fn fb[3 /* 444/luma, 422, 420 */];
+} Dav1dCdefDSPContext;
+
+bitfn_decls(void dav1d_cdef_dsp_init, Dav1dCdefDSPContext *c);
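+
+/* For reference, decl_cdef_fn(my_cdef) (my_cdef being a hypothetical name)
+ * expands to the full filter prototype above; the per-arch and per-bitdepth
+ * init functions then install concrete implementations into
+ * Dav1dCdefDSPContext.dir and .fb[]. */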
+
+#endif /* DAV1D_SRC_CDEF_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "common/intops.h"
+
+#include "src/cdef.h"
+#include "src/tables.h"
+
+static int cdef_find_dir_c(const pixel *img, const ptrdiff_t stride,
+ unsigned *const var HIGHBD_DECL_SUFFIX)
+{
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
+ printf("bitdepth_max = %d, bitdepth_min_8 = %d\n", bitdepth_max, bitdepth_min_8);
+ int partial_sum_hv[2][8] = { { 0 } };
+ int partial_sum_diag[2][15] = { { 0 } };
+ int partial_sum_alt[4][11] = { { 0 } };
+
+ for (int y = 0; y < 8; y++) {
+ printf("img: y: %d : %p -> \t", y, img);
+ for (int x = 0; x < 8; x++) {
+ const int px = (img[x] >> bitdepth_min_8) - 128;
+ printf("%04x/%04x ", img[x], px);
+
+ partial_sum_diag[0][ y + x ] += px;
+ partial_sum_alt [0][ y + (x >> 1)] += px;
+ partial_sum_hv [0][ y ] += px;
+ partial_sum_alt [1][3 + y - (x >> 1)] += px;
+ partial_sum_diag[1][7 + y - x ] += px;
+ partial_sum_alt [2][3 - (y >> 1) + x ] += px;
+ partial_sum_hv [1][ x ] += px;
+ partial_sum_alt [3][ (y >> 1) + x ] += px;
+ }
+ printf("\n");
+ img += PXSTRIDE(stride);
+ }
+
+ printf("partial_sum_alt : \n");
+ for (int y = 0; y < 4; y++) {
+ for (int x = 0; x < 11; x++) {
+ printf("%08x ", partial_sum_alt[y][x]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+
+ unsigned cost[8] = { 0 };
+ for (int n = 0; n < 8; n++) {
+ cost[2] += partial_sum_hv[0][n] * partial_sum_hv[0][n];
+ cost[6] += partial_sum_hv[1][n] * partial_sum_hv[1][n];
+ }
+ cost[2] *= 105;
+ cost[6] *= 105;
+
+ static const uint16_t div_table[7] = { 840, 420, 280, 210, 168, 140, 120 };
+ for (int n = 0; n < 7; n++) {
+ const int d = div_table[n];
+ printf("n: %d\n", n);
+        int t = partial_sum_diag[0][14 - n];
+        printf("partial_sum_diag[0][14 - %d] = %d/%08x, partial_sum_diag[0][14 - %d]^2 = %d/%08x\n", n, t, t, n, t * t, t * t);
+        /* t is the plain sum of squares; the divisor weight d is applied
+         * exactly once when accumulating into cost[0], matching cost[4]. */
+        t = partial_sum_diag[0][n] * partial_sum_diag[0][n] +
+            partial_sum_diag[0][14 - n] * partial_sum_diag[0][14 - n];
+        printf("t = %d/%08x, d = %d/%08x, t * d = %d/%08x\n", t, t, d, d, t * d, t * d);
+        cost[0] += t * d;
+ cost[4] += (partial_sum_diag[1][n] * partial_sum_diag[1][n] +
+ partial_sum_diag[1][14 - n] * partial_sum_diag[1][14 - n]) * d;
+ }
+ cost[0] += partial_sum_diag[0][7] * partial_sum_diag[0][7] * 105;
+ cost[4] += partial_sum_diag[1][7] * partial_sum_diag[1][7] * 105;
+
+ printf("cost: \n");
+ for (int y = 0; y < 8; y++) {
+ printf("%08x ", cost[y]);
+ }
+ printf("\n");
+
+ for (int n = 0; n < 4; n++) {
+ unsigned *const cost_ptr = &cost[n * 2 + 1];
+ for (int m = 0; m < 5; m++)
+ *cost_ptr += partial_sum_alt[n][3 + m] * partial_sum_alt[n][3 + m];
+ *cost_ptr *= 105;
+ for (int m = 0; m < 3; m++) {
+ const int d = div_table[2 * m + 1];
+ *cost_ptr += (partial_sum_alt[n][m] * partial_sum_alt[n][m] +
+ partial_sum_alt[n][10 - m] * partial_sum_alt[n][10 - m]) * d;
+ }
+ }
+ printf("cost: \n");
+ for (int y = 0; y < 8; y++) {
+ printf("%08x ", cost[y]);
+ }
+ printf("\n");
+
+ int best_dir = 0;
+ unsigned best_cost = cost[0];
+ for (int n = 1; n < 8; n++) {
+ if (cost[n] > best_cost) {
+ best_cost = cost[n];
+ best_dir = n;
+ }
+ }
+
+ *var = (best_cost - (cost[best_dir ^ 4])) >> 10;
+ return best_dir;
+}
+
+#if HAVE_ASM
+#if ARCH_AARCH64 || ARCH_ARM
+#include "src/arm/cdef.h"
+#elif ARCH_PPC64LE
+#include "src/ppc/cdef.h"
+#elif ARCH_X86
+#include "src/x86/cdef.h"
+#endif
+#endif
+
+COLD void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
+ c->dir = cdef_find_dir_c;
+
+#if HAVE_ASM
+#if ARCH_AARCH64 || ARCH_ARM
+ cdef_dsp_init_arm(c);
+#elif ARCH_PPC64LE
+ cdef_dsp_init_ppc(c);
+#elif ARCH_X86
+ cdef_dsp_init_x86(c);
+#endif
+#endif
+}
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "config.h"
+
+#include <stdint.h>
+
+#include "src/cpu.h"
+#include "src/log.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#elif defined(__APPLE__)
+#include <sys/sysctl.h>
+#include <sys/types.h>
+#else
+#include <pthread.h>
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_PTHREAD_NP_H
+#include <pthread_np.h>
+#endif
+#if defined(__FreeBSD__)
+#define cpu_set_t cpuset_t
+#endif
+
+unsigned dav1d_cpu_flags = 0U;
+unsigned dav1d_cpu_flags_mask = ~0U;
+
+COLD void dav1d_init_cpu(void) {
+#if HAVE_ASM && !__has_feature(memory_sanitizer)
+// memory sanitizer is inherently incompatible with asm
+#if ARCH_AARCH64 || ARCH_ARM
+ dav1d_cpu_flags = dav1d_get_cpu_flags_arm();
+#elif ARCH_PPC64LE
+ dav1d_cpu_flags = dav1d_get_cpu_flags_ppc();
+#elif ARCH_X86
+ dav1d_cpu_flags = dav1d_get_cpu_flags_x86();
+#endif
+#endif
+}
+
+COLD void dav1d_set_cpu_flags_mask(const unsigned mask) {
+ dav1d_cpu_flags_mask = mask;
+}
+
+COLD int dav1d_num_logical_processors(Dav1dContext *const c) {
+#ifdef _WIN32
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+ GROUP_AFFINITY affinity;
+ if (GetThreadGroupAffinity(GetCurrentThread(), &affinity)) {
+ int num_processors = 1;
+ while (affinity.Mask &= affinity.Mask - 1)
+ num_processors++;
+ return num_processors;
+ }
+#else
+ SYSTEM_INFO system_info;
+ GetNativeSystemInfo(&system_info);
+ return system_info.dwNumberOfProcessors;
+#endif
+#elif defined(HAVE_PTHREAD_GETAFFINITY_NP) && defined(CPU_COUNT)
+ cpu_set_t affinity;
+ if (!pthread_getaffinity_np(pthread_self(), sizeof(affinity), &affinity))
+ return CPU_COUNT(&affinity);
+#elif defined(__APPLE__)
+ int num_processors;
+ size_t length = sizeof(num_processors);
+ if (!sysctlbyname("hw.logicalcpu", &num_processors, &length, NULL, 0))
+ return num_processors;
+#elif defined(_SC_NPROCESSORS_ONLN)
+ return (int)sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+ if (c)
+ dav1d_log(c, "Unable to detect thread count, defaulting to single-threaded mode\n");
+ return 1;
+}
--- /dev/null
+/*
+ * Copyright © 2018-2022, VideoLAN and dav1d authors
+ * Copyright © 2018-2022, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_CPU_H
+#define DAV1D_SRC_CPU_H
+
+#include "config.h"
+
+#include "common/attributes.h"
+
+#include "dav1d/common.h"
+#include "dav1d/dav1d.h"
+
+#if ARCH_AARCH64 || ARCH_ARM
+#include "src/arm/cpu.h"
+#elif ARCH_PPC64LE
+#include "src/ppc/cpu.h"
+#elif ARCH_X86
+#include "src/x86/cpu.h"
+#endif
+
+EXTERN unsigned dav1d_cpu_flags;
+EXTERN unsigned dav1d_cpu_flags_mask;
+
+void dav1d_init_cpu(void);
+DAV1D_API void dav1d_set_cpu_flags_mask(unsigned mask);
+int dav1d_num_logical_processors(Dav1dContext *c);
+
+static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
+ unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
+
+#if TRIM_DSP_FUNCTIONS
+/* Since this function is inlined, unconditionally setting a flag here will
+ * enable dead code elimination in the calling function. */
+#if ARCH_AARCH64 || ARCH_ARM
+#if defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32) || ARCH_AARCH64
+ flags |= DAV1D_ARM_CPU_FLAG_NEON;
+#endif
+#elif ARCH_PPC64LE
+#if defined(__VSX__)
+ flags |= DAV1D_PPC_CPU_FLAG_VSX;
+#endif
+#if defined(HAVE_SVP64)
+ flags |= DAV1D_PPC_CPU_FLAG_SVP64;
+#endif
+#elif ARCH_X86
+#if defined(__AVX512F__) && defined(__AVX512CD__) && \
+ defined(__AVX512BW__) && defined(__AVX512DQ__) && \
+ defined(__AVX512VL__) && defined(__AVX512VNNI__) && \
+ defined(__AVX512IFMA__) && defined(__AVX512VBMI__) && \
+ defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__) && \
+ defined(__AVX512BITALG__) && defined(__GFNI__) && \
+ defined(__VAES__) && defined(__VPCLMULQDQ__)
+ flags |= DAV1D_X86_CPU_FLAG_AVX512ICL |
+ DAV1D_X86_CPU_FLAG_AVX2 |
+ DAV1D_X86_CPU_FLAG_SSE41 |
+ DAV1D_X86_CPU_FLAG_SSSE3 |
+ DAV1D_X86_CPU_FLAG_SSE2;
+#elif defined(__AVX2__)
+ flags |= DAV1D_X86_CPU_FLAG_AVX2 |
+ DAV1D_X86_CPU_FLAG_SSE41 |
+ DAV1D_X86_CPU_FLAG_SSSE3 |
+ DAV1D_X86_CPU_FLAG_SSE2;
+#elif defined(__SSE4_1__) || defined(__AVX__)
+ flags |= DAV1D_X86_CPU_FLAG_SSE41 |
+ DAV1D_X86_CPU_FLAG_SSSE3 |
+ DAV1D_X86_CPU_FLAG_SSE2;
+#elif defined(__SSSE3__)
+ flags |= DAV1D_X86_CPU_FLAG_SSSE3 |
+ DAV1D_X86_CPU_FLAG_SSE2;
+#elif ARCH_X86_64 || defined(__SSE2__) || \
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+ flags |= DAV1D_X86_CPU_FLAG_SSE2;
+#endif
+#endif
+#endif
+
+ return flags;
+}
+
+#endif /* DAV1D_SRC_CPU_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_ENV_H
+#define DAV1D_SRC_ENV_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "src/levels.h"
+//#include "src/refmvs.h"
+#include "src/tables.h"
+
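+// Above/left coding context: each array holds per-4x4-unit state for either
+// the row above (a) or the column to the left (l) of the current block.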
+typedef struct BlockContext {
+ uint8_t ALIGN(mode[32], 8);
+ uint8_t ALIGN(lcoef[32], 8);
+ uint8_t ALIGN(ccoef[2][32], 8);
+ uint8_t ALIGN(seg_pred[32], 8);
+ uint8_t ALIGN(skip[32], 8);
+ uint8_t ALIGN(skip_mode[32], 8);
+ uint8_t ALIGN(intra[32], 8);
+ uint8_t ALIGN(comp_type[32], 8);
+ int8_t ALIGN(ref[2][32], 8); // -1 means intra
+ uint8_t ALIGN(filter[2][32], 8); // 3 means unset
+ int8_t ALIGN(tx_intra[32], 8);
+ int8_t ALIGN(tx[32], 8);
+ uint8_t ALIGN(tx_lpf_y[32], 8);
+ uint8_t ALIGN(tx_lpf_uv[32], 8);
+ uint8_t ALIGN(partition[16], 8);
+ uint8_t ALIGN(uvmode[32], 8);
+ uint8_t ALIGN(pal_sz[32], 8);
+} BlockContext;
+
+static inline int get_intra_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const int yb4, const int xb4,
+ const int have_top, const int have_left)
+{
+ if (have_left) {
+ if (have_top) {
+ const int ctx = l->intra[yb4] + a->intra[xb4];
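+ // ctx is 0..2; the (ctx == 2) bump maps the sums {0, 1, 2} to {0, 1, 3}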
+ return ctx + (ctx == 2);
+ } else
+ return l->intra[yb4] * 2;
+ } else {
+ return have_top ? a->intra[xb4] * 2 : 0;
+ }
+}
+
+static inline int get_tx_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const TxfmInfo *const max_tx,
+ const int yb4, const int xb4)
+{
+ return (l->tx_intra[yb4] >= max_tx->lh) + (a->tx_intra[xb4] >= max_tx->lw);
+}
+
+static inline int get_partition_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const enum BlockLevel bl,
+ const int yb8, const int xb8)
+{
+ return ((a->partition[xb8] >> (4 - bl)) & 1) +
+ (((l->partition[yb8] >> (4 - bl)) & 1) << 1);
+}
+
+static inline unsigned gather_left_partition_prob(const uint16_t *const in,
+ const enum BlockLevel bl)
+{
+ unsigned out = in[PARTITION_H - 1] - in[PARTITION_H];
+ // Exploit the fact that cdfs for PARTITION_SPLIT, PARTITION_T_TOP_SPLIT,
+ // PARTITION_T_BOTTOM_SPLIT and PARTITION_T_LEFT_SPLIT are neighbors.
+ out += in[PARTITION_SPLIT - 1] - in[PARTITION_T_LEFT_SPLIT];
+ if (bl != BL_128X128)
+ out += in[PARTITION_H4 - 1] - in[PARTITION_H4];
+ return out;
+}
+
+static inline unsigned gather_top_partition_prob(const uint16_t *const in,
+ const enum BlockLevel bl)
+{
+ // Exploit the fact that cdfs for PARTITION_V, PARTITION_SPLIT and
+ // PARTITION_T_TOP_SPLIT are neighbors.
+ unsigned out = in[PARTITION_V - 1] - in[PARTITION_T_TOP_SPLIT];
+ // Exploit the facts that cdfs for PARTITION_T_LEFT_SPLIT and
+ // PARTITION_T_RIGHT_SPLIT are neighbors, the probability for
+ // PARTITION_V4 is always zero, and the probability for
+ // PARTITION_T_RIGHT_SPLIT is zero in 128x128 blocks.
+ out += in[PARTITION_T_LEFT_SPLIT - 1];
+ if (bl != BL_128X128)
+ out += in[PARTITION_V4 - 1] - in[PARTITION_T_RIGHT_SPLIT];
+ return out;
+}
+
+static inline enum TxfmType get_uv_inter_txtp(const TxfmInfo *const uvt_dim,
+ const enum TxfmType ytxtp)
+{
+ if (uvt_dim->max == TX_32X32)
+ return ytxtp == IDTX ? IDTX : DCT_DCT;
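+ // the (1 << ytxtp) & ... below is a bitmask set-membership test against
+ // the four 1-D ADST/FLIPADST transform types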
+ if (uvt_dim->min == TX_16X16 &&
+ ((1 << ytxtp) & ((1 << H_FLIPADST) | (1 << V_FLIPADST) |
+ (1 << H_ADST) | (1 << V_ADST))))
+ {
+ return DCT_DCT;
+ }
+
+ return ytxtp;
+}
+
+static inline int get_filter_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const int comp, const int dir, const int ref,
+ const int yb4, const int xb4)
+{
+ const int a_filter = (a->ref[0][xb4] == ref || a->ref[1][xb4] == ref) ?
+ a->filter[dir][xb4] : DAV1D_N_SWITCHABLE_FILTERS;
+ const int l_filter = (l->ref[0][yb4] == ref || l->ref[1][yb4] == ref) ?
+ l->filter[dir][yb4] : DAV1D_N_SWITCHABLE_FILTERS;
+
+ if (a_filter == l_filter) {
+ return comp * 4 + a_filter;
+ } else if (a_filter == DAV1D_N_SWITCHABLE_FILTERS) {
+ return comp * 4 + l_filter;
+ } else if (l_filter == DAV1D_N_SWITCHABLE_FILTERS) {
+ return comp * 4 + a_filter;
+ } else {
+ return comp * 4 + DAV1D_N_SWITCHABLE_FILTERS;
+ }
+}
+
+static inline int get_comp_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const int yb4, const int xb4,
+ const int have_top, const int have_left)
+{
+ if (have_top) {
+ if (have_left) {
+ if (a->comp_type[xb4]) {
+ if (l->comp_type[yb4]) {
+ return 4;
+ } else {
+ // 4U means intra (-1) or bwd (>= 4)
+ return 2 + ((unsigned)l->ref[0][yb4] >= 4U);
+ }
+ } else if (l->comp_type[yb4]) {
+ // 4U means intra (-1) or bwd (>= 4)
+ return 2 + ((unsigned)a->ref[0][xb4] >= 4U);
+ } else {
+ return (l->ref[0][yb4] >= 4) ^ (a->ref[0][xb4] >= 4);
+ }
+ } else {
+ return a->comp_type[xb4] ? 3 : a->ref[0][xb4] >= 4;
+ }
+ } else if (have_left) {
+ return l->comp_type[yb4] ? 3 : l->ref[0][yb4] >= 4;
+ } else {
+ return 1;
+ }
+}
+
+static inline int get_comp_dir_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const int yb4, const int xb4,
+ const int have_top, const int have_left)
+{
+#define has_uni_comp(edge, off) \
+ ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
+
+ if (have_top && have_left) {
+ const int a_intra = a->intra[xb4], l_intra = l->intra[yb4];
+
+ if (a_intra && l_intra) return 2;
+ if (a_intra || l_intra) {
+ const BlockContext *const edge = a_intra ? l : a;
+ const int off = a_intra ? yb4 : xb4;
+
+ if (edge->comp_type[off] == COMP_INTER_NONE) return 2;
+ return 1 + 2 * has_uni_comp(edge, off);
+ }
+
+ const int a_comp = a->comp_type[xb4] != COMP_INTER_NONE;
+ const int l_comp = l->comp_type[yb4] != COMP_INTER_NONE;
+ const int a_ref0 = a->ref[0][xb4], l_ref0 = l->ref[0][yb4];
+
+ if (!a_comp && !l_comp) {
+ return 1 + 2 * ((a_ref0 >= 4) == (l_ref0 >= 4));
+ } else if (!a_comp || !l_comp) {
+ const BlockContext *const edge = a_comp ? a : l;
+ const int off = a_comp ? xb4 : yb4;
+
+ if (!has_uni_comp(edge, off)) return 1;
+ return 3 + ((a_ref0 >= 4) == (l_ref0 >= 4));
+ } else {
+ const int a_uni = has_uni_comp(a, xb4), l_uni = has_uni_comp(l, yb4);
+
+ if (!a_uni && !l_uni) return 0;
+ if (!a_uni || !l_uni) return 2;
+ return 3 + ((a_ref0 == 4) == (l_ref0 == 4));
+ }
+ } else if (have_top || have_left) {
+ const BlockContext *const edge = have_left ? l : a;
+ const int off = have_left ? yb4 : xb4;
+
+ if (edge->intra[off]) return 2;
+ if (edge->comp_type[off] == COMP_INTER_NONE) return 2;
+ return 4 * has_uni_comp(edge, off);
+ } else {
+ return 2;
+ }
+}
+
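+// Signed POC distance interpreted modulo 2^order_hint_n_bits. E.g. with
+// 7-bit order hints, poc0 = 2 and poc1 = 126 give diff = -124, which wraps
+// around to +4.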
+static inline int get_poc_diff(const int order_hint_n_bits,
+ const int poc0, const int poc1)
+{
+ if (!order_hint_n_bits) return 0;
+ const int mask = 1 << (order_hint_n_bits - 1);
+ const int diff = poc0 - poc1;
+ return (diff & (mask - 1)) - (diff & mask);
+}
+
+static inline int get_jnt_comp_ctx(const int order_hint_n_bits,
+ const unsigned poc, const unsigned ref0poc,
+ const unsigned ref1poc,
+ const BlockContext *const a,
+ const BlockContext *const l,
+ const int yb4, const int xb4)
+{
+ const unsigned d0 = abs(get_poc_diff(order_hint_n_bits, ref0poc, poc));
+ const unsigned d1 = abs(get_poc_diff(order_hint_n_bits, poc, ref1poc));
+ const int offset = d0 == d1;
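+ // ref index 6 is ALTREF; comp_type >= COMP_INTER_AVG selects the
+ // plain-average and masked compound modes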
+ const int a_ctx = a->comp_type[xb4] >= COMP_INTER_AVG ||
+ a->ref[0][xb4] == 6;
+ const int l_ctx = l->comp_type[yb4] >= COMP_INTER_AVG ||
+ l->ref[0][yb4] == 6;
+
+ return 3 * offset + a_ctx + l_ctx;
+}
+
+static inline int get_mask_comp_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const int yb4, const int xb4)
+{
+ const int a_ctx = a->comp_type[xb4] >= COMP_INTER_SEG ? 1 :
+ a->ref[0][xb4] == 6 ? 3 : 0;
+ const int l_ctx = l->comp_type[yb4] >= COMP_INTER_SEG ? 1 :
+ l->ref[0][yb4] == 6 ? 3 : 0;
+
+ return imin(a_ctx + l_ctx, 5);
+}
+
+#define av1_get_ref_2_ctx av1_get_bwd_ref_ctx
+#define av1_get_ref_3_ctx av1_get_fwd_ref_ctx
+#define av1_get_ref_4_ctx av1_get_fwd_ref_1_ctx
+#define av1_get_ref_5_ctx av1_get_fwd_ref_2_ctx
+#define av1_get_ref_6_ctx av1_get_bwd_ref_1_ctx
+#define av1_get_uni_p_ctx av1_get_ref_ctx
+#define av1_get_uni_p2_ctx av1_get_fwd_ref_2_ctx
+
+static inline int av1_get_ref_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const int yb4, const int xb4,
+ int have_top, int have_left)
+{
+ int cnt[2] = { 0 };
+
+ if (have_top && !a->intra[xb4]) {
+ cnt[a->ref[0][xb4] >= 4]++;
+ if (a->comp_type[xb4]) cnt[a->ref[1][xb4] >= 4]++;
+ }
+
+ if (have_left && !l->intra[yb4]) {
+ cnt[l->ref[0][yb4] >= 4]++;
+ if (l->comp_type[yb4]) cnt[l->ref[1][yb4] >= 4]++;
+ }
+
+ return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
+}
+
+static inline int av1_get_fwd_ref_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const int yb4, const int xb4,
+ const int have_top, const int have_left)
+{
+ int cnt[4] = { 0 };
+
+ if (have_top && !a->intra[xb4]) {
+ if (a->ref[0][xb4] < 4) cnt[a->ref[0][xb4]]++;
+ if (a->comp_type[xb4] && a->ref[1][xb4] < 4) cnt[a->ref[1][xb4]]++;
+ }
+
+ if (have_left && !l->intra[yb4]) {
+ if (l->ref[0][yb4] < 4) cnt[l->ref[0][yb4]]++;
+ if (l->comp_type[yb4] && l->ref[1][yb4] < 4) cnt[l->ref[1][yb4]]++;
+ }
+
+ cnt[0] += cnt[1];
+ cnt[2] += cnt[3];
+
+ return cnt[0] == cnt[2] ? 1 : cnt[0] < cnt[2] ? 0 : 2;
+}
+
+static inline int av1_get_fwd_ref_1_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const int yb4, const int xb4,
+ const int have_top, const int have_left)
+{
+ int cnt[2] = { 0 };
+
+ if (have_top && !a->intra[xb4]) {
+ if (a->ref[0][xb4] < 2) cnt[a->ref[0][xb4]]++;
+ if (a->comp_type[xb4] && a->ref[1][xb4] < 2) cnt[a->ref[1][xb4]]++;
+ }
+
+ if (have_left && !l->intra[yb4]) {
+ if (l->ref[0][yb4] < 2) cnt[l->ref[0][yb4]]++;
+ if (l->comp_type[yb4] && l->ref[1][yb4] < 2) cnt[l->ref[1][yb4]]++;
+ }
+
+ return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
+}
+
+static inline int av1_get_fwd_ref_2_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const int yb4, const int xb4,
+ const int have_top, const int have_left)
+{
+ int cnt[2] = { 0 };
+
+ if (have_top && !a->intra[xb4]) {
+ if ((a->ref[0][xb4] ^ 2U) < 2) cnt[a->ref[0][xb4] - 2]++;
+ if (a->comp_type[xb4] && (a->ref[1][xb4] ^ 2U) < 2) cnt[a->ref[1][xb4] - 2]++;
+ }
+
+ if (have_left && !l->intra[yb4]) {
+ if ((l->ref[0][yb4] ^ 2U) < 2) cnt[l->ref[0][yb4] - 2]++;
+ if (l->comp_type[yb4] && (l->ref[1][yb4] ^ 2U) < 2) cnt[l->ref[1][yb4] - 2]++;
+ }
+
+ return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
+}
+
+static inline int av1_get_bwd_ref_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const int yb4, const int xb4,
+ const int have_top, const int have_left)
+{
+ int cnt[3] = { 0 };
+
+ if (have_top && !a->intra[xb4]) {
+ if (a->ref[0][xb4] >= 4) cnt[a->ref[0][xb4] - 4]++;
+ if (a->comp_type[xb4] && a->ref[1][xb4] >= 4) cnt[a->ref[1][xb4] - 4]++;
+ }
+
+ if (have_left && !l->intra[yb4]) {
+ if (l->ref[0][yb4] >= 4) cnt[l->ref[0][yb4] - 4]++;
+ if (l->comp_type[yb4] && l->ref[1][yb4] >= 4) cnt[l->ref[1][yb4] - 4]++;
+ }
+
+ cnt[1] += cnt[0];
+
+ return cnt[2] == cnt[1] ? 1 : cnt[1] < cnt[2] ? 0 : 2;
+}
+
+static inline int av1_get_bwd_ref_1_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const int yb4, const int xb4,
+ const int have_top, const int have_left)
+{
+ int cnt[3] = { 0 };
+
+ if (have_top && !a->intra[xb4]) {
+ if (a->ref[0][xb4] >= 4) cnt[a->ref[0][xb4] - 4]++;
+ if (a->comp_type[xb4] && a->ref[1][xb4] >= 4) cnt[a->ref[1][xb4] - 4]++;
+ }
+
+ if (have_left && !l->intra[yb4]) {
+ if (l->ref[0][yb4] >= 4) cnt[l->ref[0][yb4] - 4]++;
+ if (l->comp_type[yb4] && l->ref[1][yb4] >= 4) cnt[l->ref[1][yb4] - 4]++;
+ }
+
+ return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
+}
+
+static inline int av1_get_uni_p1_ctx(const BlockContext *const a,
+ const BlockContext *const l,
+ const int yb4, const int xb4,
+ const int have_top, const int have_left)
+{
+ int cnt[3] = { 0 };
+
+ if (have_top && !a->intra[xb4]) {
+ if (a->ref[0][xb4] - 1U < 3) cnt[a->ref[0][xb4] - 1]++;
+ if (a->comp_type[xb4] && a->ref[1][xb4] - 1U < 3) cnt[a->ref[1][xb4] - 1]++;
+ }
+
+ if (have_left && !l->intra[yb4]) {
+ if (l->ref[0][yb4] - 1U < 3) cnt[l->ref[0][yb4] - 1]++;
+ if (l->comp_type[yb4] && l->ref[1][yb4] - 1U < 3) cnt[l->ref[1][yb4] - 1]++;
+ }
+
+ cnt[1] += cnt[2];
+
+ return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
+}
+
+static inline int get_drl_context(const refmvs_candidate *const ref_mv_stack,
+ const int ref_idx)
+{
+ if (ref_mv_stack[ref_idx].weight >= 640)
+ return ref_mv_stack[ref_idx + 1].weight < 640;
+
+ return ref_mv_stack[ref_idx + 1].weight < 640 ? 2 : 0;
+}
+
+static inline unsigned get_cur_frame_segid(const int by, const int bx,
+ const int have_top,
+ const int have_left,
+ int *const seg_ctx,
+ const uint8_t *cur_seg_map,
+ const ptrdiff_t stride)
+{
+ cur_seg_map += bx + by * stride;
+ if (have_left && have_top) {
+ const int l = cur_seg_map[-1];
+ const int a = cur_seg_map[-stride];
+ const int al = cur_seg_map[-(stride + 1)];
+
+ if (l == a && al == l) *seg_ctx = 2;
+ else if (l == a || al == l || a == al) *seg_ctx = 1;
+ else *seg_ctx = 0;
+ return a == al ? a : l;
+ } else {
+ *seg_ctx = 0;
+ return have_left ? cur_seg_map[-1] : have_top ? cur_seg_map[-stride] : 0;
+ }
+}
+
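+// MVs are stored in 1/8-pel units, so rounding to a multiple of 8 snaps them
+// to full-pel; the (mv >> 15) sign-bit term keeps rounding symmetric at zero.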
+static inline void fix_int_mv_precision(mv *const mv) {
+ mv->x = (mv->x - (mv->x >> 15) + 3) & ~7U;
+ mv->y = (mv->y - (mv->y >> 15) + 3) & ~7U;
+}
+
+static inline void fix_mv_precision(const Dav1dFrameHeader *const hdr,
+ mv *const mv)
+{
+ if (hdr->force_integer_mv) {
+ fix_int_mv_precision(mv);
+ } else if (!hdr->hp) {
+ mv->x = (mv->x - (mv->x >> 15)) & ~1U;
+ mv->y = (mv->y - (mv->y >> 15)) & ~1U;
+ }
+}
+
+static inline mv get_gmv_2d(const Dav1dWarpedMotionParams *const gmv,
+ const int bx4, const int by4,
+ const int bw4, const int bh4,
+ const Dav1dFrameHeader *const hdr)
+{
+ switch (gmv->type) {
+ case DAV1D_WM_TYPE_ROT_ZOOM:
+ assert(gmv->matrix[5] == gmv->matrix[2]);
+ assert(gmv->matrix[4] == -gmv->matrix[3]);
+ // fall-through
+ default:
+ case DAV1D_WM_TYPE_AFFINE: {
+ const int x = bx4 * 4 + bw4 * 2 - 1;
+ const int y = by4 * 4 + bh4 * 2 - 1;
+ const int xc = (gmv->matrix[2] - (1 << 16)) * x +
+ gmv->matrix[3] * y + gmv->matrix[0];
+ const int yc = (gmv->matrix[5] - (1 << 16)) * y +
+ gmv->matrix[4] * x + gmv->matrix[1];
+ const int shift = 16 - (3 - !hdr->hp);
+ const int round = (1 << shift) >> 1;
+ mv res = (mv) {
+ .y = apply_sign(((abs(yc) + round) >> shift) << !hdr->hp, yc),
+ .x = apply_sign(((abs(xc) + round) >> shift) << !hdr->hp, xc),
+ };
+ if (hdr->force_integer_mv)
+ fix_int_mv_precision(&res);
+ return res;
+ }
+ case DAV1D_WM_TYPE_TRANSLATION: {
+ mv res = (mv) {
+ .y = gmv->matrix[0] >> 13,
+ .x = gmv->matrix[1] >> 13,
+ };
+ if (hdr->force_integer_mv)
+ fix_int_mv_precision(&res);
+ return res;
+ }
+ case DAV1D_WM_TYPE_IDENTITY:
+ return (mv) { .x = 0, .y = 0 };
+ }
+}
+
+#endif /* DAV1D_SRC_ENV_H */
--- /dev/null
+/*
+ * Copyright © 2018-2021, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_INTERNAL_H
+#define DAV1D_SRC_INTERNAL_H
+
+#include <stdatomic.h>
+
+#include "dav1d/data.h"
+
+typedef struct Dav1dFrameContext Dav1dFrameContext;
+typedef struct Dav1dTileState Dav1dTileState;
+typedef struct Dav1dTaskContext Dav1dTaskContext;
+typedef struct Dav1dTask Dav1dTask;
+
+#include "common/attributes.h"
+
+#include "src/cdef.h"
+#include "src/picture.h"
+#include "src/thread.h"
+
+typedef struct Dav1dDSPContext {
+ Dav1dCdefDSPContext cdef;
+} Dav1dDSPContext;
+
+struct Dav1dTileGroup {
+ Dav1dData data;
+ int start, end;
+};
+
+enum TaskType {
+ DAV1D_TASK_TYPE_INIT,
+ DAV1D_TASK_TYPE_INIT_CDF,
+ DAV1D_TASK_TYPE_TILE_ENTROPY,
+ DAV1D_TASK_TYPE_ENTROPY_PROGRESS,
+ DAV1D_TASK_TYPE_TILE_RECONSTRUCTION,
+ DAV1D_TASK_TYPE_DEBLOCK_COLS,
+ DAV1D_TASK_TYPE_DEBLOCK_ROWS,
+ DAV1D_TASK_TYPE_CDEF,
+ DAV1D_TASK_TYPE_SUPER_RESOLUTION,
+ DAV1D_TASK_TYPE_LOOP_RESTORATION,
+ DAV1D_TASK_TYPE_RECONSTRUCTION_PROGRESS,
+ DAV1D_TASK_TYPE_FG_PREP,
+ DAV1D_TASK_TYPE_FG_APPLY,
+};
+
+struct Dav1dContext {
+ Dav1dFrameContext *fc;
+ unsigned n_fc;
+
+ Dav1dTaskContext *tc;
+ unsigned n_tc;
+
+ // cache of OBUs that make up a single frame before we submit them
+ // to a frame worker to be decoded
+ struct Dav1dTileGroup *tile;
+ int n_tile_data_alloc;
+ int n_tile_data;
+ int n_tiles;
+ Dav1dMemPool *seq_hdr_pool;
+ Dav1dRef *seq_hdr_ref;
+ Dav1dSequenceHeader *seq_hdr;
+ Dav1dMemPool *frame_hdr_pool;
+ Dav1dRef *frame_hdr_ref;
+ Dav1dFrameHeader *frame_hdr;
+
+ Dav1dRef *content_light_ref;
+ Dav1dContentLightLevel *content_light;
+ Dav1dRef *mastering_display_ref;
+ Dav1dMasteringDisplay *mastering_display;
+ Dav1dRef *itut_t35_ref;
+ Dav1dITUTT35 *itut_t35;
+
+ // decoded output picture queue
+ Dav1dData in;
+ Dav1dThreadPicture out, cache;
+ // flush is a pointer (into flush_mem) so it can be passed to atomic_load()
+ // without compiler errors about const-qualified arguments
+ atomic_int flush_mem, *flush;
+ struct {
+ Dav1dThreadPicture *out_delayed;
+ unsigned next;
+ } frame_thread;
+
+ // task threading (refer to tc[] for per-thread state)
+ struct TaskThreadData {
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ atomic_uint first;
+ unsigned cur;
+ // This is used for delayed reset of the task cur pointer when
+ // such operation is needed but the thread doesn't enter a critical
+ // section (typically when executing the next sbrow task locklessly).
+ // See src/thread_task.c:reset_task_cur().
+ atomic_uint reset_task_cur;
+ atomic_int cond_signaled;
+ struct {
+ int exec;
+ pthread_cond_t cond;
+ const Dav1dPicture *in;
+ Dav1dPicture *out;
+ enum TaskType type;
+ atomic_int progress[2]; /* [0]=started, [1]=completed */
+ union {
+ struct {
+ ALIGN(uint8_t scaling_8bpc[3][256], 64);
+ };
+ struct {
+ ALIGN(uint8_t scaling_16bpc[3][4096], 64);
+ };
+ };
+ } delayed_fg;
+ int inited;
+ } task_thread;
+
+ // reference/entropy state
+ Dav1dMemPool *segmap_pool;
+ Dav1dMemPool *refmvs_pool;
+ struct {
+ Dav1dThreadPicture p;
+ Dav1dRef *segmap;
+ Dav1dRef *refmvs;
+ unsigned refpoc[7];
+ } refs[8];
+ Dav1dMemPool *cdf_pool;
+
+ Dav1dDSPContext dsp[3 /* 8, 10, 12 bits/component */];
+ //Dav1dRefmvsDSPContext refmvs_dsp;
+
+ Dav1dPicAllocator allocator;
+ int apply_grain;
+ int operating_point;
+ unsigned operating_point_idc;
+ int all_layers;
+ int max_spatial_id;
+ unsigned frame_size_limit;
+ int strict_std_compliance;
+ int output_invisible_frames;
+ enum Dav1dInloopFilterType inloop_filters;
+ int drain;
+ enum PictureFlags frame_flags;
+ enum Dav1dEventFlags event_flags;
+ Dav1dDataProps cached_error_props;
+ int cached_error;
+
+ Dav1dLogger logger;
+
+ Dav1dMemPool *picture_pool;
+};
+
+struct Dav1dTask {
+ unsigned frame_idx; // frame thread id
+ enum TaskType type; // task work
+ int sby; // sbrow
+
+ // task dependencies
+ int recon_progress, deblock_progress;
+ int deps_skip;
+ struct Dav1dTask *next; // only used in task queue
+};
+
+struct Dav1dFrameContext {
+ Dav1dRef *seq_hdr_ref;
+ Dav1dSequenceHeader *seq_hdr;
+ Dav1dRef *frame_hdr_ref;
+ Dav1dFrameHeader *frame_hdr;
+ Dav1dThreadPicture refp[7];
+ Dav1dPicture cur; // during block coding / reconstruction
+ Dav1dThreadPicture sr_cur; // after super-resolution upscaling
+ Dav1dRef *mvs_ref;
+ Dav1dRef *ref_mvs_ref[7];
+ Dav1dRef *cur_segmap_ref, *prev_segmap_ref;
+ uint8_t *cur_segmap;
+ const uint8_t *prev_segmap;
+ unsigned refpoc[7], refrefpoc[7][7];
+ uint8_t gmv_warp_allowed[7];
+ struct Dav1dTileGroup *tile;
+ int n_tile_data_alloc;
+ int n_tile_data;
+
+ // for scalable references
+ struct ScalableMotionParams {
+ int scale; // if no scaling, this is 0
+ int step;
+ } svc[7][2 /* x, y */];
+ int resize_step[2 /* y, uv */], resize_start[2 /* y, uv */];
+
+ const Dav1dContext *c;
+ Dav1dTileState *ts;
+ int n_ts;
+ const Dav1dDSPContext *dsp;
+
+ int ipred_edge_sz;
+ pixel *ipred_edge[3];
+ ptrdiff_t b4_stride;
+ int w4, h4, bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step, sr_sb128w;
+ uint16_t dq[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */];
+ int a_sz /* w*tile_rows */;
+ uint8_t jnt_weights[7][7];
+ int bitdepth_max;
+
+ struct {
+ int next_tile_row[2 /* 0: reconstruction, 1: entropy */];
+ int entropy_progress;
+ atomic_int deblock_progress; // in sby units
+ atomic_uint *frame_progress, *copy_lpf_progress;
+ // indexed using t->by * f->b4_stride + t->bx
+ struct CodedBlockInfo {
+ int16_t eob[3 /* plane */];
+ uint8_t txtp[3 /* plane */];
+ } *cbi;
+ // indexed using (t->by >> 1) * (f->b4_stride >> 1) + (t->bx >> 1)
+ uint16_t (*pal)[3 /* plane */][8 /* idx */];
+ // iterated over inside tile state
+ uint8_t *pal_idx;
+ coef *cf;
+ int prog_sz;
+ int pal_sz, pal_idx_sz, cf_sz;
+ // start offsets per tile
+ int *tile_start_off;
+ } frame_thread;
+
+ struct {
+ pthread_cond_t cond;
+ struct TaskThreadData *ttd;
+ struct Dav1dTask *tasks, *tile_tasks[2], init_task;
+ int num_tasks, num_tile_tasks;
+ int init_done;
+ int done[2];
+ int retval;
+ int update_set; // whether we need to update CDF reference
+ atomic_int error;
+ int task_counter;
+ struct Dav1dTask *task_head, *task_tail;
+ // Points to the task directly before the cur pointer in the queue.
+ // This cur pointer is theoretical here; we actually keep track of the
+ // "prev_t" variable. This is needed so we don't lose the tasks in
+ // [head;cur-1] when picking one for execution.
+ struct Dav1dTask *task_cur_prev;
+ } task_thread;
+
+ // threading (refer to tc[] for per-thread things)
+ struct FrameTileThreadData {
+ int (*lowest_pixel_mem)[7][2];
+ int lowest_pixel_mem_sz;
+ } tile_thread;
+};
+
+struct Dav1dTileState {
+ struct {
+ int col_start, col_end, row_start, row_end; // in 4px units
+ int col, row; // in tile units
+ } tiling;
+
+ // in sby units, TILE_ERROR after a decoding error
+ atomic_int progress[2 /* 0: reconstruction, 1: entropy */];
+ struct {
+ uint8_t *pal_idx;
+ coef *cf;
+ } frame_thread[2 /* 0: reconstruction, 1: entropy */];
+
+ // in fullpel units, [0] = Y, [1] = UV, used for progress requirements
+ // each entry is one tile-sbrow; middle index is refidx
+ int (*lowest_pixel)[7][2];
+
+ uint16_t dqmem[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */];
+ const uint16_t (*dq)[3][2];
+ int last_qidx;
+
+ int8_t last_delta_lf[4];
+ uint8_t lflvlmem[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
+ const uint8_t (*lflvl)[4][8][2];
+};
+
+struct Dav1dTaskContext {
+ const Dav1dContext *c;
+ const Dav1dFrameContext *f;
+ Dav1dTileState *ts;
+ int bx, by;
+ ALIGN(union, 64) {
+ int16_t cf_8bpc [32 * 32];
+ int32_t cf_16bpc[32 * 32];
+ };
+ // FIXME types can be changed to pixel (and dynamically allocated)
+ // which would make copy/assign operations slightly faster?
+ uint16_t al_pal[2 /* a/l */][32 /* bx/y4 */][3 /* plane */][8 /* palette_idx */];
+ uint8_t pal_sz_uv[2 /* a/l */][32 /* bx4/by4 */];
+ uint8_t txtp_map[32 * 32]; // inter-only
+ ALIGN(union, 64) {
+ struct {
+ union {
+ uint8_t lap_8bpc [128 * 32];
+ uint16_t lap_16bpc[128 * 32];
+ struct {
+ int16_t compinter[2][128 * 128];
+ uint8_t seg_mask[128 * 128];
+ };
+ };
+ union {
+ // stride=192 for non-SVC, or 320 for SVC
+ uint8_t emu_edge_8bpc [320 * (256 + 7)];
+ uint16_t emu_edge_16bpc[320 * (256 + 7)];
+ };
+ };
+ struct {
+ union {
+ uint8_t levels[32 * 34];
+ struct {
+ uint8_t pal_order[64][8];
+ uint8_t pal_ctx[64];
+ };
+ };
+ int16_t ac[32 * 32];
+ uint8_t pal_idx[2 * 64 * 64];
+ uint16_t pal[3 /* plane */][8 /* palette_idx */];
+ ALIGN(union, 64) {
+ struct {
+ uint8_t interintra_8bpc[64 * 64];
+ uint8_t edge_8bpc[257];
+ };
+ struct {
+ uint16_t interintra_16bpc[64 * 64];
+ uint16_t edge_16bpc[257];
+ };
+ };
+ };
+ } scratch;
+
+ Dav1dWarpedMotionParams warpmv;
+ int top_pre_cdef_toggle;
+ int8_t *cur_sb_cdef_idx_ptr;
+ // for chroma sub8x8, we need to know the filter for all 4 subblocks in
+ // a 4x4 area, but the top/left one can go out of cache already, so this
+ // keeps it accessible
+ enum Filter2d tl_4x4_filter;
+
+ struct {
+ int pass;
+ } frame_thread;
+ struct {
+ struct thread_data td;
+ struct TaskThreadData *ttd;
+ struct FrameTileThreadData *fttd;
+ int flushed;
+ int die;
+ } task_thread;
+};
+
+#endif /* DAV1D_SRC_INTERNAL_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_LEVELS_H
+#define DAV1D_SRC_LEVELS_H
+
+#include <stdint.h>
+
+#include "dav1d/headers.h"
+#include "common/attributes.h"
+
+enum ObuMetaType {
+ OBU_META_HDR_CLL = 1,
+ OBU_META_HDR_MDCV = 2,
+ OBU_META_SCALABILITY = 3,
+ OBU_META_ITUT_T35 = 4,
+ OBU_META_TIMECODE = 5,
+};
+
+enum TxfmSize {
+ TX_4X4,
+ TX_8X8,
+ TX_16X16,
+ TX_32X32,
+ TX_64X64,
+ N_TX_SIZES,
+};
+
+enum BlockLevel {
+ BL_128X128,
+ BL_64X64,
+ BL_32X32,
+ BL_16X16,
+ BL_8X8,
+ N_BL_LEVELS,
+};
+
+enum RectTxfmSize {
+ RTX_4X8 = N_TX_SIZES,
+ RTX_8X4,
+ RTX_8X16,
+ RTX_16X8,
+ RTX_16X32,
+ RTX_32X16,
+ RTX_32X64,
+ RTX_64X32,
+ RTX_4X16,
+ RTX_16X4,
+ RTX_8X32,
+ RTX_32X8,
+ RTX_16X64,
+ RTX_64X16,
+ N_RECT_TX_SIZES
+};
+
+enum TxfmType {
+ DCT_DCT, // DCT in both horizontal and vertical
+ ADST_DCT, // ADST in vertical, DCT in horizontal
+ DCT_ADST, // DCT in vertical, ADST in horizontal
+ ADST_ADST, // ADST in both directions
+ FLIPADST_DCT,
+ DCT_FLIPADST,
+ FLIPADST_FLIPADST,
+ ADST_FLIPADST,
+ FLIPADST_ADST,
+ IDTX,
+ V_DCT,
+ H_DCT,
+ V_ADST,
+ H_ADST,
+ V_FLIPADST,
+ H_FLIPADST,
+ N_TX_TYPES,
+ WHT_WHT = N_TX_TYPES,
+ N_TX_TYPES_PLUS_LL,
+};
+
+enum TxClass {
+ TX_CLASS_2D,
+ TX_CLASS_H,
+ TX_CLASS_V,
+};
+
+enum IntraPredMode {
+ DC_PRED,
+ VERT_PRED,
+ HOR_PRED,
+ DIAG_DOWN_LEFT_PRED,
+ DIAG_DOWN_RIGHT_PRED,
+ VERT_RIGHT_PRED,
+ HOR_DOWN_PRED,
+ HOR_UP_PRED,
+ VERT_LEFT_PRED,
+ SMOOTH_PRED,
+ SMOOTH_V_PRED,
+ SMOOTH_H_PRED,
+ PAETH_PRED,
+ N_INTRA_PRED_MODES,
+ CFL_PRED = N_INTRA_PRED_MODES,
+ N_UV_INTRA_PRED_MODES,
+ N_IMPL_INTRA_PRED_MODES = N_UV_INTRA_PRED_MODES,
+ LEFT_DC_PRED = DIAG_DOWN_LEFT_PRED,
+ TOP_DC_PRED,
+ DC_128_PRED,
+ Z1_PRED,
+ Z2_PRED,
+ Z3_PRED,
+ FILTER_PRED = N_INTRA_PRED_MODES,
+};
+
+enum InterIntraPredMode {
+ II_DC_PRED,
+ II_VERT_PRED,
+ II_HOR_PRED,
+ II_SMOOTH_PRED,
+ N_INTER_INTRA_PRED_MODES,
+};
+
+enum BlockPartition {
+ PARTITION_NONE, // [ ] <-.
+ PARTITION_H, // [-] |
+ PARTITION_V, // [|] |
+ PARTITION_SPLIT, // [+] --'
+ PARTITION_T_TOP_SPLIT, // [⊥] i.e. split top, H bottom
+ PARTITION_T_BOTTOM_SPLIT, // [т] i.e. H top, split bottom
+ PARTITION_T_LEFT_SPLIT, // [-|] i.e. split left, V right
+ PARTITION_T_RIGHT_SPLIT, // [|-] i.e. V left, split right
+ PARTITION_H4, // [Ⲷ]
+ PARTITION_V4, // [Ⲽ]
+ N_PARTITIONS,
+ N_SUB8X8_PARTITIONS = PARTITION_T_TOP_SPLIT,
+};
+
+enum BlockSize {
+ BS_128x128,
+ BS_128x64,
+ BS_64x128,
+ BS_64x64,
+ BS_64x32,
+ BS_64x16,
+ BS_32x64,
+ BS_32x32,
+ BS_32x16,
+ BS_32x8,
+ BS_16x64,
+ BS_16x32,
+ BS_16x16,
+ BS_16x8,
+ BS_16x4,
+ BS_8x32,
+ BS_8x16,
+ BS_8x8,
+ BS_8x4,
+ BS_4x16,
+ BS_4x8,
+ BS_4x4,
+ N_BS_SIZES,
+};
+
+enum Filter2d { // order is horizontal, vertical
+ FILTER_2D_8TAP_REGULAR,
+ FILTER_2D_8TAP_REGULAR_SMOOTH,
+ FILTER_2D_8TAP_REGULAR_SHARP,
+ FILTER_2D_8TAP_SHARP_REGULAR,
+ FILTER_2D_8TAP_SHARP_SMOOTH,
+ FILTER_2D_8TAP_SHARP,
+ FILTER_2D_8TAP_SMOOTH_REGULAR,
+ FILTER_2D_8TAP_SMOOTH,
+ FILTER_2D_8TAP_SMOOTH_SHARP,
+ FILTER_2D_BILINEAR,
+ N_2D_FILTERS,
+};
+
+enum MVJoint {
+ MV_JOINT_ZERO,
+ MV_JOINT_H,
+ MV_JOINT_V,
+ MV_JOINT_HV,
+ N_MV_JOINTS,
+};
+
+enum InterPredMode {
+ NEARESTMV,
+ NEARMV,
+ GLOBALMV,
+ NEWMV,
+ N_INTER_PRED_MODES,
+};
+
+enum DRL_PROXIMITY {
+ NEAREST_DRL,
+ NEARER_DRL,
+ NEAR_DRL,
+ NEARISH_DRL
+};
+
+enum CompInterPredMode {
+ NEARESTMV_NEARESTMV,
+ NEARMV_NEARMV,
+ NEARESTMV_NEWMV,
+ NEWMV_NEARESTMV,
+ NEARMV_NEWMV,
+ NEWMV_NEARMV,
+ GLOBALMV_GLOBALMV,
+ NEWMV_NEWMV,
+ N_COMP_INTER_PRED_MODES,
+};
+
+enum CompInterType {
+ COMP_INTER_NONE,
+ COMP_INTER_WEIGHTED_AVG,
+ COMP_INTER_AVG,
+ COMP_INTER_SEG,
+ COMP_INTER_WEDGE,
+};
+
+enum InterIntraType {
+ INTER_INTRA_NONE,
+ INTER_INTRA_BLEND,
+ INTER_INTRA_WEDGE,
+};
+
+typedef union mv {
+ struct {
+ int16_t y, x;
+ };
+ uint32_t n;
+} mv;
+
+enum MotionMode {
+ MM_TRANSLATION,
+ MM_OBMC,
+ MM_WARP,
+};
+
+#define QINDEX_RANGE 256
+
+typedef struct Av1Block {
+ uint8_t bl, bs, bp;
+ uint8_t intra, seg_id, skip_mode, skip, uvtx;
+ union {
+ struct {
+ uint8_t y_mode, uv_mode, tx, pal_sz[2];
+ int8_t y_angle, uv_angle, cfl_alpha[2];
+ }; // intra
+ struct {
+ union {
+ struct {
+ union mv mv[2];
+ uint8_t wedge_idx, mask_sign, interintra_mode;
+ };
+ struct {
+ union mv mv2d;
+ int16_t matrix[4];
+ };
+ };
+ uint8_t comp_type, inter_mode, motion_mode, drl_idx;
+ int8_t ref[2];
+ uint8_t max_ytx, filter2d, interintra_type, tx_split0;
+ uint16_t tx_split1;
+ }; // inter
+ };
+} Av1Block;
+
+#endif /* DAV1D_SRC_LEVELS_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "dav1d/dav1d.h"
+
+#include "common/validate.h"
+
+#include "src/internal.h"
+#include "src/log.h"
+
+#if CONFIG_LOG
+COLD void dav1d_log_default_callback(void *const cookie,
+ const char *const format, va_list ap)
+{
+ vfprintf(stderr, format, ap);
+}
+
+COLD void dav1d_log(Dav1dContext *const c, const char *const format, ...) {
+ validate_input(c != NULL);
+
+ if (!c->logger.callback)
+ return;
+
+ va_list ap;
+ va_start(ap, format);
+ c->logger.callback(c->logger.cookie, format, ap);
+ va_end(ap);
+}
+#endif
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_LOG_H
+#define DAV1D_SRC_LOG_H
+
+#include "config.h"
+
+#include <stdarg.h>
+
+#include "dav1d/dav1d.h"
+
+#include "common/attributes.h"
+
+#if CONFIG_LOG
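+// defining dav1d_log as itself lets other code probe for logging support
+// with #ifdef dav1d_log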
+#define dav1d_log dav1d_log
+void dav1d_log_default_callback(void *cookie, const char *format, va_list ap);
+void dav1d_log(Dav1dContext *c, const char *format, ...) ATTR_FORMAT_PRINTF(2, 3);
+#else
+#define dav1d_log_default_callback NULL
+#define dav1d_log(...) do { } while(0)
+#endif
+
+#endif /* DAV1D_SRC_LOG_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_MEM_H
+#define DAV1D_SRC_MEM_H
+
+#include <stdlib.h>
+
+#if defined(HAVE_ALIGNED_MALLOC) || defined(HAVE_MEMALIGN)
+#include <malloc.h>
+#endif
+
+#include "common/attributes.h"
+
+#include "src/thread.h"
+
+typedef struct Dav1dMemPoolBuffer {
+ void *data;
+ struct Dav1dMemPoolBuffer *next;
+} Dav1dMemPoolBuffer;
+
+typedef struct Dav1dMemPool {
+ pthread_mutex_t lock;
+ Dav1dMemPoolBuffer *buf;
+ int ref_cnt;
+ int end;
+} Dav1dMemPool;
+
+void dav1d_mem_pool_push(Dav1dMemPool *pool, Dav1dMemPoolBuffer *buf);
+Dav1dMemPoolBuffer *dav1d_mem_pool_pop(Dav1dMemPool *pool, size_t size);
+int dav1d_mem_pool_init(Dav1dMemPool **pool);
+void dav1d_mem_pool_end(Dav1dMemPool *pool);
+
+/*
+ * Allocate align-byte aligned memory. The return value can be released
+ * by calling the dav1d_free_aligned() function.
+ */
+static inline void *dav1d_alloc_aligned(size_t sz, size_t align) {
+ assert(!(align & (align - 1)));
+#ifdef HAVE_POSIX_MEMALIGN
+ void *ptr;
+ if (posix_memalign(&ptr, align, sz)) return NULL;
+ return ptr;
+#elif defined(HAVE_ALIGNED_MALLOC)
+ return _aligned_malloc(sz, align);
+#elif defined(HAVE_MEMALIGN)
+ return memalign(align, sz);
+#else
+#error Missing aligned alloc implementation
+#endif
+}
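+
+/* Hypothetical usage sketch: allocate a 64-byte-aligned scratch buffer.
+ *
+ * void *buf = dav1d_alloc_aligned(4096, 64);
+ * if (buf) { ...use buf...; dav1d_free_aligned(buf); }
+ */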
+
+static inline void dav1d_free_aligned(void* ptr) {
+#ifdef HAVE_POSIX_MEMALIGN
+ free(ptr);
+#elif defined(HAVE_ALIGNED_MALLOC)
+ _aligned_free(ptr);
+#elif defined(HAVE_MEMALIGN)
+ free(ptr);
+#endif
+}
+
+static inline void dav1d_freep_aligned(void* ptr) {
+ void **mem = (void **) ptr;
+ if (*mem) {
+ dav1d_free_aligned(*mem);
+ *mem = NULL;
+ }
+}
+
+static inline void freep(void *ptr) {
+ void **mem = (void **) ptr;
+ if (*mem) {
+ free(*mem);
+ *mem = NULL;
+ }
+}
+
+#endif /* DAV1D_SRC_MEM_H */
--- /dev/null
+/*
+ * Copyright © 2018-2021, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_PICTURE_H
+#define DAV1D_SRC_PICTURE_H
+
+#include <stdatomic.h>
+
+#include "src/thread.h"
+#include "dav1d/picture.h"
+
+#include "src/thread_data.h"
+#include "src/ref.h"
+
+enum PlaneType {
+ PLANE_TYPE_Y,
+ PLANE_TYPE_UV,
+ PLANE_TYPE_BLOCK,
+ PLANE_TYPE_ALL,
+};
+
+enum PictureFlags {
+ PICTURE_FLAG_NEW_SEQUENCE = 1 << 0,
+ PICTURE_FLAG_NEW_OP_PARAMS_INFO = 1 << 1,
+ PICTURE_FLAG_NEW_TEMPORAL_UNIT = 1 << 2,
+};
+
+typedef struct Dav1dThreadPicture {
+ Dav1dPicture p;
+ int visible;
+ // This can be set for inter frames, non-key intra frames, or for invisible
+ // keyframes that have not yet been made visible using the show-existing-frame
+ // mechanism.
+ int showable;
+ enum PictureFlags flags;
+ // [0] block data (including segmentation map and motion vectors)
+ // [1] pixel data
+ atomic_uint *progress;
+} Dav1dThreadPicture;
+
+typedef struct Dav1dPictureBuffer {
+ void *data;
+ struct Dav1dPictureBuffer *next;
+} Dav1dPictureBuffer;
+
+/*
+ * Allocate a picture with custom border size.
+ */
+int dav1d_thread_picture_alloc(Dav1dContext *c, Dav1dFrameContext *f, const int bpc);
+
+/**
+ * Allocate a picture with identical metadata to an existing picture.
+ * The width is a separate argument so this function can be used for
+ * super-res, where the width changes, but everything else is the same.
+ * For the more typical use case of allocating a new image of the same
+ * dimensions, use src->p.w as width.
+ */
+int dav1d_picture_alloc_copy(Dav1dContext *c, Dav1dPicture *dst, const int w,
+ const Dav1dPicture *src);
+
+/**
+ * Create a copy of a picture.
+ */
+void dav1d_picture_ref(Dav1dPicture *dst, const Dav1dPicture *src);
+void dav1d_thread_picture_ref(Dav1dThreadPicture *dst,
+ const Dav1dThreadPicture *src);
+void dav1d_thread_picture_move_ref(Dav1dThreadPicture *dst,
+ Dav1dThreadPicture *src);
+void dav1d_thread_picture_unref(Dav1dThreadPicture *p);
+
+/**
+ * Move a picture reference.
+ */
+void dav1d_picture_move_ref(Dav1dPicture *dst, Dav1dPicture *src);
+
+int dav1d_default_picture_alloc(Dav1dPicture *p, void *cookie);
+void dav1d_default_picture_release(Dav1dPicture *p, void *cookie);
+void dav1d_picture_unref_internal(Dav1dPicture *p);
+
+/**
+ * Get event flags from picture flags.
+ */
+enum Dav1dEventFlags dav1d_picture_get_event_flags(const Dav1dThreadPicture *p);
+
+#endif /* DAV1D_SRC_PICTURE_H */
--- /dev/null
+/*
+ * Copyright © 2019, Luca Barbato
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+
+#include "common/bitdepth.h"
+#include "common/intops.h"
+
+#include "src/cdef.h"
+#include "src/cpu.h"
+
+/*
+#define cdef_svp64_fn(w, h) \
+void cdef_filter_block_##w##x##h##_svp64(pixel *const dst, \
+ const ptrdiff_t dst_stride, \
+ const pixel (*left)[2], \
+ const pixel *const top, \
+ const pixel *const bottom, \
+ const int pri_strength, \
+ const int sec_strength, \
+ const int dir, \
+ const int damping, \
+ const enum CdefEdgeFlags edges)
+
+cdef_svp64_fn(4, 4);
+cdef_svp64_fn(4, 8);
+cdef_svp64_fn(8, 8);*/
+
+int cdef_find_dir_svp64(const pixel *img, const ptrdiff_t stride,
+ unsigned *const var HIGHBD_DECL_SUFFIX);
+
+int cdef_find_dir_svp64_real(const pixel *img, const ptrdiff_t stride,
+ unsigned *const var HIGHBD_DECL_SUFFIX);
+
+static ALWAYS_INLINE void cdef_dsp_init_ppc(Dav1dCdefDSPContext *const c) {
+ const unsigned flags = dav1d_get_cpu_flags();
+
+ if (!(flags & DAV1D_PPC_CPU_FLAG_SVP64)) return;
+
+#ifdef HAVE_SVP64
+ c->dir = cdef_find_dir_svp64;
+#endif
+}
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+
+#include "common/intops.h"
+
+#include "src/ppc/cdef.h"
+#include "src/tables.h"
+
+int cdef_find_dir_svp64(const pixel *img, const ptrdiff_t stride,
+ unsigned *const var HIGHBD_DECL_SUFFIX)
+{
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
+ int partial_sum_hv[2][8] = { { 0 } };
+ int partial_sum_diag[2][15] = { { 0 } };
+ int partial_sum_alt[4][11] = { { 0 } };
+
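+ // Project the 8x8 block onto lines in each of the 8 CDEF directions; the
+ // direction with the largest sum of squared projections wins (div_table
+ // normalizes for the varying number of pixels per diagonal line).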
+ for (int y = 0; y < 8; y++) {
+ for (int x = 0; x < 8; x++) {
+ const int px = (img[x] >> bitdepth_min_8) - 128;
+
+ partial_sum_diag[0][ y + x ] += px;
+ partial_sum_alt [0][ y + (x >> 1)] += px;
+ partial_sum_hv [0][ y ] += px;
+ partial_sum_alt [1][3 + y - (x >> 1)] += px;
+ partial_sum_diag[1][7 + y - x ] += px;
+ partial_sum_alt [2][3 - (y >> 1) + x ] += px;
+ partial_sum_hv [1][ x ] += px;
+ partial_sum_alt [3][ (y >> 1) + x ] += px;
+ }
+ img += PXSTRIDE(stride);
+ }
+
+ unsigned cost[8] = { 0 };
+/* for (int n = 0; n < 8; n++) {
+ cost[2] += partial_sum_hv[0][n] * partial_sum_hv[0][n];
+ cost[6] += partial_sum_hv[1][n] * partial_sum_hv[1][n];
+ }
+ cost[2] *= 105;
+ cost[6] *= 105;
+
+ static const uint16_t div_table[7] = { 840, 420, 280, 210, 168, 140, 120 };
+ for (int n = 0; n < 7; n++) {
+ const int d = div_table[n];
+ cost[0] += (partial_sum_diag[0][n] * partial_sum_diag[0][n] +
+ partial_sum_diag[0][14 - n] * partial_sum_diag[0][14 - n]) * d;
+ cost[4] += (partial_sum_diag[1][n] * partial_sum_diag[1][n] +
+ partial_sum_diag[1][14 - n] * partial_sum_diag[1][14 - n]) * d;
+ }
+ cost[0] += partial_sum_diag[0][7] * partial_sum_diag[0][7] * 105;
+ cost[4] += partial_sum_diag[1][7] * partial_sum_diag[1][7] * 105;
+
+ for (int n = 0; n < 4; n++) {
+ unsigned *const cost_ptr = &cost[n * 2 + 1];
+ for (int m = 0; m < 5; m++)
+ *cost_ptr += partial_sum_alt[n][3 + m] * partial_sum_alt[n][3 + m];
+ *cost_ptr *= 105;
+ for (int m = 0; m < 3; m++) {
+ const int d = div_table[2 * m + 1];
+ *cost_ptr += (partial_sum_alt[n][m] * partial_sum_alt[n][m] +
+ partial_sum_alt[n][10 - m] * partial_sum_alt[n][10 - m]) * d;
+ }
+ }
+*/
+ int best_dir = 0;
+ unsigned best_cost = cost[0];
+ for (int n = 1; n < 8; n++) {
+ if (cost[n] > best_cost) {
+ best_cost = cost[n];
+ best_dir = n;
+ }
+ }
+
+ *var = (best_cost - (cost[best_dir ^ 4])) >> 10;
+ return best_dir;
+}
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+
+#include "common/intops.h"
+
+#include "src/ppc/cdef.h"
+
+int cdef_find_dir_svp64_real(const pixel *img, const ptrdiff_t stride,
+ unsigned *const var HIGHBD_DECL_SUFFIX)
+{
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
+ int partial_sum_hv[2][8] = { { 0 } };
+ int partial_sum_diag[2][15] = { { 0 } };
+ int partial_sum_alt[4][11] = { { 0 } };
+/*
+ for (int y = 0; y < 8; y++) {
+ for (int x = 0; x < 8; x++) {
+ const int px = (img[x] >> bitdepth_min_8) - 128;
+
+ partial_sum_diag[0][ y + x ] += px;
+ partial_sum_alt [0][ y + (x >> 1)] += px;
+ partial_sum_hv [0][ y ] += px;
+ partial_sum_alt [1][3 + y - (x >> 1)] += px;
+ partial_sum_diag[1][7 + y - x ] += px;
+ partial_sum_alt [2][3 - (y >> 1) + x ] += px;
+ partial_sum_hv [1][ x ] += px;
+ partial_sum_alt [3][ (y >> 1) + x ] += px;
+ }
+ img += PXSTRIDE(stride);
+ }
+
+ unsigned cost[8] = { 0 };
+ for (int n = 0; n < 8; n++) {
+ cost[2] += partial_sum_hv[0][n] * partial_sum_hv[0][n];
+ cost[6] += partial_sum_hv[1][n] * partial_sum_hv[1][n];
+ }
+ cost[2] *= 105;
+ cost[6] *= 105;
+
+ static const uint16_t div_table[7] = { 840, 420, 280, 210, 168, 140, 120 };
+ for (int n = 0; n < 7; n++) {
+ const int d = div_table[n];
+ cost[0] += (partial_sum_diag[0][n] * partial_sum_diag[0][n] +
+ partial_sum_diag[0][14 - n] * partial_sum_diag[0][14 - n]) * d;
+ cost[4] += (partial_sum_diag[1][n] * partial_sum_diag[1][n] +
+ partial_sum_diag[1][14 - n] * partial_sum_diag[1][14 - n]) * d;
+ }
+ cost[0] += partial_sum_diag[0][7] * partial_sum_diag[0][7] * 105;
+ cost[4] += partial_sum_diag[1][7] * partial_sum_diag[1][7] * 105;
+
+ for (int n = 0; n < 4; n++) {
+ unsigned *const cost_ptr = &cost[n * 2 + 1];
+ for (int m = 0; m < 5; m++)
+ *cost_ptr += partial_sum_alt[n][3 + m] * partial_sum_alt[n][3 + m];
+ *cost_ptr *= 105;
+ for (int m = 0; m < 3; m++) {
+ const int d = div_table[2 * m + 1];
+ *cost_ptr += (partial_sum_alt[n][m] * partial_sum_alt[n][m] +
+ partial_sum_alt[n][10 - m] * partial_sum_alt[n][10 - m]) * d;
+ }
+ }
+
+ int best_dir = 0;
+ unsigned best_cost = cost[0];
+ for (int n = 1; n < 8; n++) {
+ if (cost[n] > best_cost) {
+ best_cost = cost[n];
+ best_dir = n;
+ }
+ }
+
+ *var = (best_cost - (cost[best_dir ^ 4])) >> 10;
+ return best_dir;*/
+ return 0;
+}
--- /dev/null
+.set y, 1
+.set x, 2
+
+.set img_ptr, 3
+.set stride, 4
+.set var, 5
+.set bd, 6 # bitdepth_min_8
+
+.set cost, 7 # cost array, 8 elements
+.set divt, 14 # div_table[8]
+.set img, 24 # img array, 8x8 = 64 elements
+.set psum, 88 # We will place the results of the psums here
+.set tmp, 108 # temporary elements
+.set tmp2, 116 # temporary elements
+
+
+ .machine libresoc
+ .file "cdef_tmpl_svp64_real.c"
+ .abiversion 2
+ .section ".text"
+ .align 2
+ .globl cdef_find_dir_svp64_real
+ .type cdef_find_dir_svp64_real, @function
+cdef_find_dir_svp64_real:
+.L0:
+ .cfi_startproc
+ # Load div_table[7] array
+ # div_table[7] = { 840, 420, 280, 210, 168, 140, 120 };
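+ # (each entry is 840/(n+1), normalising diagonals of n+1 pixels)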
+ li divt+0, 840
+ li divt+1, 420
+ li divt+2, 280
+ li divt+3, 210
+ li divt+4, 168
+ li divt+5, 140
+ li divt+6, 120
+ li divt+7, 105 # Append 105 (= 840/8) as the eighth element of divt,
+ # which saves special-casing the centre tap
+
+.L1:
+ # Load the 8x8 block of 16-bit pixels from img_ptr, one row of 8 at a time
+ setvl 0,0,8,0,1,1 # Set VL to 8 elements
+ sv.lha *img, 0(img_ptr) # Load 8 halfwords from (img_ptr)
+ add img_ptr, img_ptr, stride # Advance img_ptr by stride
+ sv.lha *img + 8, 0(img_ptr)
+ add img_ptr, img_ptr, stride
+ sv.lha *img + 16, 0(img_ptr)
+ add img_ptr, img_ptr, stride
+ sv.lha *img + 24, 0(img_ptr)
+ add img_ptr, img_ptr, stride
+ sv.lha *img + 32, 0(img_ptr)
+ add img_ptr, img_ptr, stride
+ sv.lha *img + 40, 0(img_ptr)
+ add img_ptr, img_ptr, stride
+ sv.lha *img + 48, 0(img_ptr)
+ add img_ptr, img_ptr, stride
+ sv.lha *img + 56, 0(img_ptr)
+
+ setvl 0,0,64,0,1,1 # Set VL to 64 elements
+ sv.sraw *img, *img, bd # img[x] >> bitdepth_min_8
+ sv.addi *img, *img, -128 # px = (img[x] >> bitdepth_min_8) - 128
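+ # px values are now centred around zero, in the range [-128, 127]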
+
+ # Zero psum registers for partial_sum_hv
+ setvl 0,0,16,0,1,1 # Set VL to 16 elements
+ sv.ori *psum, 0, 0
+
+ # First do the horizontal partial sums:
+ # partial_sum_hv[0][y] += px;
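+ # (/mr is map-reduce mode: with a scalar destination each sv.add
+ # accumulates all VL source elements into that single register)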
+ setvl 0,0,8,0,1,1 # Set VL to 8 elements
+ sv.add/mr psum+0, psum+0, *img+0
+ sv.add/mr psum+1, psum+1, *img+8
+ sv.add/mr psum+2, psum+2, *img+16
+ sv.add/mr psum+3, psum+3, *img+24
+ sv.add/mr psum+4, psum+4, *img+32
+ sv.add/mr psum+5, psum+5, *img+40
+ sv.add/mr psum+6, psum+6, *img+48
+ sv.add/mr psum+7, psum+7, *img+56
+
+ # Next the vertical partial sums:
+ # partial_sum_hv[1][x] += px;
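+ # Here both operands are vectors, so each instruction adds one image
+ # row element-wise into the eight column accumulators psum+8..psum+15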
+ sv.add/mr *psum+8, *psum+8, *img+0
+ sv.add/mr *psum+8, *psum+8, *img+8
+ sv.add/mr *psum+8, *psum+8, *img+16
+ sv.add/mr *psum+8, *psum+8, *img+24
+ sv.add/mr *psum+8, *psum+8, *img+32
+ sv.add/mr *psum+8, *psum+8, *img+40
+ sv.add/mr *psum+8, *psum+8, *img+48
+ sv.add/mr *psum+8, *psum+8, *img+56
+
+ # Zero cost registers
+ setvl 0,0,8,0,1,1 # Set VL to 8 elements
+ sv.ori *cost, 0, 0
+
+ # cost[2] += partial_sum_hv[0][n] * partial_sum_hv[0][n];
+ sv.maddld/mr cost+2, *psum, *psum, cost+2
+ # cost[6] += partial_sum_hv[1][n] * partial_sum_hv[1][n];
+ sv.maddld/mr cost+6, *psum+8, *psum+8, cost+6
+
+ # cost[2] *= 105
+ # cost[6] *= 105
+ mulli cost+2, cost+2, 105
+ mulli cost+6, cost+6, 105
+
+ # We're done with the partial_sum_hv values, so we can reuse the
+ # registers for partial_sum_diag
+ # Zero psum registers for partial_sum_diag
+ setvl 0,0,30,0,1,1 # Set VL to 30 elements
+ sv.ori *psum, 0, 0
+
+ setvl 0,0,8,0,1,1 # Set VL to 8 elements
+ # First row of diagonal partial sums:
+ # partial_sum_diag[0][y + x] += px;
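+ # For row y the eight px values land in psum[y..y+7], so each row is
+ # an element-wise add at a sliding offset of y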
+ sv.add/mr *psum+0, *psum+0, *img+0
+ sv.add/mr *psum+1, *psum+1, *img+8
+ sv.add/mr *psum+2, *psum+2, *img+16
+ sv.add/mr *psum+3, *psum+3, *img+24
+ sv.add/mr *psum+4, *psum+4, *img+32
+ sv.add/mr *psum+5, *psum+5, *img+40
+ sv.add/mr *psum+6, *psum+6, *img+48
+ sv.add/mr *psum+7, *psum+7, *img+56
+
+ # Second row of diagonal partial sums:
+ # partial_sum_diag[1][7 + y - x] += px;
+ sv.add/mr *psum+15, *psum+15, *img+56
+ sv.add/mr *psum+16, *psum+16, *img+48
+ sv.add/mr *psum+17, *psum+17, *img+40
+ sv.add/mr *psum+18, *psum+18, *img+32
+ sv.add/mr *psum+19, *psum+19, *img+24
+ sv.add/mr *psum+20, *psum+20, *img+16
+ sv.add/mr *psum+21, *psum+21, *img+8
+ sv.add/mr *psum+22, *psum+22, *img+0
+ # These are calculated in reverse order, but since they are only used
+ # in a sum, the order does not matter.
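+ # (cost[4] pairs elements n and 14-n with equal weights, so a reversed
+ # partial_sum_diag[1] produces an identical sum)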
+
+ setvl 0,0,15,0,1,1 # Set VL to 15 elements
+ sv.ori *tmp, 0, 0
+
+ # cost[0] += (partial_sum_diag[0][n] * partial_sum_diag[0][n] +
+ # partial_sum_diag[0][14 - n] * partial_sum_diag[0][14 - n]) * d;
+ # Produce squares of all values
+ sv.maddld/mr *tmp, *psum+0, *psum+0, *tmp
+ # Handle the first 8 elements in order, *includes* partial_sum_diag[0][7]!
+ #setvl 0,0,8,0,1,1 # Set VL to 8 elements
+ #sv.mulld *tmp, *tmp, *divt
+ # Handle remaining 7 elements, in reverse order (work in progress:
+ # experimenting with svstep/svindex to remap divt before the multiply)
+ setvl 0,0,7,0,1,1 # Set VL to 7 elements
+ sv.svstep/mrr *tmp2, 6, 1
+ svindex 29,0b1,7,0,0,0,0
+ sv.ori *tmp, *divt, 0
+ #sv.mulld *tmp, *tmp, *divt
+ # Now sum those up to cost[0] element
+ #setvl 0,0,15,0,1,1 # Set VL to 15 elements
+ #sv.add/mr cost+0, *tmp, cost+0
+
+ # Similarly for cost[4]
+ # cost[4] += (partial_sum_diag[1][n] * partial_sum_diag[1][n] +
+ # partial_sum_diag[1][14 - n] * partial_sum_diag[1][14 - n]) * d;
+ #sv.maddld/mr *tmp, *psum+16, *psum+16, *tmp
+ #sv.maddld/mr *tmp, *psum+24, *psum+24, *tmp
+ #sv.mulld *tmp, *tmp, *divt
+ #sv.add/mr cost+4, *tmp, cost+4
+
+
+ # Zero psum registers for partial_sum_alt (4 x 11 = 44 elements),
+ # processing half (22) at a time
+ #setvl 0,0,22,0,1,1 # Set VL to 22 elements
+ #sv.ori psum, 0, 0
+
+ # First row of alt partial sums:
+ # partial_sum_alt [0][y + (x >> 1)] += px;
+ # These are essentially calculated the following way:
+ # horiz axis: x, vert axis: y, quantity of y + (x>>1):
+ #
+ # | | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ # | 0 | 0 | 0 | 1 | 1 | 2 | 2 | 3 | 3 |
+ # | 1 | 1 | 1 | 2 | 2 | 3 | 3 | 4 | 4 |
+ # | 2 | 2 | 2 | 3 | 3 | 4 | 4 | 5 | 5 |
+ # | 3 | 3 | 3 | 4 | 4 | 5 | 5 | 6 | 6 |
+ # | 4 | 4 | 4 | 5 | 5 | 6 | 6 | 7 | 7 |
+ # | 5 | 5 | 5 | 6 | 6 | 7 | 7 | 8 | 8 |
+ # | 6 | 6 | 6 | 7 | 7 | 8 | 8 | 9 | 9 |
+ # | 7 | 7 | 7 | 8 | 8 | 9 | 9 | a | a |
+ #
+ # We calculate this in a similar manner to the diagonal
+ # partial sums, but first we have to do pair-wise addition
+ # on all the elements of the img matrix:
+ #setvl 0,0,64,0,1,1 # Set VL to 64 elements
+ #svstep 2
+ #sv.add *img, *img, *img+1
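+ # (after the pair-wise add, columns 2k and 2k+1 are merged, leaving an
+ # 8x4 matrix that can be accumulated with sliding offsets like the
+ # diagonals above)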
+
+ #setvl 0,0,8,0,1,1 # Set VL to 8 elements
+ #sv.add *psum+0, *psum+0, *img+0
+ #sv.add *psum+0, *psum+0, *img+1
+ #sv.add *psum+1, *psum+1, *img+8
+ #sv.add *psum+1, *psum+1, *img+9
+
+
+ #setvl 0,0,10,0,1,1 # Set VL to 10 elements
+ #sv.add/mr *psum, *psum, *psum+1
+#
+
+
+ blr
+ .long 0
+ .byte 0,0,0,0,0,0,0,0
+ .cfi_endproc
+.LFE27:
+ .size cdef_find_dir_svp64_real,.-cdef_find_dir_svp64_real
+ .ident "GCC: (Debian 8.3.0-6) 8.3.0"
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <Python.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "pypowersim_wrapper_common.h"
+
+#include "common/intops.h"
+
+#include "src/ppc/cdef.h"
+#include "src/tables.h"
+
+int cdef_find_dir_svp64(const pixel *img, const ptrdiff_t stride,
+ unsigned *const var HIGHBD_DECL_SUFFIX)
+{
+ printf("img: %p, stride: %d, var: %p\n", img, stride, var);
+ // The simulator has its own separate CPU/RAM, so we cannot reuse the
+ // host pointers; input data is copied into simulator memory at these
+ // arbitrarily chosen addresses instead.
+ uint64_t img_svp64 = 0x100000;
+ uint64_t var_svp64 = 0x200000;
+
+ // Create the pypowersim_state
+ pypowersim_state_t *state = pypowersim_prepare();
+
+ // Change the relevant elements; the body (the binary to execute) is mandatory.
+ // We point the simulator at the machine code of the real function; 1000 bytes
+ // is an arbitrary upper bound on its size.
+ state->binary = PyBytes_FromStringAndSize((const char *)&cdef_find_dir_svp64_real, 1000);
+
+ // Set GPR #3 to the simulated img pointer
+ PyObject *img_address = PyLong_FromUnsignedLongLong(img_svp64);
+ PyList_SetItem(state->initial_regs, 3, img_address);
+
+ // Set GPR #4 to the stride value
+ PyObject *stride_svp64 = PyLong_FromUnsignedLongLong(stride);
+ PyList_SetItem(state->initial_regs, 4, stride_svp64);
+
+ // Load data into PyPowersim buffer from real memory
+ for (int i=0; i < 8; i++) {
+ for (int j=0; j < 8; j += 4) {
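+ // Pack four 16-bit pixels little-endian into one 64-bit word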
+ PyObject *svp64_address = PyLong_FromUnsignedLongLong(img_svp64 + j*2);
+ uint64_t val = (uint64_t)(img[j + 0]) & 0xffff;
+ val |= ((uint64_t)(img[j + 1]) & 0xffff) << 16;
+ val |= ((uint64_t)(img[j + 2]) & 0xffff) << 32;
+ val |= ((uint64_t)(img[j + 3]) & 0xffff) << 48;
+ printf("img: %p -> %04x %04x %04x %04x\t val: %016lx -> %p\n", img + j, (uint16_t)img[j + 0], (uint16_t)img[j + 1], (uint16_t)img[j + 2], (uint16_t)img[j + 3], val, img_svp64 + j*2);
+/*
+ uint64_t val = (uint64_t)(img[0]) & 0xff;
+ val |= ((uint64_t)(img[1]) & 0xff) << 8;
+ val |= ((uint64_t)(img[2]) & 0xff) << 16;
+ val |= ((uint64_t)(img[3]) & 0xff) << 24;
+ val |= ((uint64_t)(img[4]) & 0xff) << 32;
+ val |= ((uint64_t)(img[5]) & 0xff) << 40;
+ val |= ((uint64_t)(img[6]) & 0xff) << 48;
+ val |= ((uint64_t)(img[7]) & 0xff) << 56;
+ printf("src: %p -> %02x %02x %02x %02x %02x %02x %02x %02x\t val: %016lx -> %p\n", img, (uint8_t)img[0], (uint8_t)img[1], (uint8_t)img[2], (uint8_t)img[3], (uint8_t)img[4], (uint8_t)img[5], (uint8_t)img[6], (uint8_t)img[7], val, img_svp64);*/
+ PyObject *word = PyLong_FromUnsignedLongLong(val);
+ PyDict_SetItem(state->initial_mem, svp64_address, word);
+ }
+ img += stride / 2; // stride is in bytes; img points to 16-bit pixels
+ img_svp64 += stride;
+ }
+
+ // Set GPR #5 to the var pointer, and clear the address
+ PyObject *var_address = PyLong_FromUnsignedLongLong(var_svp64);
+ PyList_SetItem(state->initial_regs, 5, var_address);
+ {
+ PyObject *svp64_address = PyLong_FromUnsignedLongLong(var_svp64);
+ PyObject *word = PyLong_FromUnsignedLongLong(0);
+ PyDict_SetItem(state->initial_mem, svp64_address, word);
+ }
+
+#if BITDEPTH == 16
+ const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
+ PyObject *bitdepth = PyLong_FromUnsignedLongLong(bitdepth_min_8);
+ PyList_SetItem(state->initial_regs, 6, bitdepth);
+#endif
+
+ // Prepare the arguments object for the call
+ pypowersim_prepareargs(state);
+
+ // Call the function and get the resulting object
+ state->result_obj = PyObject_CallObject(state->simulator, state->args);
+ if (!state->result_obj) {
+ PyErr_Print();
+ printf("Error invoking 'run_a_simulation'\n");
+ pypowersim_finalize(state);
+ exit(1);
+ }
+
+ // Get the GPRs from the result_obj
+ PyObject *final_regs = PyObject_GetAttrString(state->result_obj, "gpr");
+ if (!final_regs) {
+ PyErr_Print();
+ printf("Error getting final GPRs\n");
+ pypowersim_finalize(state);
+ exit(1);
+ }
+
+ PyObject *memobj = PyObject_GetAttrString(state->result_obj, "mem");
+ if (!memobj) {
+ PyErr_Print();
+ printf("Error getting mem object\n");
+ pypowersim_finalize(state);
+ exit(1);
+ }
+
+ PyObject *mem = PyObject_GetAttrString(memobj, "mem");
+ if (!mem) {
+ PyErr_Print();
+ printf("Error getting mem dict\n");
+ pypowersim_finalize(state);
+ exit(1);
+ }
+ {
+ // The simulator memory dict is keyed by 64-bit word index, hence the / 8
+ PyObject *svp64_address = PyLong_FromUnsignedLongLong(var_svp64 / 8);
+ PyObject *pyval = PyDict_GetItem(mem, svp64_address);
+ uint64_t val = PyLong_AsUnsignedLongLong(pyval);
+ *var = (uint32_t) val;
+ printf("output: %p -> %08x\t val: %016lx -> %016lx\n", (void *)var, *var, val, var_svp64);
+ }
+
+ // GPR #3 holds the return value as an integer
+ PyObject *key = PyLong_FromLongLong(3);
+ PyObject *itm = PyDict_GetItem(final_regs, key);
+ if (!itm) {
+ PyErr_Print();
+ printf("Error getting GPR #3\n");
+ pypowersim_finalize(state);
+ exit(1);
+ }
+ PyObject *value = PyObject_GetAttrString(itm, "value");
+ if (!value) {
+ PyErr_Print();
+ printf("Error getting value of GPR #3\n");
+ pypowersim_finalize(state);
+ exit(1);
+ }
+ uint64_t val = PyLong_AsUnsignedLongLong(value);
+
+ // Return best_dir (0..7) from GPR #3, releasing the simulator state first
+ pypowersim_finalize(state);
+ return (int) val;
+}
--- /dev/null
+/*
+ * Copyright © 2019, VideoLAN and dav1d authors
+ * Copyright © 2019, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "common/attributes.h"
+
+#include "src/ppc/cpu.h"
+
+#if (defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)) && ARCH_PPC64LE
+#include <sys/auxv.h>
+#define HAVE_AUX
+#endif
+
+COLD unsigned dav1d_get_cpu_flags_ppc(void) {
+ unsigned flags = 0;
+#if defined(HAVE_GETAUXVAL) && ARCH_PPC64LE
+ unsigned long hw_cap = getauxval(AT_HWCAP);
+#elif defined(HAVE_ELF_AUX_INFO) && ARCH_PPC64LE
+ unsigned long hw_cap = 0;
+ elf_aux_info(AT_HWCAP, &hw_cap, sizeof(hw_cap));
+#endif
+#ifdef HAVE_AUX
+ flags |= (hw_cap & PPC_FEATURE_HAS_VSX) ? DAV1D_PPC_CPU_FLAG_VSX : 0;
+#endif
+#ifdef HAVE_SVP64
+ flags |= DAV1D_PPC_CPU_FLAG_SVP64;
+#endif
+ return flags;
+}
--- /dev/null
+/*
+ * Copyright © 2019, VideoLAN and dav1d authors
+ * Copyright © 2019, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_PPC_CPU_H
+#define DAV1D_SRC_PPC_CPU_H
+
+enum CpuFlags {
+ DAV1D_PPC_CPU_FLAG_VSX = 1 << 0,
+ DAV1D_PPC_CPU_FLAG_SVP64 = 1 << 1,
+};
+
+unsigned dav1d_get_cpu_flags_ppc(void);
+
+#endif /* DAV1D_SRC_PPC_CPU_H */
--- /dev/null
+/*
+ * Copyright © 2019, VideoLAN and dav1d authors
+ * Copyright © 2019, Luca Barbato
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_PPC_TYPES_H
+#define DAV1D_SRC_PPC_TYPES_H
+
+#include <altivec.h>
+#undef pixel
+
+#define u8x16 vector unsigned char
+#define i8x16 vector signed char
+#define b8x16 vector bool char
+#define u16x8 vector unsigned short
+#define i16x8 vector signed short
+#define b16x8 vector bool short
+#define u32x4 vector unsigned int
+#define i32x4 vector signed int
+#define b32x4 vector bool int
+#define u64x2 vector unsigned long long
+#define i64x2 vector signed long long
+#define b64x2 vector bool long long
+
+#define u8h_to_u16(v) ((u16x8) vec_mergeh((u8x16) v, vec_splat_u8(0)))
+#define u8l_to_u16(v) ((u16x8) vec_mergel((u8x16) v, vec_splat_u8(0)))
+#define u16h_to_i32(v) ((i32x4) vec_mergeh((u16x8) v, vec_splat_u16(0)))
+#define i16h_to_i32(v) ((i32x4) vec_unpackh((i16x8)v))
+#define u16l_to_i32(v) ((i32x4) vec_mergel((u16x8) v, vec_splat_u16(0)))
+#define i16l_to_i32(v) ((i32x4) vec_unpackl((i16x8)v))
+
+#endif /* DAV1D_SRC_PPC_TYPES_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_REF_H
+#define DAV1D_SRC_REF_H
+
+#include "dav1d/dav1d.h"
+
+#include "src/mem.h"
+#include "src/thread.h"
+
+#include <stdatomic.h>
+#include <stddef.h>
+
+struct Dav1dRef {
+ void *data;
+ const void *const_data;
+ atomic_int ref_cnt;
+ int free_ref;
+ void (*free_callback)(const uint8_t *data, void *user_data);
+ void *user_data;
+};
+
+Dav1dRef *dav1d_ref_create(size_t size);
+Dav1dRef *dav1d_ref_create_using_pool(Dav1dMemPool *pool, size_t size);
+Dav1dRef *dav1d_ref_wrap(const uint8_t *ptr,
+ void (*free_callback)(const uint8_t *data, void *user_data),
+ void *user_data);
+void dav1d_ref_dec(Dav1dRef **ref);
+int dav1d_ref_is_writable(Dav1dRef *ref);
+
+static inline void dav1d_ref_inc(Dav1dRef *const ref) {
+ atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed);
+}
+
+#endif /* DAV1D_SRC_REF_H */
--- /dev/null
+/*
+ * Copyright © 2018-2021, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+
+#include "common/attributes.h"
+
+#include "src/levels.h"
+#include "src/tables.h"
+
+const uint8_t dav1d_al_part_ctx[2][N_BL_LEVELS][N_PARTITIONS] = {
+ {
+ // partitions:
+ // none, h, v, split, tts, tbs, tls, trs, h4, v4
+ { 0x00, 0x00, 0x10, -1, 0x00, 0x10, 0x10, 0x10, -1, -1 }, // bl128
+ { 0x10, 0x10, 0x18, -1, 0x10, 0x18, 0x18, 0x18, 0x10, 0x1c }, // bl64
+ { 0x18, 0x18, 0x1c, -1, 0x18, 0x1c, 0x1c, 0x1c, 0x18, 0x1e }, // bl32
+ { 0x1c, 0x1c, 0x1e, -1, 0x1c, 0x1e, 0x1e, 0x1e, 0x1c, 0x1f }, // bl16
+ { 0x1e, 0x1e, 0x1f, 0x1f, -1, -1, -1, -1, -1, -1 }, // bl8
+ }, {
+ { 0x00, 0x10, 0x00, -1, 0x10, 0x10, 0x00, 0x10, -1, -1 }, // bl128
+ { 0x10, 0x18, 0x10, -1, 0x18, 0x18, 0x10, 0x18, 0x1c, 0x10 }, // bl64
+ { 0x18, 0x1c, 0x18, -1, 0x1c, 0x1c, 0x18, 0x1c, 0x1e, 0x18 }, // bl32
+ { 0x1c, 0x1e, 0x1c, -1, 0x1e, 0x1e, 0x1c, 0x1e, 0x1f, 0x1c }, // bl16
+ { 0x1e, 0x1f, 0x1e, 0x1f, -1, -1, -1, -1, -1, -1 }, // bl8
+ }
+};
+
+const uint8_t /* enum BlockSize */
+ dav1d_block_sizes[N_BL_LEVELS][N_PARTITIONS][2] =
+{
+ [BL_128X128] = {
+ [PARTITION_NONE] = { BS_128x128 },
+ [PARTITION_H] = { BS_128x64 },
+ [PARTITION_V] = { BS_64x128 },
+ [PARTITION_T_TOP_SPLIT] = { BS_64x64, BS_128x64 },
+ [PARTITION_T_BOTTOM_SPLIT] = { BS_128x64, BS_64x64 },
+ [PARTITION_T_LEFT_SPLIT] = { BS_64x64, BS_64x128 },
+ [PARTITION_T_RIGHT_SPLIT] = { BS_64x128, BS_64x64 },
+ }, [BL_64X64] = {
+ [PARTITION_NONE] = { BS_64x64 },
+ [PARTITION_H] = { BS_64x32 },
+ [PARTITION_V] = { BS_32x64 },
+ [PARTITION_T_TOP_SPLIT] = { BS_32x32, BS_64x32 },
+ [PARTITION_T_BOTTOM_SPLIT] = { BS_64x32, BS_32x32 },
+ [PARTITION_T_LEFT_SPLIT] = { BS_32x32, BS_32x64 },
+ [PARTITION_T_RIGHT_SPLIT] = { BS_32x64, BS_32x32 },
+ [PARTITION_H4] = { BS_64x16 },
+ [PARTITION_V4] = { BS_16x64 },
+ }, [BL_32X32] = {
+ [PARTITION_NONE] = { BS_32x32 },
+ [PARTITION_H] = { BS_32x16 },
+ [PARTITION_V] = { BS_16x32 },
+ [PARTITION_T_TOP_SPLIT] = { BS_16x16, BS_32x16 },
+ [PARTITION_T_BOTTOM_SPLIT] = { BS_32x16, BS_16x16 },
+ [PARTITION_T_LEFT_SPLIT] = { BS_16x16, BS_16x32 },
+ [PARTITION_T_RIGHT_SPLIT] = { BS_16x32, BS_16x16 },
+ [PARTITION_H4] = { BS_32x8 },
+ [PARTITION_V4] = { BS_8x32 },
+ }, [BL_16X16] = {
+ [PARTITION_NONE] = { BS_16x16 },
+ [PARTITION_H] = { BS_16x8 },
+ [PARTITION_V] = { BS_8x16 },
+ [PARTITION_T_TOP_SPLIT] = { BS_8x8, BS_16x8 },
+ [PARTITION_T_BOTTOM_SPLIT] = { BS_16x8, BS_8x8 },
+ [PARTITION_T_LEFT_SPLIT] = { BS_8x8, BS_8x16 },
+ [PARTITION_T_RIGHT_SPLIT] = { BS_8x16, BS_8x8 },
+ [PARTITION_H4] = { BS_16x4 },
+ [PARTITION_V4] = { BS_4x16 },
+ }, [BL_8X8] = {
+ [PARTITION_NONE] = { BS_8x8 },
+ [PARTITION_H] = { BS_8x4 },
+ [PARTITION_V] = { BS_4x8 },
+ [PARTITION_SPLIT] = { BS_4x4 },
+ }
+};
+
+const uint8_t dav1d_block_dimensions[N_BS_SIZES][4] = {
+ [BS_128x128] = { 32, 32, 5, 5 },
+ [BS_128x64] = { 32, 16, 5, 4 },
+ [BS_64x128] = { 16, 32, 4, 5 },
+ [BS_64x64] = { 16, 16, 4, 4 },
+ [BS_64x32] = { 16, 8, 4, 3 },
+ [BS_64x16] = { 16, 4, 4, 2 },
+ [BS_32x64] = { 8, 16, 3, 4 },
+ [BS_32x32] = { 8, 8, 3, 3 },
+ [BS_32x16] = { 8, 4, 3, 2 },
+ [BS_32x8] = { 8, 2, 3, 1 },
+ [BS_16x64] = { 4, 16, 2, 4 },
+ [BS_16x32] = { 4, 8, 2, 3 },
+ [BS_16x16] = { 4, 4, 2, 2 },
+ [BS_16x8] = { 4, 2, 2, 1 },
+ [BS_16x4] = { 4, 1, 2, 0 },
+ [BS_8x32] = { 2, 8, 1, 3 },
+ [BS_8x16] = { 2, 4, 1, 2 },
+ [BS_8x8] = { 2, 2, 1, 1 },
+ [BS_8x4] = { 2, 1, 1, 0 },
+ [BS_4x16] = { 1, 4, 0, 2 },
+ [BS_4x8] = { 1, 2, 0, 1 },
+ [BS_4x4] = { 1, 1, 0, 0 },
+};
+
+const TxfmInfo dav1d_txfm_dimensions[N_RECT_TX_SIZES] = {
+ [ TX_4X4] = { .w = 1, .h = 1, .lw = 0, .lh = 0,
+ .min = 0, .max = 0, .ctx = 0 },
+ [ TX_8X8] = { .w = 2, .h = 2, .lw = 1, .lh = 1,
+ .min = 1, .max = 1, .sub = TX_4X4, .ctx = 1 },
+ [ TX_16X16] = { .w = 4, .h = 4, .lw = 2, .lh = 2,
+ .min = 2, .max = 2, .sub = TX_8X8, .ctx = 2 },
+ [ TX_32X32] = { .w = 8, .h = 8, .lw = 3, .lh = 3,
+ .min = 3, .max = 3, .sub = TX_16X16, .ctx = 3 },
+ [ TX_64X64] = { .w = 16, .h = 16, .lw = 4, .lh = 4,
+ .min = 4, .max = 4, .sub = TX_32X32, .ctx = 4 },
+ [RTX_4X8] = { .w = 1, .h = 2, .lw = 0, .lh = 1,
+ .min = 0, .max = 1, .sub = TX_4X4, .ctx = 1 },
+ [RTX_8X4] = { .w = 2, .h = 1, .lw = 1, .lh = 0,
+ .min = 0, .max = 1, .sub = TX_4X4, .ctx = 1 },
+ [RTX_8X16] = { .w = 2, .h = 4, .lw = 1, .lh = 2,
+ .min = 1, .max = 2, .sub = TX_8X8, .ctx = 2 },
+ [RTX_16X8] = { .w = 4, .h = 2, .lw = 2, .lh = 1,
+ .min = 1, .max = 2, .sub = TX_8X8, .ctx = 2 },
+ [RTX_16X32] = { .w = 4, .h = 8, .lw = 2, .lh = 3,
+ .min = 2, .max = 3, .sub = TX_16X16, .ctx = 3 },
+ [RTX_32X16] = { .w = 8, .h = 4, .lw = 3, .lh = 2,
+ .min = 2, .max = 3, .sub = TX_16X16, .ctx = 3 },
+ [RTX_32X64] = { .w = 8, .h = 16, .lw = 3, .lh = 4,
+ .min = 3, .max = 4, .sub = TX_32X32, .ctx = 4 },
+ [RTX_64X32] = { .w = 16, .h = 8, .lw = 4, .lh = 3,
+ .min = 3, .max = 4, .sub = TX_32X32, .ctx = 4 },
+ [RTX_4X16] = { .w = 1, .h = 4, .lw = 0, .lh = 2,
+ .min = 0, .max = 2, .sub = RTX_4X8, .ctx = 1 },
+ [RTX_16X4] = { .w = 4, .h = 1, .lw = 2, .lh = 0,
+ .min = 0, .max = 2, .sub = RTX_8X4, .ctx = 1 },
+ [RTX_8X32] = { .w = 2, .h = 8, .lw = 1, .lh = 3,
+ .min = 1, .max = 3, .sub = RTX_8X16, .ctx = 2 },
+ [RTX_32X8] = { .w = 8, .h = 2, .lw = 3, .lh = 1,
+ .min = 1, .max = 3, .sub = RTX_16X8, .ctx = 2 },
+ [RTX_16X64] = { .w = 4, .h = 16, .lw = 2, .lh = 4,
+ .min = 2, .max = 4, .sub = RTX_16X32, .ctx = 3 },
+ [RTX_64X16] = { .w = 16, .h = 4, .lw = 4, .lh = 2,
+ .min = 2, .max = 4, .sub = RTX_32X16, .ctx = 3 },
+};
+
+const uint8_t /* enum (Rect)TxfmSize */
+ dav1d_max_txfm_size_for_bs[N_BS_SIZES][4 /* y, 420, 422, 444 */] =
+{
+ [BS_128x128] = { TX_64X64, TX_32X32, TX_32X32, TX_32X32 },
+ [BS_128x64] = { TX_64X64, TX_32X32, TX_32X32, TX_32X32 },
+ [BS_64x128] = { TX_64X64, TX_32X32, 0, TX_32X32 },
+ [BS_64x64] = { TX_64X64, TX_32X32, TX_32X32, TX_32X32 },
+ [BS_64x32] = { RTX_64X32, RTX_32X16, TX_32X32, TX_32X32 },
+ [BS_64x16] = { RTX_64X16, RTX_32X8, RTX_32X16, RTX_32X16 },
+ [BS_32x64] = { RTX_32X64, RTX_16X32, 0, TX_32X32 },
+ [BS_32x32] = { TX_32X32, TX_16X16, RTX_16X32, TX_32X32 },
+ [BS_32x16] = { RTX_32X16, RTX_16X8, TX_16X16, RTX_32X16 },
+ [BS_32x8] = { RTX_32X8, RTX_16X4, RTX_16X8, RTX_32X8 },
+ [BS_16x64] = { RTX_16X64, RTX_8X32, 0, RTX_16X32 },
+ [BS_16x32] = { RTX_16X32, RTX_8X16, 0, RTX_16X32 },
+ [BS_16x16] = { TX_16X16, TX_8X8, RTX_8X16, TX_16X16 },
+ [BS_16x8] = { RTX_16X8, RTX_8X4, TX_8X8, RTX_16X8 },
+ [BS_16x4] = { RTX_16X4, RTX_8X4, RTX_8X4, RTX_16X4 },
+ [BS_8x32] = { RTX_8X32, RTX_4X16, 0, RTX_8X32 },
+ [BS_8x16] = { RTX_8X16, RTX_4X8, 0, RTX_8X16 },
+ [BS_8x8] = { TX_8X8, TX_4X4, RTX_4X8, TX_8X8 },
+ [BS_8x4] = { RTX_8X4, TX_4X4, TX_4X4, RTX_8X4 },
+ [BS_4x16] = { RTX_4X16, RTX_4X8, 0, RTX_4X16 },
+ [BS_4x8] = { RTX_4X8, TX_4X4, 0, RTX_4X8 },
+ [BS_4x4] = { TX_4X4, TX_4X4, TX_4X4, TX_4X4 },
+};
+
+const uint8_t /* enum TxfmType */
+ dav1d_txtp_from_uvmode[N_UV_INTRA_PRED_MODES] =
+{
+ [DC_PRED] = DCT_DCT,
+ [VERT_PRED] = ADST_DCT,
+ [HOR_PRED] = DCT_ADST,
+ [DIAG_DOWN_LEFT_PRED] = DCT_DCT,
+ [DIAG_DOWN_RIGHT_PRED] = ADST_ADST,
+ [VERT_RIGHT_PRED] = ADST_DCT,
+ [HOR_DOWN_PRED] = DCT_ADST,
+ [HOR_UP_PRED] = DCT_ADST,
+ [VERT_LEFT_PRED] = ADST_DCT,
+ [SMOOTH_PRED] = ADST_ADST,
+ [SMOOTH_V_PRED] = ADST_DCT,
+ [SMOOTH_H_PRED] = DCT_ADST,
+ [PAETH_PRED] = ADST_ADST,
+};
+
+const uint8_t /* enum InterPredMode */
+ dav1d_comp_inter_pred_modes[N_COMP_INTER_PRED_MODES][2] =
+{
+ [NEARESTMV_NEARESTMV] = { NEARESTMV, NEARESTMV },
+ [NEARMV_NEARMV] = { NEARMV, NEARMV },
+ [NEWMV_NEWMV] = { NEWMV, NEWMV },
+ [GLOBALMV_GLOBALMV] = { GLOBALMV, GLOBALMV },
+ [NEWMV_NEARESTMV] = { NEWMV, NEARESTMV },
+ [NEWMV_NEARMV] = { NEWMV, NEARMV },
+ [NEARESTMV_NEWMV] = { NEARESTMV, NEWMV },
+ [NEARMV_NEWMV] = { NEARMV, NEWMV },
+};
+
+const uint8_t dav1d_partition_type_count[N_BL_LEVELS] = {
+ [BL_128X128] = N_PARTITIONS - 3,
+ [BL_64X64] = N_PARTITIONS - 1,
+ [BL_32X32] = N_PARTITIONS - 1,
+ [BL_16X16] = N_PARTITIONS - 1,
+ [BL_8X8] = N_SUB8X8_PARTITIONS - 1,
+};
+
+const uint8_t /* enum TxfmType */ dav1d_tx_types_per_set[40] = {
+ /* Intra2 */
+ IDTX, DCT_DCT, ADST_ADST, ADST_DCT, DCT_ADST,
+ /* Intra1 */
+ IDTX, DCT_DCT, V_DCT, H_DCT, ADST_ADST, ADST_DCT, DCT_ADST,
+ /* Inter2 */
+ IDTX, V_DCT, H_DCT, DCT_DCT, ADST_DCT, DCT_ADST, FLIPADST_DCT,
+ DCT_FLIPADST, ADST_ADST, FLIPADST_FLIPADST, ADST_FLIPADST, FLIPADST_ADST,
+ /* Inter1 */
+ IDTX, V_DCT, H_DCT, V_ADST, H_ADST, V_FLIPADST, H_FLIPADST,
+ DCT_DCT, ADST_DCT, DCT_ADST, FLIPADST_DCT, DCT_FLIPADST,
+ ADST_ADST, FLIPADST_FLIPADST, ADST_FLIPADST, FLIPADST_ADST,
+};
+
+const uint8_t dav1d_ymode_size_context[N_BS_SIZES] = {
+ [BS_128x128] = 3,
+ [BS_128x64] = 3,
+ [BS_64x128] = 3,
+ [BS_64x64] = 3,
+ [BS_64x32] = 3,
+ [BS_64x16] = 2,
+ [BS_32x64] = 3,
+ [BS_32x32] = 3,
+ [BS_32x16] = 2,
+ [BS_32x8 ] = 1,
+ [BS_16x64] = 2,
+ [BS_16x32] = 2,
+ [BS_16x16] = 2,
+ [BS_16x8 ] = 1,
+ [BS_16x4 ] = 0,
+ [BS_8x32 ] = 1,
+ [BS_8x16 ] = 1,
+ [BS_8x8 ] = 1,
+ [BS_8x4 ] = 0,
+ [BS_4x16 ] = 0,
+ [BS_4x8 ] = 0,
+ [BS_4x4 ] = 0,
+};
+
+const uint8_t dav1d_lo_ctx_offsets[3][5][5] = {
+ { /* w == h */
+ { 0, 1, 6, 6, 21 },
+ { 1, 6, 6, 21, 21 },
+ { 6, 6, 21, 21, 21 },
+ { 6, 21, 21, 21, 21 },
+ { 21, 21, 21, 21, 21 },
+ }, { /* w > h */
+ { 0, 16, 6, 6, 21 },
+ { 16, 16, 6, 21, 21 },
+ { 16, 16, 21, 21, 21 },
+ { 16, 16, 21, 21, 21 },
+ { 16, 16, 21, 21, 21 },
+ }, { /* w < h */
+ { 0, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11 },
+ { 6, 6, 21, 21, 21 },
+ { 6, 21, 21, 21, 21 },
+ { 21, 21, 21, 21, 21 },
+ },
+};
+
+const uint8_t dav1d_skip_ctx[5][5] = {
+ { 1, 2, 2, 2, 3 },
+ { 2, 4, 4, 4, 5 },
+ { 2, 4, 4, 4, 5 },
+ { 2, 4, 4, 4, 5 },
+ { 3, 5, 5, 5, 6 },
+};
+
+const uint8_t /* enum TxClass */ dav1d_tx_type_class[N_TX_TYPES_PLUS_LL] = {
+ [DCT_DCT] = TX_CLASS_2D,
+ [ADST_DCT] = TX_CLASS_2D,
+ [DCT_ADST] = TX_CLASS_2D,
+ [ADST_ADST] = TX_CLASS_2D,
+ [FLIPADST_DCT] = TX_CLASS_2D,
+ [DCT_FLIPADST] = TX_CLASS_2D,
+ [FLIPADST_FLIPADST] = TX_CLASS_2D,
+ [ADST_FLIPADST] = TX_CLASS_2D,
+ [FLIPADST_ADST] = TX_CLASS_2D,
+ [IDTX] = TX_CLASS_2D,
+ [V_DCT] = TX_CLASS_V,
+ [H_DCT] = TX_CLASS_H,
+ [V_ADST] = TX_CLASS_V,
+ [H_ADST] = TX_CLASS_H,
+ [V_FLIPADST] = TX_CLASS_V,
+ [H_FLIPADST] = TX_CLASS_H,
+ [WHT_WHT] = TX_CLASS_2D,
+};
+
+const uint8_t /* enum Filter2d */ dav1d_filter_2d[DAV1D_N_FILTERS][DAV1D_N_FILTERS] = {
+ [DAV1D_FILTER_8TAP_REGULAR] = {
+ [DAV1D_FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_REGULAR,
+ [DAV1D_FILTER_8TAP_SHARP] = FILTER_2D_8TAP_REGULAR_SHARP,
+ [DAV1D_FILTER_8TAP_SMOOTH] = FILTER_2D_8TAP_REGULAR_SMOOTH,
+ }, [DAV1D_FILTER_8TAP_SHARP] = {
+ [DAV1D_FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_SHARP_REGULAR,
+ [DAV1D_FILTER_8TAP_SHARP] = FILTER_2D_8TAP_SHARP,
+ [DAV1D_FILTER_8TAP_SMOOTH] = FILTER_2D_8TAP_SHARP_SMOOTH,
+ }, [DAV1D_FILTER_8TAP_SMOOTH] = {
+ [DAV1D_FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_SMOOTH_REGULAR,
+ [DAV1D_FILTER_8TAP_SHARP] = FILTER_2D_8TAP_SMOOTH_SHARP,
+ [DAV1D_FILTER_8TAP_SMOOTH] = FILTER_2D_8TAP_SMOOTH,
+ }, [DAV1D_FILTER_BILINEAR] = {
+ [DAV1D_FILTER_BILINEAR] = FILTER_2D_BILINEAR,
+ }
+};
+
+const uint8_t /* enum Dav1dFilterMode */ dav1d_filter_dir[N_2D_FILTERS][2] = {
+ [FILTER_2D_8TAP_REGULAR] = { DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_REGULAR },
+ [FILTER_2D_8TAP_REGULAR_SMOOTH] = { DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_REGULAR },
+ [FILTER_2D_8TAP_REGULAR_SHARP] = { DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_REGULAR },
+ [FILTER_2D_8TAP_SHARP_REGULAR] = { DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SHARP },
+ [FILTER_2D_8TAP_SHARP_SMOOTH] = { DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SHARP },
+ [FILTER_2D_8TAP_SHARP] = { DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SHARP },
+ [FILTER_2D_8TAP_SMOOTH_REGULAR] = { DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SMOOTH },
+ [FILTER_2D_8TAP_SMOOTH] = { DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SMOOTH },
+ [FILTER_2D_8TAP_SMOOTH_SHARP] = { DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SMOOTH },
+ [FILTER_2D_BILINEAR] = { DAV1D_FILTER_BILINEAR, DAV1D_FILTER_BILINEAR },
+};
+
+const uint8_t dav1d_filter_mode_to_y_mode[5] = {
+ DC_PRED, VERT_PRED, HOR_PRED, HOR_DOWN_PRED, DC_PRED
+};
+
+const uint8_t dav1d_intra_mode_context[N_INTRA_PRED_MODES] = {
+ [DC_PRED] = 0,
+ [VERT_PRED] = 1,
+ [HOR_PRED] = 2,
+ [DIAG_DOWN_LEFT_PRED] = 3,
+ [DIAG_DOWN_RIGHT_PRED] = 4,
+ [VERT_RIGHT_PRED] = 4,
+ [HOR_DOWN_PRED] = 4,
+ [HOR_UP_PRED] = 4,
+ [VERT_LEFT_PRED] = 3,
+ [SMOOTH_PRED] = 0,
+ [SMOOTH_V_PRED] = 1,
+ [SMOOTH_H_PRED] = 2,
+ [PAETH_PRED] = 0,
+};
+
+const uint8_t dav1d_wedge_ctx_lut[N_BS_SIZES] = {
+ [BS_32x32] = 6,
+ [BS_32x16] = 5,
+ [BS_32x8] = 8,
+ [BS_16x32] = 4,
+ [BS_16x16] = 3,
+ [BS_16x8] = 2,
+ [BS_8x32] = 7,
+ [BS_8x16] = 1,
+ [BS_8x8] = 0,
+};
+
+const Dav1dWarpedMotionParams dav1d_default_wm_params = {
+ .type = DAV1D_WM_TYPE_IDENTITY,
+ .matrix = {
+ 0, 0, 1 << 16,
+ 0, 0, 1 << 16,
+ },
+ .u.p.alpha = 0,
+ .u.p.beta = 0,
+ .u.p.gamma = 0,
+ .u.p.delta = 0,
+};
+
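+// Each entry is a y * 12 + x offset: the CDEF tmp buffer is 12 pixels wide
+// (8-pixel block plus 2 pixels of padding on either side).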
+const int8_t dav1d_cdef_directions[2 + 8 + 2 /* dir */][2 /* pass */] = {
+ { 1 * 12 + 0, 2 * 12 + 0 }, // 6
+ { 1 * 12 + 0, 2 * 12 - 1 }, // 7
+ { -1 * 12 + 1, -2 * 12 + 2 }, // 0
+ { 0 * 12 + 1, -1 * 12 + 2 }, // 1
+ { 0 * 12 + 1, 0 * 12 + 2 }, // 2
+ { 0 * 12 + 1, 1 * 12 + 2 }, // 3
+ { 1 * 12 + 1, 2 * 12 + 2 }, // 4
+ { 1 * 12 + 0, 2 * 12 + 1 }, // 5
+ { 1 * 12 + 0, 2 * 12 + 0 }, // 6
+ { 1 * 12 + 0, 2 * 12 - 1 }, // 7
+ { -1 * 12 + 1, -2 * 12 + 2 }, // 0
+ { 0 * 12 + 1, -1 * 12 + 2 }, // 1
+};
+
+const uint16_t ALIGN(dav1d_sgr_params[16][2], 4) = {
+ { 140, 3236 }, { 112, 2158 }, { 93, 1618 }, { 80, 1438 },
+ { 70, 1295 }, { 58, 1177 }, { 47, 1079 }, { 37, 996 },
+ { 30, 925 }, { 25, 863 }, { 0, 2589 }, { 0, 1618 },
+ { 0, 1177 }, { 0, 925 }, { 56, 0 }, { 22, 0 },
+};
+
+const uint8_t ALIGN(dav1d_sgr_x_by_x[256], 64) = {
+ 255, 128, 85, 64, 51, 43, 37, 32, 28, 26, 23, 21, 20, 18, 17,
+ 16, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9,
+ 8, 8, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6,
+ 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0
+};
+
+const int8_t ALIGN(dav1d_mc_subpel_filters[6][15][8], 8) = {
+ [DAV1D_FILTER_8TAP_REGULAR] = {
+ { 0, 1, -3, 63, 4, -1, 0, 0 },
+ { 0, 1, -5, 61, 9, -2, 0, 0 },
+ { 0, 1, -6, 58, 14, -4, 1, 0 },
+ { 0, 1, -7, 55, 19, -5, 1, 0 },
+ { 0, 1, -7, 51, 24, -6, 1, 0 },
+ { 0, 1, -8, 47, 29, -6, 1, 0 },
+ { 0, 1, -7, 42, 33, -6, 1, 0 },
+ { 0, 1, -7, 38, 38, -7, 1, 0 },
+ { 0, 1, -6, 33, 42, -7, 1, 0 },
+ { 0, 1, -6, 29, 47, -8, 1, 0 },
+ { 0, 1, -6, 24, 51, -7, 1, 0 },
+ { 0, 1, -5, 19, 55, -7, 1, 0 },
+ { 0, 1, -4, 14, 58, -6, 1, 0 },
+ { 0, 0, -2, 9, 61, -5, 1, 0 },
+ { 0, 0, -1, 4, 63, -3, 1, 0 }
+ }, [DAV1D_FILTER_8TAP_SMOOTH] = {
+ { 0, 1, 14, 31, 17, 1, 0, 0 },
+ { 0, 0, 13, 31, 18, 2, 0, 0 },
+ { 0, 0, 11, 31, 20, 2, 0, 0 },
+ { 0, 0, 10, 30, 21, 3, 0, 0 },
+ { 0, 0, 9, 29, 22, 4, 0, 0 },
+ { 0, 0, 8, 28, 23, 5, 0, 0 },
+ { 0, -1, 8, 27, 24, 6, 0, 0 },
+ { 0, -1, 7, 26, 26, 7, -1, 0 },
+ { 0, 0, 6, 24, 27, 8, -1, 0 },
+ { 0, 0, 5, 23, 28, 8, 0, 0 },
+ { 0, 0, 4, 22, 29, 9, 0, 0 },
+ { 0, 0, 3, 21, 30, 10, 0, 0 },
+ { 0, 0, 2, 20, 31, 11, 0, 0 },
+ { 0, 0, 2, 18, 31, 13, 0, 0 },
+ { 0, 0, 1, 17, 31, 14, 1, 0 }
+ }, [DAV1D_FILTER_8TAP_SHARP] = {
+ { -1, 1, -3, 63, 4, -1, 1, 0 },
+ { -1, 3, -6, 62, 8, -3, 2, -1 },
+ { -1, 4, -9, 60, 13, -5, 3, -1 },
+ { -2, 5, -11, 58, 19, -7, 3, -1 },
+ { -2, 5, -11, 54, 24, -9, 4, -1 },
+ { -2, 5, -12, 50, 30, -10, 4, -1 },
+ { -2, 5, -12, 45, 35, -11, 5, -1 },
+ { -2, 6, -12, 40, 40, -12, 6, -2 },
+ { -1, 5, -11, 35, 45, -12, 5, -2 },
+ { -1, 4, -10, 30, 50, -12, 5, -2 },
+ { -1, 4, -9, 24, 54, -11, 5, -2 },
+ { -1, 3, -7, 19, 58, -11, 5, -2 },
+ { -1, 3, -5, 13, 60, -9, 4, -1 },
+ { -1, 2, -3, 8, 62, -6, 3, -1 },
+ { 0, 1, -1, 4, 63, -3, 1, -1 }
+ /* width <= 4 */
+ }, [3 + DAV1D_FILTER_8TAP_REGULAR] = {
+ { 0, 0, -2, 63, 4, -1, 0, 0 },
+ { 0, 0, -4, 61, 9, -2, 0, 0 },
+ { 0, 0, -5, 58, 14, -3, 0, 0 },
+ { 0, 0, -6, 55, 19, -4, 0, 0 },
+ { 0, 0, -6, 51, 24, -5, 0, 0 },
+ { 0, 0, -7, 47, 29, -5, 0, 0 },
+ { 0, 0, -6, 42, 33, -5, 0, 0 },
+ { 0, 0, -6, 38, 38, -6, 0, 0 },
+ { 0, 0, -5, 33, 42, -6, 0, 0 },
+ { 0, 0, -5, 29, 47, -7, 0, 0 },
+ { 0, 0, -5, 24, 51, -6, 0, 0 },
+ { 0, 0, -4, 19, 55, -6, 0, 0 },
+ { 0, 0, -3, 14, 58, -5, 0, 0 },
+ { 0, 0, -2, 9, 61, -4, 0, 0 },
+ { 0, 0, -1, 4, 63, -2, 0, 0 }
+ }, [3 + DAV1D_FILTER_8TAP_SMOOTH] = {
+ { 0, 0, 15, 31, 17, 1, 0, 0 },
+ { 0, 0, 13, 31, 18, 2, 0, 0 },
+ { 0, 0, 11, 31, 20, 2, 0, 0 },
+ { 0, 0, 10, 30, 21, 3, 0, 0 },
+ { 0, 0, 9, 29, 22, 4, 0, 0 },
+ { 0, 0, 8, 28, 23, 5, 0, 0 },
+ { 0, 0, 7, 27, 24, 6, 0, 0 },
+ { 0, 0, 6, 26, 26, 6, 0, 0 },
+ { 0, 0, 6, 24, 27, 7, 0, 0 },
+ { 0, 0, 5, 23, 28, 8, 0, 0 },
+ { 0, 0, 4, 22, 29, 9, 0, 0 },
+ { 0, 0, 3, 21, 30, 10, 0, 0 },
+ { 0, 0, 2, 20, 31, 11, 0, 0 },
+ { 0, 0, 2, 18, 31, 13, 0, 0 },
+ { 0, 0, 1, 17, 31, 15, 0, 0 }
+ /* Bilin scaled being very rarely used, add a new table entry
+ * and use the put/prep_8tap_scaled code, thus acting as a
+ * scaled bilinear filter. */
+ }, [5] = {
+ { 0, 0, 0, 60, 4, 0, 0, 0 },
+ { 0, 0, 0, 56, 8, 0, 0, 0 },
+ { 0, 0, 0, 52, 12, 0, 0, 0 },
+ { 0, 0, 0, 48, 16, 0, 0, 0 },
+ { 0, 0, 0, 44, 20, 0, 0, 0 },
+ { 0, 0, 0, 40, 24, 0, 0, 0 },
+ { 0, 0, 0, 36, 28, 0, 0, 0 },
+ { 0, 0, 0, 32, 32, 0, 0, 0 },
+ { 0, 0, 0, 28, 36, 0, 0, 0 },
+ { 0, 0, 0, 24, 40, 0, 0, 0 },
+ { 0, 0, 0, 20, 44, 0, 0, 0 },
+ { 0, 0, 0, 16, 48, 0, 0, 0 },
+ { 0, 0, 0, 12, 52, 0, 0, 0 },
+ { 0, 0, 0, 8, 56, 0, 0, 0 },
+ { 0, 0, 0, 4, 60, 0, 0, 0 }
+ }
+};
+
+const int8_t ALIGN(dav1d_mc_warp_filter[193][8], 8) = {
+ // [-1, 0)
+ { 0, 0, 127, 1, 0, 0, 0, 0 }, { 0, -1, 127, 2, 0, 0, 0, 0 },
+ { 1, -3, 127, 4, - 1, 0, 0, 0 }, { 1, -4, 126, 6, -2, 1, 0, 0 },
+ { 1, -5, 126, 8, - 3, 1, 0, 0 }, { 1, -6, 125, 11, -4, 1, 0, 0 },
+ { 1, -7, 124, 13, - 4, 1, 0, 0 }, { 2, -8, 123, 15, -5, 1, 0, 0 },
+ { 2, -9, 122, 18, - 6, 1, 0, 0 }, { 2, -10, 121, 20, -6, 1, 0, 0 },
+ { 2, -11, 120, 22, - 7, 2, 0, 0 }, { 2, -12, 119, 25, -8, 2, 0, 0 },
+ { 3, -13, 117, 27, - 8, 2, 0, 0 }, { 3, -13, 116, 29, -9, 2, 0, 0 },
+ { 3, -14, 114, 32, -10, 3, 0, 0 }, { 3, -15, 113, 35, -10, 2, 0, 0 },
+ { 3, -15, 111, 37, -11, 3, 0, 0 }, { 3, -16, 109, 40, -11, 3, 0, 0 },
+ { 3, -16, 108, 42, -12, 3, 0, 0 }, { 4, -17, 106, 45, -13, 3, 0, 0 },
+ { 4, -17, 104, 47, -13, 3, 0, 0 }, { 4, -17, 102, 50, -14, 3, 0, 0 },
+ { 4, -17, 100, 52, -14, 3, 0, 0 }, { 4, -18, 98, 55, -15, 4, 0, 0 },
+ { 4, -18, 96, 58, -15, 3, 0, 0 }, { 4, -18, 94, 60, -16, 4, 0, 0 },
+ { 4, -18, 91, 63, -16, 4, 0, 0 }, { 4, -18, 89, 65, -16, 4, 0, 0 },
+ { 4, -18, 87, 68, -17, 4, 0, 0 }, { 4, -18, 85, 70, -17, 4, 0, 0 },
+ { 4, -18, 82, 73, -17, 4, 0, 0 }, { 4, -18, 80, 75, -17, 4, 0, 0 },
+ { 4, -18, 78, 78, -18, 4, 0, 0 }, { 4, -17, 75, 80, -18, 4, 0, 0 },
+ { 4, -17, 73, 82, -18, 4, 0, 0 }, { 4, -17, 70, 85, -18, 4, 0, 0 },
+ { 4, -17, 68, 87, -18, 4, 0, 0 }, { 4, -16, 65, 89, -18, 4, 0, 0 },
+ { 4, -16, 63, 91, -18, 4, 0, 0 }, { 4, -16, 60, 94, -18, 4, 0, 0 },
+ { 3, -15, 58, 96, -18, 4, 0, 0 }, { 4, -15, 55, 98, -18, 4, 0, 0 },
+ { 3, -14, 52, 100, -17, 4, 0, 0 }, { 3, -14, 50, 102, -17, 4, 0, 0 },
+ { 3, -13, 47, 104, -17, 4, 0, 0 }, { 3, -13, 45, 106, -17, 4, 0, 0 },
+ { 3, -12, 42, 108, -16, 3, 0, 0 }, { 3, -11, 40, 109, -16, 3, 0, 0 },
+ { 3, -11, 37, 111, -15, 3, 0, 0 }, { 2, -10, 35, 113, -15, 3, 0, 0 },
+ { 3, -10, 32, 114, -14, 3, 0, 0 }, { 2, - 9, 29, 116, -13, 3, 0, 0 },
+ { 2, -8, 27, 117, -13, 3, 0, 0 }, { 2, - 8, 25, 119, -12, 2, 0, 0 },
+ { 2, -7, 22, 120, -11, 2, 0, 0 }, { 1, - 6, 20, 121, -10, 2, 0, 0 },
+ { 1, -6, 18, 122, - 9, 2, 0, 0 }, { 1, - 5, 15, 123, - 8, 2, 0, 0 },
+ { 1, -4, 13, 124, - 7, 1, 0, 0 }, { 1, - 4, 11, 125, - 6, 1, 0, 0 },
+ { 1, -3, 8, 126, - 5, 1, 0, 0 }, { 1, - 2, 6, 126, - 4, 1, 0, 0 },
+ { 0, -1, 4, 127, - 3, 1, 0, 0 }, { 0, 0, 2, 127, - 1, 0, 0, 0 },
+ // [0, 1)
+ { 0, 0, 0, 127, 1, 0, 0, 0 }, { 0, 0, -1, 127, 2, 0, 0, 0 },
+ { 0, 1, -3, 127, 4, -2, 1, 0 }, { 0, 1, -5, 127, 6, -2, 1, 0 },
+ { 0, 2, -6, 126, 8, -3, 1, 0 }, { -1, 2, -7, 126, 11, -4, 2, -1 },
+ { -1, 3, -8, 125, 13, -5, 2, -1 }, { -1, 3, -10, 124, 16, -6, 3, -1 },
+ { -1, 4, -11, 123, 18, -7, 3, -1 }, { -1, 4, -12, 122, 20, -7, 3, -1 },
+ { -1, 4, -13, 121, 23, -8, 3, -1 }, { -2, 5, -14, 120, 25, -9, 4, -1 },
+ { -1, 5, -15, 119, 27, -10, 4, -1 }, { -1, 5, -16, 118, 30, -11, 4, -1 },
+ { -2, 6, -17, 116, 33, -12, 5, -1 }, { -2, 6, -17, 114, 35, -12, 5, -1 },
+ { -2, 6, -18, 113, 38, -13, 5, -1 }, { -2, 7, -19, 111, 41, -14, 6, -2 },
+ { -2, 7, -19, 110, 43, -15, 6, -2 }, { -2, 7, -20, 108, 46, -15, 6, -2 },
+ { -2, 7, -20, 106, 49, -16, 6, -2 }, { -2, 7, -21, 104, 51, -16, 7, -2 },
+ { -2, 7, -21, 102, 54, -17, 7, -2 }, { -2, 8, -21, 100, 56, -18, 7, -2 },
+ { -2, 8, -22, 98, 59, -18, 7, -2 }, { -2, 8, -22, 96, 62, -19, 7, -2 },
+ { -2, 8, -22, 94, 64, -19, 7, -2 }, { -2, 8, -22, 91, 67, -20, 8, -2 },
+ { -2, 8, -22, 89, 69, -20, 8, -2 }, { -2, 8, -22, 87, 72, -21, 8, -2 },
+ { -2, 8, -21, 84, 74, -21, 8, -2 }, { -2, 8, -22, 82, 77, -21, 8, -2 },
+ { -2, 8, -21, 79, 79, -21, 8, -2 }, { -2, 8, -21, 77, 82, -22, 8, -2 },
+ { -2, 8, -21, 74, 84, -21, 8, -2 }, { -2, 8, -21, 72, 87, -22, 8, -2 },
+ { -2, 8, -20, 69, 89, -22, 8, -2 }, { -2, 8, -20, 67, 91, -22, 8, -2 },
+ { -2, 7, -19, 64, 94, -22, 8, -2 }, { -2, 7, -19, 62, 96, -22, 8, -2 },
+ { -2, 7, -18, 59, 98, -22, 8, -2 }, { -2, 7, -18, 56, 100, -21, 8, -2 },
+ { -2, 7, -17, 54, 102, -21, 7, -2 }, { -2, 7, -16, 51, 104, -21, 7, -2 },
+ { -2, 6, -16, 49, 106, -20, 7, -2 }, { -2, 6, -15, 46, 108, -20, 7, -2 },
+ { -2, 6, -15, 43, 110, -19, 7, -2 }, { -2, 6, -14, 41, 111, -19, 7, -2 },
+ { -1, 5, -13, 38, 113, -18, 6, -2 }, { -1, 5, -12, 35, 114, -17, 6, -2 },
+ { -1, 5, -12, 33, 116, -17, 6, -2 }, { -1, 4, -11, 30, 118, -16, 5, -1 },
+ { -1, 4, -10, 27, 119, -15, 5, -1 }, { -1, 4, -9, 25, 120, -14, 5, -2 },
+ { -1, 3, -8, 23, 121, -13, 4, -1 }, { -1, 3, -7, 20, 122, -12, 4, -1 },
+ { -1, 3, -7, 18, 123, -11, 4, -1 }, { -1, 3, -6, 16, 124, -10, 3, -1 },
+ { -1, 2, -5, 13, 125, -8, 3, -1 }, { -1, 2, -4, 11, 126, -7, 2, -1 },
+ { 0, 1, -3, 8, 126, -6, 2, 0 }, { 0, 1, -2, 6, 127, -5, 1, 0 },
+ { 0, 1, -2, 4, 127, -3, 1, 0 }, { 0, 0, 0, 2, 127, -1, 0, 0 },
+ // [1, 2)
+ { 0, 0, 0, 1, 127, 0, 0, 0 }, { 0, 0, 0, -1, 127, 2, 0, 0 },
+ { 0, 0, 1, -3, 127, 4, -1, 0 }, { 0, 0, 1, -4, 126, 6, -2, 1 },
+ { 0, 0, 1, -5, 126, 8, -3, 1 }, { 0, 0, 1, -6, 125, 11, -4, 1 },
+ { 0, 0, 1, -7, 124, 13, -4, 1 }, { 0, 0, 2, -8, 123, 15, -5, 1 },
+ { 0, 0, 2, -9, 122, 18, -6, 1 }, { 0, 0, 2, -10, 121, 20, -6, 1 },
+ { 0, 0, 2, -11, 120, 22, -7, 2 }, { 0, 0, 2, -12, 119, 25, -8, 2 },
+ { 0, 0, 3, -13, 117, 27, -8, 2 }, { 0, 0, 3, -13, 116, 29, -9, 2 },
+ { 0, 0, 3, -14, 114, 32, -10, 3 }, { 0, 0, 3, -15, 113, 35, -10, 2 },
+ { 0, 0, 3, -15, 111, 37, -11, 3 }, { 0, 0, 3, -16, 109, 40, -11, 3 },
+ { 0, 0, 3, -16, 108, 42, -12, 3 }, { 0, 0, 4, -17, 106, 45, -13, 3 },
+ { 0, 0, 4, -17, 104, 47, -13, 3 }, { 0, 0, 4, -17, 102, 50, -14, 3 },
+ { 0, 0, 4, -17, 100, 52, -14, 3 }, { 0, 0, 4, -18, 98, 55, -15, 4 },
+ { 0, 0, 4, -18, 96, 58, -15, 3 }, { 0, 0, 4, -18, 94, 60, -16, 4 },
+ { 0, 0, 4, -18, 91, 63, -16, 4 }, { 0, 0, 4, -18, 89, 65, -16, 4 },
+ { 0, 0, 4, -18, 87, 68, -17, 4 }, { 0, 0, 4, -18, 85, 70, -17, 4 },
+ { 0, 0, 4, -18, 82, 73, -17, 4 }, { 0, 0, 4, -18, 80, 75, -17, 4 },
+ { 0, 0, 4, -18, 78, 78, -18, 4 }, { 0, 0, 4, -17, 75, 80, -18, 4 },
+ { 0, 0, 4, -17, 73, 82, -18, 4 }, { 0, 0, 4, -17, 70, 85, -18, 4 },
+ { 0, 0, 4, -17, 68, 87, -18, 4 }, { 0, 0, 4, -16, 65, 89, -18, 4 },
+ { 0, 0, 4, -16, 63, 91, -18, 4 }, { 0, 0, 4, -16, 60, 94, -18, 4 },
+ { 0, 0, 3, -15, 58, 96, -18, 4 }, { 0, 0, 4, -15, 55, 98, -18, 4 },
+ { 0, 0, 3, -14, 52, 100, -17, 4 }, { 0, 0, 3, -14, 50, 102, -17, 4 },
+ { 0, 0, 3, -13, 47, 104, -17, 4 }, { 0, 0, 3, -13, 45, 106, -17, 4 },
+ { 0, 0, 3, -12, 42, 108, -16, 3 }, { 0, 0, 3, -11, 40, 109, -16, 3 },
+ { 0, 0, 3, -11, 37, 111, -15, 3 }, { 0, 0, 2, -10, 35, 113, -15, 3 },
+ { 0, 0, 3, -10, 32, 114, -14, 3 }, { 0, 0, 2, -9, 29, 116, -13, 3 },
+ { 0, 0, 2, -8, 27, 117, -13, 3 }, { 0, 0, 2, -8, 25, 119, -12, 2 },
+ { 0, 0, 2, -7, 22, 120, -11, 2 }, { 0, 0, 1, -6, 20, 121, -10, 2 },
+ { 0, 0, 1, -6, 18, 122, -9, 2 }, { 0, 0, 1, -5, 15, 123, -8, 2 },
+ { 0, 0, 1, -4, 13, 124, -7, 1 }, { 0, 0, 1, -4, 11, 125, -6, 1 },
+ { 0, 0, 1, -3, 8, 126, -5, 1 }, { 0, 0, 1, -2, 6, 126, -4, 1 },
+ { 0, 0, 0, -1, 4, 127, -3, 1 }, { 0, 0, 0, 0, 2, 127, -1, 0 },
+ // dummy (replicate row index 191)
+ { 0, 0, 0, 0, 2, 127, -1, 0 },
+};
+
+const int8_t ALIGN(dav1d_resize_filter[64][8], 8) = {
+ { 0, 0, 0, -128, 0, 0, 0, 0 }, { 0, 0, 1, -128, -2, 1, 0, 0 },
+ { 0, -1, 3, -127, -4, 2, -1, 0 }, { 0, -1, 4, -127, -6, 3, -1, 0 },
+ { 0, -2, 6, -126, -8, 3, -1, 0 }, { 0, -2, 7, -125, -11, 4, -1, 0 },
+ { 1, -2, 8, -125, -13, 5, -2, 0 }, { 1, -3, 9, -124, -15, 6, -2, 0 },
+ { 1, -3, 10, -123, -18, 6, -2, 1 }, { 1, -3, 11, -122, -20, 7, -3, 1 },
+ { 1, -4, 12, -121, -22, 8, -3, 1 }, { 1, -4, 13, -120, -25, 9, -3, 1 },
+ { 1, -4, 14, -118, -28, 9, -3, 1 }, { 1, -4, 15, -117, -30, 10, -4, 1 },
+ { 1, -5, 16, -116, -32, 11, -4, 1 }, { 1, -5, 16, -114, -35, 12, -4, 1 },
+ { 1, -5, 17, -112, -38, 12, -4, 1 }, { 1, -5, 18, -111, -40, 13, -5, 1 },
+ { 1, -5, 18, -109, -43, 14, -5, 1 }, { 1, -6, 19, -107, -45, 14, -5, 1 },
+ { 1, -6, 19, -105, -48, 15, -5, 1 }, { 1, -6, 19, -103, -51, 16, -5, 1 },
+ { 1, -6, 20, -101, -53, 16, -6, 1 }, { 1, -6, 20, -99, -56, 17, -6, 1 },
+ { 1, -6, 20, -97, -58, 17, -6, 1 }, { 1, -6, 20, -95, -61, 18, -6, 1 },
+ { 2, -7, 20, -93, -64, 18, -6, 2 }, { 2, -7, 20, -91, -66, 19, -6, 1 },
+ { 2, -7, 20, -88, -69, 19, -6, 1 }, { 2, -7, 20, -86, -71, 19, -6, 1 },
+ { 2, -7, 20, -84, -74, 20, -7, 2 }, { 2, -7, 20, -81, -76, 20, -7, 1 },
+ { 2, -7, 20, -79, -79, 20, -7, 2 }, { 1, -7, 20, -76, -81, 20, -7, 2 },
+ { 2, -7, 20, -74, -84, 20, -7, 2 }, { 1, -6, 19, -71, -86, 20, -7, 2 },
+ { 1, -6, 19, -69, -88, 20, -7, 2 }, { 1, -6, 19, -66, -91, 20, -7, 2 },
+ { 2, -6, 18, -64, -93, 20, -7, 2 }, { 1, -6, 18, -61, -95, 20, -6, 1 },
+ { 1, -6, 17, -58, -97, 20, -6, 1 }, { 1, -6, 17, -56, -99, 20, -6, 1 },
+ { 1, -6, 16, -53, -101, 20, -6, 1 }, { 1, -5, 16, -51, -103, 19, -6, 1 },
+ { 1, -5, 15, -48, -105, 19, -6, 1 }, { 1, -5, 14, -45, -107, 19, -6, 1 },
+ { 1, -5, 14, -43, -109, 18, -5, 1 }, { 1, -5, 13, -40, -111, 18, -5, 1 },
+ { 1, -4, 12, -38, -112, 17, -5, 1 }, { 1, -4, 12, -35, -114, 16, -5, 1 },
+ { 1, -4, 11, -32, -116, 16, -5, 1 }, { 1, -4, 10, -30, -117, 15, -4, 1 },
+ { 1, -3, 9, -28, -118, 14, -4, 1 }, { 1, -3, 9, -25, -120, 13, -4, 1 },
+ { 1, -3, 8, -22, -121, 12, -4, 1 }, { 1, -3, 7, -20, -122, 11, -3, 1 },
+ { 1, -2, 6, -18, -123, 10, -3, 1 }, { 0, -2, 6, -15, -124, 9, -3, 1 },
+ { 0, -2, 5, -13, -125, 8, -2, 1 }, { 0, -1, 4, -11, -125, 7, -2, 0 },
+ { 0, -1, 3, -8, -126, 6, -2, 0 }, { 0, -1, 3, -6, -127, 4, -1, 0 },
+ { 0, -1, 2, -4, -127, 3, -1, 0 }, { 0, 0, 1, -2, -128, 1, 0, 0 },
+};
+
+const uint8_t ALIGN(dav1d_sm_weights[128], 16) = {
+ // Unused, because we always offset by bs, which is at least 2.
+ 0, 0,
+ // bs = 2
+ 255, 128,
+ // bs = 4
+ 255, 149, 85, 64,
+ // bs = 8
+ 255, 197, 146, 105, 73, 50, 37, 32,
+ // bs = 16
+ 255, 225, 196, 170, 145, 123, 102, 84,
+ 68, 54, 43, 33, 26, 20, 17, 16,
+ // bs = 32
+ 255, 240, 225, 210, 196, 182, 169, 157,
+ 145, 133, 122, 111, 101, 92, 83, 74,
+ 66, 59, 52, 45, 39, 34, 29, 25,
+ 21, 17, 14, 12, 10, 9, 8, 8,
+ // bs = 64
+ 255, 248, 240, 233, 225, 218, 210, 203,
+ 196, 189, 182, 176, 169, 163, 156, 150,
+ 144, 138, 133, 127, 121, 116, 111, 106,
+ 101, 96, 91, 86, 82, 77, 73, 69,
+ 65, 61, 57, 54, 50, 47, 44, 41,
+ 38, 35, 32, 29, 27, 25, 22, 20,
+ 18, 16, 15, 13, 12, 10, 9, 8,
+ 7, 6, 6, 5, 5, 4, 4, 4
+};
+
+const uint16_t dav1d_dr_intra_derivative[44] = {
+ // Values that are 0 will never be used
+ 0, // Angles:
+ 1023, 0, // 3, 93, 183
+ 547, // 6, 96, 186
+ 372, 0, 0, // 9, 99, 189
+ 273, // 14, 104, 194
+ 215, 0, // 17, 107, 197
+ 178, // 20, 110, 200
+ 151, 0, // 23, 113, 203 (113 & 203 are base angles)
+ 132, // 26, 116, 206
+ 116, 0, // 29, 119, 209
+ 102, 0, // 32, 122, 212
+ 90, // 36, 126, 216
+ 80, 0, // 39, 129, 219
+ 71, // 42, 132, 222
+ 64, 0, // 45, 135, 225 (45 & 135 are base angles)
+ 57, // 48, 138, 228
+ 51, 0, // 51, 141, 231
+ 45, 0, // 54, 144, 234
+ 40, // 58, 148, 238
+ 35, 0, // 61, 151, 241
+ 31, // 64, 154, 244
+ 27, 0, // 67, 157, 247 (67 & 157 are base angles)
+ 23, // 70, 160, 250
+ 19, 0, // 73, 163, 253
+ 15, 0, // 76, 166, 256
+ 11, 0, // 81, 171, 261
+ 7, // 84, 174, 264
+ 3 // 87, 177, 267
+};
+
+#if ARCH_X86
+#define F(idx, f0, f1, f2, f3, f4, f5, f6) \
+ [2*idx+0] = f0, [2*idx+1] = f1, \
+ [2*idx+16] = f2, [2*idx+17] = f3, \
+ [2*idx+32] = f4, [2*idx+33] = f5, \
+ [2*idx+48] = f6
+#else
+#define F(idx, f0, f1, f2, f3, f4, f5, f6) \
+ [1*idx+0] = f0, [1*idx+8] = f1, \
+ [1*idx+16] = f2, [1*idx+24] = f3, \
+ [1*idx+32] = f4, [1*idx+40] = f5, \
+ [1*idx+48] = f6
+#endif
+const int8_t ALIGN(dav1d_filter_intra_taps[5][64], 64) = {
+ {
+ F( 0, -6, 10, 0, 0, 0, 12, 0 ),
+ F( 1, -5, 2, 10, 0, 0, 9, 0 ),
+ F( 2, -3, 1, 1, 10, 0, 7, 0 ),
+ F( 3, -3, 1, 1, 2, 10, 5, 0 ),
+ F( 4, -4, 6, 0, 0, 0, 2, 12 ),
+ F( 5, -3, 2, 6, 0, 0, 2, 9 ),
+ F( 6, -3, 2, 2, 6, 0, 2, 7 ),
+ F( 7, -3, 1, 2, 2, 6, 3, 5 ),
+ }, {
+ F( 0, -10, 16, 0, 0, 0, 10, 0 ),
+ F( 1, -6, 0, 16, 0, 0, 6, 0 ),
+ F( 2, -4, 0, 0, 16, 0, 4, 0 ),
+ F( 3, -2, 0, 0, 0, 16, 2, 0 ),
+ F( 4, -10, 16, 0, 0, 0, 0, 10 ),
+ F( 5, -6, 0, 16, 0, 0, 0, 6 ),
+ F( 6, -4, 0, 0, 16, 0, 0, 4 ),
+ F( 7, -2, 0, 0, 0, 16, 0, 2 ),
+ }, {
+ F( 0, -8, 8, 0, 0, 0, 16, 0 ),
+ F( 1, -8, 0, 8, 0, 0, 16, 0 ),
+ F( 2, -8, 0, 0, 8, 0, 16, 0 ),
+ F( 3, -8, 0, 0, 0, 8, 16, 0 ),
+ F( 4, -4, 4, 0, 0, 0, 0, 16 ),
+ F( 5, -4, 0, 4, 0, 0, 0, 16 ),
+ F( 6, -4, 0, 0, 4, 0, 0, 16 ),
+ F( 7, -4, 0, 0, 0, 4, 0, 16 ),
+ }, {
+ F( 0, -2, 8, 0, 0, 0, 10, 0 ),
+ F( 1, -1, 3, 8, 0, 0, 6, 0 ),
+ F( 2, -1, 2, 3, 8, 0, 4, 0 ),
+ F( 3, 0, 1, 2, 3, 8, 2, 0 ),
+ F( 4, -1, 4, 0, 0, 0, 3, 10 ),
+ F( 5, -1, 3, 4, 0, 0, 4, 6 ),
+ F( 6, -1, 2, 3, 4, 0, 4, 4 ),
+ F( 7, -1, 2, 2, 3, 4, 3, 3 ),
+ }, {
+ F( 0, -12, 14, 0, 0, 0, 14, 0 ),
+ F( 1, -10, 0, 14, 0, 0, 12, 0 ),
+ F( 2, -9, 0, 0, 14, 0, 11, 0 ),
+ F( 3, -8, 0, 0, 0, 14, 10, 0 ),
+ F( 4, -10, 12, 0, 0, 0, 0, 14 ),
+ F( 5, -9, 1, 12, 0, 0, 0, 12 ),
+ F( 6, -8, 0, 0, 12, 0, 1, 11 ),
+ F( 7, -7, 0, 0, 1, 12, 1, 9 ),
+ }
+};
+
+const uint8_t ALIGN(dav1d_obmc_masks[64], 16) = {
+ /* Unused */
+ 0, 0,
+ /* 2 */
+ 19, 0,
+ /* 4 */
+ 25, 14, 5, 0,
+ /* 8 */
+ 28, 22, 16, 11, 7, 3, 0, 0,
+ /* 16 */
+ 30, 27, 24, 21, 18, 15, 12, 10, 8, 6, 4, 3, 0, 0, 0, 0,
+ /* 32 */
+ 31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11, 9,
+ 8, 7, 6, 5, 4, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+// Taken from the spec. Range is [-2048, 2047], mean is 0 and stddev is 512
+const int16_t dav1d_gaussian_sequence[2048] = {
+ 56, 568, -180, 172, 124, -84, 172, -64, -900, 24, 820,
+ 224, 1248, 996, 272, -8, -916, -388, -732, -104, -188, 800,
+ 112, -652, -320, -376, 140, -252, 492, -168, 44, -788, 588,
+ -584, 500, -228, 12, 680, 272, -476, 972, -100, 652, 368,
+ 432, -196, -720, -192, 1000, -332, 652, -136, -552, -604, -4,
+ 192, -220, -136, 1000, -52, 372, -96, -624, 124, -24, 396,
+ 540, -12, -104, 640, 464, 244, -208, -84, 368, -528, -740,
+ 248, -968, -848, 608, 376, -60, -292, -40, -156, 252, -292,
+ 248, 224, -280, 400, -244, 244, -60, 76, -80, 212, 532,
+ 340, 128, -36, 824, -352, -60, -264, -96, -612, 416, -704,
+ 220, -204, 640, -160, 1220, -408, 900, 336, 20, -336, -96,
+ -792, 304, 48, -28, -1232, -1172, -448, 104, -292, -520, 244,
+ 60, -948, 0, -708, 268, 108, 356, -548, 488, -344, -136,
+ 488, -196, -224, 656, -236, -1128, 60, 4, 140, 276, -676,
+ -376, 168, -108, 464, 8, 564, 64, 240, 308, -300, -400,
+ -456, -136, 56, 120, -408, -116, 436, 504, -232, 328, 844,
+ -164, -84, 784, -168, 232, -224, 348, -376, 128, 568, 96,
+ -1244, -288, 276, 848, 832, -360, 656, 464, -384, -332, -356,
+ 728, -388, 160, -192, 468, 296, 224, 140, -776, -100, 280,
+ 4, 196, 44, -36, -648, 932, 16, 1428, 28, 528, 808,
+ 772, 20, 268, 88, -332, -284, 124, -384, -448, 208, -228,
+ -1044, -328, 660, 380, -148, -300, 588, 240, 540, 28, 136,
+ -88, -436, 256, 296, -1000, 1400, 0, -48, 1056, -136, 264,
+ -528, -1108, 632, -484, -592, -344, 796, 124, -668, -768, 388,
+ 1296, -232, -188, -200, -288, -4, 308, 100, -168, 256, -500,
+ 204, -508, 648, -136, 372, -272, -120, -1004, -552, -548, -384,
+ 548, -296, 428, -108, -8, -912, -324, -224, -88, -112, -220,
+ -100, 996, -796, 548, 360, -216, 180, 428, -200, -212, 148,
+ 96, 148, 284, 216, -412, -320, 120, -300, -384, -604, -572,
+ -332, -8, -180, -176, 696, 116, -88, 628, 76, 44, -516,
+ 240, -208, -40, 100, -592, 344, -308, -452, -228, 20, 916,
+ -1752, -136, -340, -804, 140, 40, 512, 340, 248, 184, -492,
+ 896, -156, 932, -628, 328, -688, -448, -616, -752, -100, 560,
+ -1020, 180, -800, -64, 76, 576, 1068, 396, 660, 552, -108,
+ -28, 320, -628, 312, -92, -92, -472, 268, 16, 560, 516,
+ -672, -52, 492, -100, 260, 384, 284, 292, 304, -148, 88,
+ -152, 1012, 1064, -228, 164, -376, -684, 592, -392, 156, 196,
+ -524, -64, -884, 160, -176, 636, 648, 404, -396, -436, 864,
+ 424, -728, 988, -604, 904, -592, 296, -224, 536, -176, -920,
+ 436, -48, 1176, -884, 416, -776, -824, -884, 524, -548, -564,
+ -68, -164, -96, 692, 364, -692, -1012, -68, 260, -480, 876,
+ -1116, 452, -332, -352, 892, -1088, 1220, -676, 12, -292, 244,
+ 496, 372, -32, 280, 200, 112, -440, -96, 24, -644, -184,
+ 56, -432, 224, -980, 272, -260, 144, -436, 420, 356, 364,
+ -528, 76, 172, -744, -368, 404, -752, -416, 684, -688, 72,
+ 540, 416, 92, 444, 480, -72, -1416, 164, -1172, -68, 24,
+ 424, 264, 1040, 128, -912, -524, -356, 64, 876, -12, 4,
+ -88, 532, 272, -524, 320, 276, -508, 940, 24, -400, -120,
+ 756, 60, 236, -412, 100, 376, -484, 400, -100, -740, -108,
+ -260, 328, -268, 224, -200, -416, 184, -604, -564, -20, 296,
+ 60, 892, -888, 60, 164, 68, -760, 216, -296, 904, -336,
+ -28, 404, -356, -568, -208, -1480, -512, 296, 328, -360, -164,
+ -1560, -776, 1156, -428, 164, -504, -112, 120, -216, -148, -264,
+ 308, 32, 64, -72, 72, 116, 176, -64, -272, 460, -536,
+ -784, -280, 348, 108, -752, -132, 524, -540, -776, 116, -296,
+ -1196, -288, -560, 1040, -472, 116, -848, -1116, 116, 636, 696,
+ 284, -176, 1016, 204, -864, -648, -248, 356, 972, -584, -204,
+ 264, 880, 528, -24, -184, 116, 448, -144, 828, 524, 212,
+ -212, 52, 12, 200, 268, -488, -404, -880, 824, -672, -40,
+ 908, -248, 500, 716, -576, 492, -576, 16, 720, -108, 384,
+ 124, 344, 280, 576, -500, 252, 104, -308, 196, -188, -8,
+ 1268, 296, 1032, -1196, 436, 316, 372, -432, -200, -660, 704,
+ -224, 596, -132, 268, 32, -452, 884, 104, -1008, 424, -1348,
+ -280, 4, -1168, 368, 476, 696, 300, -8, 24, 180, -592,
+ -196, 388, 304, 500, 724, -160, 244, -84, 272, -256, -420,
+ 320, 208, -144, -156, 156, 364, 452, 28, 540, 316, 220,
+ -644, -248, 464, 72, 360, 32, -388, 496, -680, -48, 208,
+ -116, -408, 60, -604, -392, 548, -840, 784, -460, 656, -544,
+ -388, -264, 908, -800, -628, -612, -568, 572, -220, 164, 288,
+ -16, -308, 308, -112, -636, -760, 280, -668, 432, 364, 240,
+ -196, 604, 340, 384, 196, 592, -44, -500, 432, -580, -132,
+ 636, -76, 392, 4, -412, 540, 508, 328, -356, -36, 16,
+ -220, -64, -248, -60, 24, -192, 368, 1040, 92, -24, -1044,
+ -32, 40, 104, 148, 192, -136, -520, 56, -816, -224, 732,
+ 392, 356, 212, -80, -424, -1008, -324, 588, -1496, 576, 460,
+ -816, -848, 56, -580, -92, -1372, -112, -496, 200, 364, 52,
+ -140, 48, -48, -60, 84, 72, 40, 132, -356, -268, -104,
+ -284, -404, 732, -520, 164, -304, -540, 120, 328, -76, -460,
+ 756, 388, 588, 236, -436, -72, -176, -404, -316, -148, 716,
+ -604, 404, -72, -88, -888, -68, 944, 88, -220, -344, 960,
+ 472, 460, -232, 704, 120, 832, -228, 692, -508, 132, -476,
+ 844, -748, -364, -44, 1116, -1104, -1056, 76, 428, 552, -692,
+ 60, 356, 96, -384, -188, -612, -576, 736, 508, 892, 352,
+ -1132, 504, -24, -352, 324, 332, -600, -312, 292, 508, -144,
+ -8, 484, 48, 284, -260, -240, 256, -100, -292, -204, -44,
+ 472, -204, 908, -188, -1000, -256, 92, 1164, -392, 564, 356,
+ 652, -28, -884, 256, 484, -192, 760, -176, 376, -524, -452,
+ -436, 860, -736, 212, 124, 504, -476, 468, 76, -472, 552,
+ -692, -944, -620, 740, -240, 400, 132, 20, 192, -196, 264,
+ -668, -1012, -60, 296, -316, -828, 76, -156, 284, -768, -448,
+ -832, 148, 248, 652, 616, 1236, 288, -328, -400, -124, 588,
+ 220, 520, -696, 1032, 768, -740, -92, -272, 296, 448, -464,
+ 412, -200, 392, 440, -200, 264, -152, -260, 320, 1032, 216,
+ 320, -8, -64, 156, -1016, 1084, 1172, 536, 484, -432, 132,
+ 372, -52, -256, 84, 116, -352, 48, 116, 304, -384, 412,
+ 924, -300, 528, 628, 180, 648, 44, -980, -220, 1320, 48,
+ 332, 748, 524, -268, -720, 540, -276, 564, -344, -208, -196,
+ 436, 896, 88, -392, 132, 80, -964, -288, 568, 56, -48,
+ -456, 888, 8, 552, -156, -292, 948, 288, 128, -716, -292,
+ 1192, -152, 876, 352, -600, -260, -812, -468, -28, -120, -32,
+ -44, 1284, 496, 192, 464, 312, -76, -516, -380, -456, -1012,
+ -48, 308, -156, 36, 492, -156, -808, 188, 1652, 68, -120,
+ -116, 316, 160, -140, 352, 808, -416, 592, 316, -480, 56,
+ 528, -204, -568, 372, -232, 752, -344, 744, -4, 324, -416,
+ -600, 768, 268, -248, -88, -132, -420, -432, 80, -288, 404,
+ -316, -1216, -588, 520, -108, 92, -320, 368, -480, -216, -92,
+ 1688, -300, 180, 1020, -176, 820, -68, -228, -260, 436, -904,
+ 20, 40, -508, 440, -736, 312, 332, 204, 760, -372, 728,
+ 96, -20, -632, -520, -560, 336, 1076, -64, -532, 776, 584,
+ 192, 396, -728, -520, 276, -188, 80, -52, -612, -252, -48,
+ 648, 212, -688, 228, -52, -260, 428, -412, -272, -404, 180,
+ 816, -796, 48, 152, 484, -88, -216, 988, 696, 188, -528,
+ 648, -116, -180, 316, 476, 12, -564, 96, 476, -252, -364,
+ -376, -392, 556, -256, -576, 260, -352, 120, -16, -136, -260,
+ -492, 72, 556, 660, 580, 616, 772, 436, 424, -32, -324,
+ -1268, 416, -324, -80, 920, 160, 228, 724, 32, -516, 64,
+ 384, 68, -128, 136, 240, 248, -204, -68, 252, -932, -120,
+ -480, -628, -84, 192, 852, -404, -288, -132, 204, 100, 168,
+ -68, -196, -868, 460, 1080, 380, -80, 244, 0, 484, -888,
+ 64, 184, 352, 600, 460, 164, 604, -196, 320, -64, 588,
+ -184, 228, 12, 372, 48, -848, -344, 224, 208, -200, 484,
+ 128, -20, 272, -468, -840, 384, 256, -720, -520, -464, -580,
+ 112, -120, 644, -356, -208, -608, -528, 704, 560, -424, 392,
+ 828, 40, 84, 200, -152, 0, -144, 584, 280, -120, 80,
+ -556, -972, -196, -472, 724, 80, 168, -32, 88, 160, -688,
+ 0, 160, 356, 372, -776, 740, -128, 676, -248, -480, 4,
+ -364, 96, 544, 232, -1032, 956, 236, 356, 20, -40, 300,
+ 24, -676, -596, 132, 1120, -104, 532, -1096, 568, 648, 444,
+ 508, 380, 188, -376, -604, 1488, 424, 24, 756, -220, -192,
+ 716, 120, 920, 688, 168, 44, -460, 568, 284, 1144, 1160,
+ 600, 424, 888, 656, -356, -320, 220, 316, -176, -724, -188,
+ -816, -628, -348, -228, -380, 1012, -452, -660, 736, 928, 404,
+ -696, -72, -268, -892, 128, 184, -344, -780, 360, 336, 400,
+ 344, 428, 548, -112, 136, -228, -216, -820, -516, 340, 92,
+ -136, 116, -300, 376, -244, 100, -316, -520, -284, -12, 824,
+ 164, -548, -180, -128, 116, -924, -828, 268, -368, -580, 620,
+ 192, 160, 0, -1676, 1068, 424, -56, -360, 468, -156, 720,
+ 288, -528, 556, -364, 548, -148, 504, 316, 152, -648, -620,
+ -684, -24, -376, -384, -108, -920, -1032, 768, 180, -264, -508,
+ -1268, -260, -60, 300, -240, 988, 724, -376, -576, -212, -736,
+ 556, 192, 1092, -620, -880, 376, -56, -4, -216, -32, 836,
+ 268, 396, 1332, 864, -600, 100, 56, -412, -92, 356, 180,
+ 884, -468, -436, 292, -388, -804, -704, -840, 368, -348, 140,
+ -724, 1536, 940, 372, 112, -372, 436, -480, 1136, 296, -32,
+ -228, 132, -48, -220, 868, -1016, -60, -1044, -464, 328, 916,
+ 244, 12, -736, -296, 360, 468, -376, -108, -92, 788, 368,
+ -56, 544, 400, -672, -420, 728, 16, 320, 44, -284, -380,
+ -796, 488, 132, 204, -596, -372, 88, -152, -908, -636, -572,
+ -624, -116, -692, -200, -56, 276, -88, 484, -324, 948, 864,
+ 1000, -456, -184, -276, 292, -296, 156, 676, 320, 160, 908,
+ -84, -1236, -288, -116, 260, -372, -644, 732, -756, -96, 84,
+ 344, -520, 348, -688, 240, -84, 216, -1044, -136, -676, -396,
+ -1500, 960, -40, 176, 168, 1516, 420, -504, -344, -364, -360,
+ 1216, -940, -380, -212, 252, -660, -708, 484, -444, -152, 928,
+ -120, 1112, 476, -260, 560, -148, -344, 108, -196, 228, -288,
+ 504, 560, -328, -88, 288, -1008, 460, -228, 468, -836, -196,
+ 76, 388, 232, 412, -1168, -716, -644, 756, -172, -356, -504,
+ 116, 432, 528, 48, 476, -168, -608, 448, 160, -532, -272,
+ 28, -676, -12, 828, 980, 456, 520, 104, -104, 256, -344,
+ -4, -28, -368, -52, -524, -572, -556, -200, 768, 1124, -208,
+ -512, 176, 232, 248, -148, -888, 604, -600, -304, 804, -156,
+ -212, 488, -192, -804, -256, 368, -360, -916, -328, 228, -240,
+ -448, -472, 856, -556, -364, 572, -12, -156, -368, -340, 432,
+ 252, -752, -152, 288, 268, -580, -848, -592, 108, -76, 244,
+ 312, -716, 592, -80, 436, 360, 4, -248, 160, 516, 584,
+ 732, 44, -468, -280, -292, -156, -588, 28, 308, 912, 24,
+ 124, 156, 180, -252, 944, -924, -772, -520, -428, -624, 300,
+ -212, -1144, 32, -724, 800, -1128, -212, -1288, -848, 180, -416,
+ 440, 192, -576, -792, -76, -1080, 80, -532, -352, -132, 380,
+ -820, 148, 1112, 128, 164, 456, 700, -924, 144, -668, -384,
+ 648, -832, 508, 552, -52, -100, -656, 208, -568, 748, -88,
+ 680, 232, 300, 192, -408, -1012, -152, -252, -268, 272, -876,
+ -664, -648, -332, -136, 16, 12, 1152, -28, 332, -536, 320,
+ -672, -460, -316, 532, -260, 228, -40, 1052, -816, 180, 88,
+ -496, -556, -672, -368, 428, 92, 356, 404, -408, 252, 196,
+ -176, -556, 792, 268, 32, 372, 40, 96, -332, 328, 120,
+ 372, -900, -40, 472, -264, -592, 952, 128, 656, 112, 664,
+ -232, 420, 4, -344, -464, 556, 244, -416, -32, 252, 0,
+ -412, 188, -696, 508, -476, 324, -1096, 656, -312, 560, 264,
+ -136, 304, 160, -64, -580, 248, 336, -720, 560, -348, -288,
+ -276, -196, -500, 852, -544, -236, -1128, -992, -776, 116, 56,
+ 52, 860, 884, 212, -12, 168, 1020, 512, -552, 924, -148,
+ 716, 188, 164, -340, -520, -184, 880, -152, -680, -208, -1156,
+ -300, -528, -472, 364, 100, -744, -1056, -32, 540, 280, 144,
+ -676, -32, -232, -280, -224, 96, 568, -76, 172, 148, 148,
+ 104, 32, -296, -32, 788, -80, 32, -16, 280, 288, 944,
+ 428, -484
+};
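+
+/* Illustrative sketch (added for exposition, not part of the upstream table
+ * file): film-grain synthesis draws pseudo-random samples from the table
+ * above. Because its length is a power of two (2048), masking an 11-bit PRNG
+ * output yields a uniform index, roughly:
+ *
+ *     int16_t grain_sample(unsigned *const state) {
+ *         *state = prng_update(*state);   // hypothetical PRNG step
+ *         return dav1d_gaussian_sequence[*state & 2047];
+ *     }
+ *
+ * The real generator (and its exact PRNG) lives in the film-grain code. */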
--- /dev/null
+/*
+ * Copyright © 2018-2021, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_TABLES_H
+#define DAV1D_SRC_TABLES_H
+
+#include <stdint.h>
+
+#include "common/intops.h"
+
+#include "src/levels.h"
+
+EXTERN const uint8_t dav1d_al_part_ctx[2][N_BL_LEVELS][N_PARTITIONS];
+EXTERN const uint8_t /* enum BlockSize */
+ dav1d_block_sizes[N_BL_LEVELS][N_PARTITIONS][2];
+// width, height (in 4px blocks), log2 versions of these two
+EXTERN const uint8_t dav1d_block_dimensions[N_BS_SIZES][4];
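+/* Usage sketch (illustrative, not from the upstream header): for a block
+ * size bs,
+ *     const uint8_t *const b_dim = dav1d_block_dimensions[bs];
+ * yields b_dim[0] = w4, b_dim[1] = h4, b_dim[2] = log2(w4), b_dim[3] = log2(h4). */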
+typedef struct TxfmInfo {
+    // width, height (in 4px blocks), log2 of them, min/max of log2, sub, ctx
+ uint8_t w, h, lw, lh, min, max, sub, ctx;
+} TxfmInfo;
+EXTERN const TxfmInfo dav1d_txfm_dimensions[N_RECT_TX_SIZES];
+EXTERN const uint8_t /* enum (Rect)TxfmSize */
+ dav1d_max_txfm_size_for_bs[N_BS_SIZES][4 /* y, 420, 422, 444 */];
+EXTERN const uint8_t /* enum TxfmType */
+ dav1d_txtp_from_uvmode[N_UV_INTRA_PRED_MODES];
+
+EXTERN const uint8_t /* enum InterPredMode */
+ dav1d_comp_inter_pred_modes[N_COMP_INTER_PRED_MODES][2];
+
+EXTERN const uint8_t dav1d_partition_type_count[N_BL_LEVELS];
+EXTERN const uint8_t /* enum TxfmType */ dav1d_tx_types_per_set[40];
+
+EXTERN const uint8_t dav1d_filter_mode_to_y_mode[5];
+EXTERN const uint8_t dav1d_ymode_size_context[N_BS_SIZES];
+EXTERN const uint8_t dav1d_lo_ctx_offsets[3][5][5];
+EXTERN const uint8_t dav1d_skip_ctx[5][5];
+EXTERN const uint8_t /* enum TxClass */
+ dav1d_tx_type_class[N_TX_TYPES_PLUS_LL];
+EXTERN const uint8_t /* enum Filter2d */
+ dav1d_filter_2d[DAV1D_N_FILTERS /* h */][DAV1D_N_FILTERS /* v */];
+EXTERN const uint8_t /* enum Dav1dFilterMode */ dav1d_filter_dir[N_2D_FILTERS][2];
+EXTERN const uint8_t dav1d_intra_mode_context[N_INTRA_PRED_MODES];
+EXTERN const uint8_t dav1d_wedge_ctx_lut[N_BS_SIZES];
+
+static const unsigned cfl_allowed_mask =
+ (1 << BS_32x32) |
+ (1 << BS_32x16) |
+ (1 << BS_32x8) |
+ (1 << BS_16x32) |
+ (1 << BS_16x16) |
+ (1 << BS_16x8) |
+ (1 << BS_16x4) |
+ (1 << BS_8x32) |
+ (1 << BS_8x16) |
+ (1 << BS_8x8) |
+ (1 << BS_8x4) |
+ (1 << BS_4x16) |
+ (1 << BS_4x8) |
+ (1 << BS_4x4);
+
+static const unsigned wedge_allowed_mask =
+ (1 << BS_32x32) |
+ (1 << BS_32x16) |
+ (1 << BS_32x8) |
+ (1 << BS_16x32) |
+ (1 << BS_16x16) |
+ (1 << BS_16x8) |
+ (1 << BS_8x32) |
+ (1 << BS_8x16) |
+ (1 << BS_8x8);
+
+static const unsigned interintra_allowed_mask =
+ (1 << BS_32x32) |
+ (1 << BS_32x16) |
+ (1 << BS_16x32) |
+ (1 << BS_16x16) |
+ (1 << BS_16x8) |
+ (1 << BS_8x16) |
+ (1 << BS_8x8);
+
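+/* Minimal usage sketch (the helper name is an assumption, not part of the
+ * upstream header): each *_allowed_mask above is a bitfield over
+ * enum BlockSize, so an allowance check is a single shift-and-AND. */
+static inline int dav1d_bs_in_mask(const unsigned mask, const int bs) {
+    /* e.g. dav1d_bs_in_mask(cfl_allowed_mask, BS_16x16) evaluates to 1 */
+    return (mask >> bs) & 1;
+}
+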
+EXTERN const Dav1dWarpedMotionParams dav1d_default_wm_params;
+
+EXTERN const int8_t dav1d_cdef_directions[12][2];
+
+EXTERN const uint16_t dav1d_sgr_params[16][2];
+EXTERN const uint8_t dav1d_sgr_x_by_x[256];
+
+EXTERN const int8_t dav1d_mc_subpel_filters[6][15][8];
+EXTERN const int8_t dav1d_mc_warp_filter[193][8];
+EXTERN const int8_t dav1d_resize_filter[64][8];
+
+EXTERN const uint8_t dav1d_sm_weights[128];
+EXTERN const uint16_t dav1d_dr_intra_derivative[44];
+EXTERN const int8_t dav1d_filter_intra_taps[5][64];
+
+EXTERN const uint8_t dav1d_obmc_masks[64];
+
+EXTERN const int16_t dav1d_gaussian_sequence[2048]; // for fgs
+
+#endif /* DAV1D_SRC_TABLES_H */
--- /dev/null
+/*
+ * Copyright © 2018-2021, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_THREAD_H
+#define DAV1D_SRC_THREAD_H
+
+#if defined(_WIN32)
+
+#include <limits.h>
+#include <windows.h>
+
+#define PTHREAD_ONCE_INIT INIT_ONCE_STATIC_INIT
+
+typedef struct {
+ HANDLE h;
+ void *(*func)(void*);
+ void *arg;
+} pthread_t;
+
+typedef struct {
+ unsigned stack_size;
+} pthread_attr_t;
+
+typedef SRWLOCK pthread_mutex_t;
+typedef CONDITION_VARIABLE pthread_cond_t;
+typedef INIT_ONCE pthread_once_t;
+
+void dav1d_init_thread(void);
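+/* The function takes a wide string; the macro below token-pastes L onto the
+ * caller's narrow string literal so call sites stay portable across the
+ * Windows and POSIX paths. */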
+void dav1d_set_thread_name(const wchar_t *name);
+#define dav1d_set_thread_name(name) dav1d_set_thread_name(L##name)
+
+int dav1d_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*func)(void*), void *arg);
+int dav1d_pthread_join(pthread_t *thread, void **res);
+int dav1d_pthread_once(pthread_once_t *once_control,
+ void (*init_routine)(void));
+
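+/* pthread_t is a struct in this shim, so pthread_join must take its address;
+ * POSIX passes pthread_t by value, hence the adapter macro below. */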
+#define pthread_create dav1d_pthread_create
+#define pthread_join(thread, res) dav1d_pthread_join(&(thread), res)
+#define pthread_once dav1d_pthread_once
+
+static inline int pthread_attr_init(pthread_attr_t *const attr) {
+ attr->stack_size = 0;
+ return 0;
+}
+
+static inline int pthread_attr_destroy(pthread_attr_t *const attr) {
+ return 0;
+}
+
+static inline int pthread_attr_setstacksize(pthread_attr_t *const attr,
+ const size_t stack_size)
+{
+ if (stack_size > UINT_MAX) return 1;
+ attr->stack_size = (unsigned) stack_size;
+ return 0;
+}
+
+static inline int pthread_mutex_init(pthread_mutex_t *const mutex,
+ const void *const attr)
+{
+ InitializeSRWLock(mutex);
+ return 0;
+}
+
+static inline int pthread_mutex_destroy(pthread_mutex_t *const mutex) {
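+    /* SRW locks require no cleanup on Windows, so destroy is a no-op. */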
+ return 0;
+}
+
+static inline int pthread_mutex_lock(pthread_mutex_t *const mutex) {
+ AcquireSRWLockExclusive(mutex);
+ return 0;
+}
+
+static inline int pthread_mutex_unlock(pthread_mutex_t *const mutex) {
+ ReleaseSRWLockExclusive(mutex);
+ return 0;
+}
+
+static inline int pthread_cond_init(pthread_cond_t *const cond,
+ const void *const attr)
+{
+ InitializeConditionVariable(cond);
+ return 0;
+}
+
+static inline int pthread_cond_destroy(pthread_cond_t *const cond) {
+ return 0;
+}
+
+static inline int pthread_cond_wait(pthread_cond_t *const cond,
+ pthread_mutex_t *const mutex)
+{
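+    /* SleepConditionVariableSRW returns nonzero on success; invert to match
+     * pthread's 0-on-success convention. */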
+ return !SleepConditionVariableSRW(cond, mutex, INFINITE, 0);
+}
+
+static inline int pthread_cond_signal(pthread_cond_t *const cond) {
+ WakeConditionVariable(cond);
+ return 0;
+}
+
+static inline int pthread_cond_broadcast(pthread_cond_t *const cond) {
+ WakeAllConditionVariable(cond);
+ return 0;
+}
+
+#else
+
+#include <pthread.h>
+
+#define dav1d_init_thread() do {} while (0)
+
+/* Thread naming support */
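+/* Callers pass a narrow string literal, e.g.
+ * dav1d_set_thread_name("dav1d-worker") (the name shown is illustrative);
+ * on Windows the macro above widens the literal at compile time. */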
+
+#ifdef __linux__
+
+#include <sys/prctl.h>
+
+static inline void dav1d_set_thread_name(const char *const name) {
+ prctl(PR_SET_NAME, name);
+}
+
+#elif defined(__APPLE__)
+
+static inline void dav1d_set_thread_name(const char *const name) {
+ pthread_setname_np(name);
+}
+
+#elif defined(__DragonFly__) || defined(__FreeBSD__) || defined(__OpenBSD__)
+
+#if defined(__FreeBSD__)
+ /* ALIGN from <sys/param.h> conflicts with ALIGN from "common/attributes.h" */
+#define _SYS_PARAM_H_
+#include <sys/types.h>
+#endif
+#include <pthread_np.h>
+
+static inline void dav1d_set_thread_name(const char *const name) {
+ pthread_set_name_np(pthread_self(), name);
+}
+
+#elif defined(__NetBSD__)
+
+static inline void dav1d_set_thread_name(const char *const name) {
+ pthread_setname_np(pthread_self(), "%s", (void*)name);
+}
+
+#elif defined(__HAIKU__)
+
+#include <os/kernel/OS.h>
+
+static inline void dav1d_set_thread_name(const char *const name) {
+ rename_thread(find_thread(NULL), name);
+}
+
+#else
+
+#define dav1d_set_thread_name(name) do {} while (0)
+
+#endif
+
+#endif
+
+#endif /* DAV1D_SRC_THREAD_H */
--- /dev/null
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_THREAD_DATA_H
+#define DAV1D_SRC_THREAD_DATA_H
+
+#include "src/thread.h"
+
+struct thread_data {
+ pthread_t thread;
+ pthread_cond_t cond;
+ pthread_mutex_t lock;
+ int inited;
+};
+
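+/* Typical lifecycle (illustrative sketch; `worker' is a hypothetical thread
+ * entry point, and this comment is not part of the upstream header):
+ *
+ *     struct thread_data td = { 0 };
+ *     pthread_mutex_init(&td.lock, NULL);
+ *     pthread_cond_init(&td.cond, NULL);
+ *     if (!pthread_create(&td.thread, NULL, worker, &td))
+ *         td.inited = 1;
+ *     ...
+ *     if (td.inited) pthread_join(td.thread, NULL);
+ *     pthread_cond_destroy(&td.cond);
+ *     pthread_mutex_destroy(&td.lock);
+ *
+ * On Windows the pthread_* names resolve to the shims in src/thread.h. */
+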
+#endif /* DAV1D_SRC_THREAD_DATA_H */