From: Konstantinos Margaritis Date: Tue, 11 Oct 2022 09:51:34 +0000 (+0000) Subject: WIP: add initial AV1 SVP64 porting X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=17719e8b26d6b198279f8004d90a256e0890a30b;p=openpower-isa.git WIP: add initial AV1 SVP64 porting --- diff --git a/media/video/av1/Makefile b/media/video/av1/Makefile new file mode 100644 index 00000000..3f8e026c --- /dev/null +++ b/media/video/av1/Makefile @@ -0,0 +1,29 @@ +TARGET=dav1d_svp64_test + +CC=gcc +CXX=g++ +AS=powerpc64le-linux-gnu-as +OBJCOPY=powerpc64le-linux-gnu-objcopy +CFLAGS= -O -g3 -std=c99 -I../../pypowersim_wrapper -I. -Iinclude -I/usr/include/python3.7m -DHAVE_SVP64 -D_GNU_SOURCE -DNDEBUG -D_FILE_OFFSET_BITS=64 -DBITDEPTH=16 -Wundef -Werror=vla -Wno-maybe-uninitialized -Wno-missing-field-initializers -Wno-unused-parameter -Wstrict-prototypes -Werror=missing-prototypes -fomit-frame-pointer +CXXFLAGS= -Iinclude -O -g3 +ASFLAGS= -mlibresoc -mregnames +LDFLAGS=-lgtest -pthread -lpython3.7m + +BINFILES = +ASFILES = src/ppc/cdef_tmpl_svp64_real.s +CFILES = cdef.c checkasm.c src/cpu.c src/ppc/cpu.c src/ppc/cdef_tmpl_svp64_wrapper.c src/cdef_tmpl.c src/tables.c src/log.c +CPPFILES = +OBJFILES = $(ASFILES:.s=.o) $(CFILES:.c=.o) $(CPPFILES:.cc=.o) + +%.bin: %.o + ${OBJCOPY} -I elf64-little -O binary $< $@ + +${TARGET}: ${OBJFILES} + #${OBJCOPY} --globalize-symbols=src/filmgrain_tmpl.symbols src/filmgrain_tmpl.o + ${CXX} -o ${TARGET} ${OBJFILES} ${LDFLAGS} + +all: ${TARGET} ${BINFILES} + +.PHONY: clean +clean: + rm -f ${TARGET} ${OBJFILES} ${BINFILES} diff --git a/media/video/av1/cdef.c b/media/video/av1/cdef.c new file mode 100644 index 00000000..6d77db01 --- /dev/null +++ b/media/video/av1/cdef.c @@ -0,0 +1,144 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "checkasm.h" + +#include <string.h> +#include <stdio.h> + +#include "common/dump.h" + +#include "src/levels.h" +#include "src/cdef.h" + +static int to_binary(int x) { /* 0-15 -> 0000-1111 */ + return (x & 1) + 5 * (x & 2) + 25 * (x & 4) + 125 * (x & 8); +} + +static void init_tmp(pixel *buf, int n, const int bitdepth_max) { + const int fill_type = rnd() & 7; + if (fill_type == 0) + while (n--) /* check for cdef_filter underflows */ + *buf++ = rnd() & 1; + else if (fill_type == 1) + while (n--) /* check for cdef_filter overflows */ + *buf++ = bitdepth_max - (rnd() & 1); + else + while (n--) + *buf++ = rnd() & bitdepth_max; +} + +static void check_cdef_filter(const cdef_fn fn, const int w, const int h) { + ALIGN_STK_64(pixel, c_src, 16 * 10 + 16, ), *const c_dst = c_src + 8; + ALIGN_STK_64(pixel, a_src, 16 * 10 + 16, ), *const a_dst = a_src + 8; + ALIGN_STK_64(pixel, top_buf, 16 * 2 + 16, ), *const top = top_buf + 8; + ALIGN_STK_64(pixel, bot_buf, 16 * 2 + 16, ), *const bot = bot_buf + 8; + ALIGN_STK_16(pixel, left, 8,[2]); + const ptrdiff_t stride = 16 * sizeof(pixel); + + declare_func(void, pixel *dst, ptrdiff_t dst_stride, const pixel (*left)[2], + const pixel *top, const pixel *bot, int pri_strength, + int sec_strength, int dir, int damping, + enum CdefEdgeFlags edges HIGHBD_DECL_SUFFIX); + + for (int s = 0x1; s <= 0x3; s++) { + if (check_func(fn, "cdef_filter_%dx%d_%02d_%dbpc", w, h, to_binary(s), BITDEPTH)) { + for (int dir = 0; dir < 8; dir++) { + for (enum CdefEdgeFlags edges = 0x0; edges <= 0xf; edges++) { +#if BITDEPTH == 16 + const int bitdepth_max = rnd() & 1 ? 
0x3ff : 0xfff; +#else + const int bitdepth_max = 0xff; +#endif + const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8; + + init_tmp(c_src, 16 * 10 + 16, bitdepth_max); + init_tmp(top_buf, 16 * 2 + 16, bitdepth_max); + init_tmp(bot_buf, 16 * 2 + 16, bitdepth_max); + init_tmp((pixel *) left, 8 * 2, bitdepth_max); + memcpy(a_src, c_src, (16 * 10 + 16) * sizeof(pixel)); + + const int pri_strength = s & 2 ? (1 + (rnd() % 15)) << bitdepth_min_8 : 0; + const int sec_strength = s & 1 ? 1 << ((rnd() % 3) + bitdepth_min_8) : 0; + const int damping = 3 + (rnd() & 3) + bitdepth_min_8 - (w == 4 || (rnd() & 1)); + call_ref(c_dst, stride, left, top, bot, pri_strength, sec_strength, + dir, damping, edges HIGHBD_TAIL_SUFFIX); + call_new(a_dst, stride, left, top, bot, pri_strength, sec_strength, + dir, damping, edges HIGHBD_TAIL_SUFFIX); + if (checkasm_check_pixel(c_dst, stride, a_dst, stride, w, h, "dst")) { + fprintf(stderr, "strength = %d:%d, dir = %d, damping = %d, edges = %04d\n", + pri_strength, sec_strength, dir, damping, to_binary(edges)); + return; + } + if (dir == 7 && (edges == 0x5 || edges == 0xa || edges == 0xf)) + bench_new(alternate(c_dst, a_dst), stride, left, top, bot, pri_strength, + sec_strength, dir, damping, edges HIGHBD_TAIL_SUFFIX); + } + } + } + } +} + +static void check_cdef_direction(const cdef_dir_fn fn) { + ALIGN_STK_64(pixel, src, 8 * 8,); + + declare_func(int, pixel *src, ptrdiff_t dst_stride, unsigned *var + HIGHBD_DECL_SUFFIX); + + if (check_func(fn, "cdef_dir_%dbpc", BITDEPTH)) { + unsigned c_var, a_var; +#if BITDEPTH == 16 + const int bitdepth_max = rnd() & 1 ? 
0x3ff : 0xfff; +#else + const int bitdepth_max = 0xff; +#endif + init_tmp(src, 64, bitdepth_max); + + const int c_dir = call_ref(src, 8 * sizeof(pixel), &c_var HIGHBD_TAIL_SUFFIX); + const int a_dir = call_new(src, 8 * sizeof(pixel), &a_var HIGHBD_TAIL_SUFFIX); + if (c_var != a_var || c_dir != a_dir) { + if (fail()) { + hex_fdump(stderr, src, 8 * sizeof(pixel), 8, 8, "src"); + fprintf(stderr, "c_dir %d a_dir %d\n", c_dir, a_dir); + } + } + bench_new(src, 8 * sizeof(pixel), &a_var HIGHBD_TAIL_SUFFIX); + } + report("cdef_dir"); +} + +void bitfn(checkasm_check_cdef)(void) { + Dav1dCdefDSPContext c; + bitfn(dav1d_cdef_dsp_init)(&c); + + check_cdef_direction(c.dir); + + check_cdef_filter(c.fb[0], 8, 8); + check_cdef_filter(c.fb[1], 4, 8); + check_cdef_filter(c.fb[2], 4, 4); + report("cdef_filter"); +} diff --git a/media/video/av1/checkasm.c b/media/video/av1/checkasm.c new file mode 100644 index 00000000..824d8352 --- /dev/null +++ b/media/video/av1/checkasm.c @@ -0,0 +1,908 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "checkasm.h" + +#include <math.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> + +#include "src/cpu.h" + +#ifdef _WIN32 +#include <windows.h> +#define COLOR_RED FOREGROUND_RED +#define COLOR_GREEN FOREGROUND_GREEN +#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN) +#else +#include <unistd.h> +#include <signal.h> +#include <time.h> +#ifdef __APPLE__ +#include <mach/mach_time.h> +#endif +#define COLOR_RED 1 +#define COLOR_GREEN 2 +#define COLOR_YELLOW 3 +#endif + +/* List of tests to invoke */ +static const struct { + const char *name; + void (*func)(void); +} tests[] = { +#if CONFIG_16BPC + { "cdef_16bpc", checkasm_check_cdef_16bpc }, +#endif + { 0 } +}; + +/* List of cpu flags to check */ +static const struct { + const char *name; + const char *suffix; + unsigned flag; +} cpus[] = { +#if ARCH_X86 + { "SSE2", "sse2", DAV1D_X86_CPU_FLAG_SSE2 }, + { "SSSE3", "ssse3", DAV1D_X86_CPU_FLAG_SSSE3 }, + { "SSE4.1", "sse4", DAV1D_X86_CPU_FLAG_SSE41 }, + { "AVX2", "avx2", DAV1D_X86_CPU_FLAG_AVX2 }, + { "AVX-512 (Ice Lake)", "avx512icl", DAV1D_X86_CPU_FLAG_AVX512ICL }, +#elif ARCH_AARCH64 || ARCH_ARM + { "NEON", "neon", DAV1D_ARM_CPU_FLAG_NEON }, +#elif ARCH_PPC64LE + { "VSX", "vsx", DAV1D_PPC_CPU_FLAG_VSX }, + { "SVP64", "svp64", DAV1D_PPC_CPU_FLAG_SVP64 }, +#endif + { 0 } +}; + +typedef struct CheckasmFuncVersion { + struct CheckasmFuncVersion *next; + void *func; + int ok; + unsigned cpu; + int iterations; + uint64_t cycles; +} CheckasmFuncVersion; + +/* Binary search tree node */ +typedef struct CheckasmFunc { 
+ struct CheckasmFunc *child[2]; + CheckasmFuncVersion versions; + uint8_t color; /* 0 = red, 1 = black */ + char name[]; +} CheckasmFunc; + +/* Internal state */ +static struct { + CheckasmFunc *funcs; + CheckasmFunc *current_func; + CheckasmFuncVersion *current_func_ver; + const char *current_test_name; + int num_checked; + int num_failed; + int nop_time; + unsigned cpu_flag; + const char *cpu_flag_name; + const char *test_pattern; + const char *function_pattern; + unsigned seed; + int bench; + int bench_c; + int verbose; + int function_listing; + int catch_signals; +#if ARCH_X86_64 + void (*simd_warmup)(void); +#endif +} state; + +/* float compare support code */ +typedef union { + float f; + uint32_t i; +} intfloat; + +static uint32_t xs_state[4]; + +static void xor128_srand(unsigned seed) { + xs_state[0] = seed; + xs_state[1] = ( seed & 0xffff0000) | (~seed & 0x0000ffff); + xs_state[2] = (~seed & 0xffff0000) | ( seed & 0x0000ffff); + xs_state[3] = ~seed; +} + +// xor128 from Marsaglia, George (July 2003). "Xorshift RNGs". +// Journal of Statistical Software. 8 (14). +// doi:10.18637/jss.v008.i14. 
+int xor128_rand(void) { + const uint32_t x = xs_state[0]; + const uint32_t t = x ^ (x << 11); + + xs_state[0] = xs_state[1]; + xs_state[1] = xs_state[2]; + xs_state[2] = xs_state[3]; + uint32_t w = xs_state[3]; + + w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); + xs_state[3] = w; + + return w >> 1; +} + +static int is_negative(const intfloat u) { + return u.i >> 31; +} + +int float_near_ulp(const float a, const float b, const unsigned max_ulp) { + intfloat x, y; + + x.f = a; + y.f = b; + + if (is_negative(x) != is_negative(y)) { + // handle -0.0 == +0.0 + return a == b; + } + + if (llabs((int64_t)x.i - y.i) <= max_ulp) + return 1; + + return 0; +} + +int float_near_ulp_array(const float *const a, const float *const b, + const unsigned max_ulp, const int len) +{ + for (int i = 0; i < len; i++) + if (!float_near_ulp(a[i], b[i], max_ulp)) + return 0; + + return 1; +} + +int float_near_abs_eps(const float a, const float b, const float eps) { + return fabsf(a - b) < eps; +} + +int float_near_abs_eps_array(const float *const a, const float *const b, + const float eps, const int len) +{ + for (int i = 0; i < len; i++) + if (!float_near_abs_eps(a[i], b[i], eps)) + return 0; + + return 1; +} + +int float_near_abs_eps_ulp(const float a, const float b, const float eps, + const unsigned max_ulp) +{ + return float_near_ulp(a, b, max_ulp) || float_near_abs_eps(a, b, eps); +} + +int float_near_abs_eps_array_ulp(const float *const a, const float *const b, + const float eps, const unsigned max_ulp, + const int len) +{ + for (int i = 0; i < len; i++) + if (!float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp)) + return 0; + + return 1; +} + +/* Print colored text to stderr if the terminal supports it */ +static void color_printf(const int color, const char *const fmt, ...) 
{ + static int8_t use_color = -1; + va_list arg; + +#ifdef _WIN32 + static HANDLE con; + static WORD org_attributes; + + if (use_color < 0) { + CONSOLE_SCREEN_BUFFER_INFO con_info; + con = GetStdHandle(STD_ERROR_HANDLE); + if (con && con != INVALID_HANDLE_VALUE && + GetConsoleScreenBufferInfo(con, &con_info)) + { + org_attributes = con_info.wAttributes; + use_color = 1; + } else + use_color = 0; + } + if (use_color) + SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | + (color & 0x0f)); +#else + if (use_color < 0) { + const char *const term = getenv("TERM"); + use_color = term && strcmp(term, "dumb") && isatty(2); + } + if (use_color) + fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07); +#endif + + va_start(arg, fmt); + vfprintf(stderr, fmt, arg); + va_end(arg); + + if (use_color) { +#ifdef _WIN32 + SetConsoleTextAttribute(con, org_attributes); +#else + fprintf(stderr, "\x1b[0m"); +#endif + } +} + +/* Deallocate a tree */ +static void destroy_func_tree(CheckasmFunc *const f) { + if (f) { + CheckasmFuncVersion *v = f->versions.next; + while (v) { + CheckasmFuncVersion *next = v->next; + free(v); + v = next; + } + + destroy_func_tree(f->child[0]); + destroy_func_tree(f->child[1]); + free(f); + } +} + +/* Allocate a zero-initialized block, clean up and exit on failure */ +static void *checkasm_malloc(const size_t size) { + void *const ptr = calloc(1, size); + if (!ptr) { + fprintf(stderr, "checkasm: malloc failed\n"); + destroy_func_tree(state.funcs); + exit(1); + } + return ptr; +} + +/* Get the suffix of the specified cpu flag */ +static const char *cpu_suffix(const unsigned cpu) { + for (int i = (int)(sizeof(cpus) / sizeof(*cpus)) - 2; i >= 0; i--) + if (cpu & cpus[i].flag) + return cpus[i].suffix; + + return "c"; +} + +#ifdef readtime +static int cmp_nop(const void *a, const void *b) { + return *(const uint16_t*)a - *(const uint16_t*)b; +} + +/* Measure the overhead of the timing code (in decicycles) */ +static int measure_nop_time(void) { + 
uint16_t nops[10000]; + int nop_sum = 0; + + for (int i = 0; i < 10000; i++) { + uint64_t t = readtime(); + nops[i] = (uint16_t) (readtime() - t); + } + + qsort(nops, 10000, sizeof(uint16_t), cmp_nop); + for (int i = 2500; i < 7500; i++) + nop_sum += nops[i]; + + return nop_sum / 500; +} + +/* Print benchmark results */ +static void print_benchs(const CheckasmFunc *const f) { + if (f) { + print_benchs(f->child[0]); + + /* Only print functions with at least one assembly version */ + if (state.bench_c || f->versions.cpu || f->versions.next) { + const CheckasmFuncVersion *v = &f->versions; + do { + if (v->iterations) { + const int decicycles = (int) (10*v->cycles/v->iterations - + state.nop_time) / 4; + printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), + decicycles/10, decicycles%10); + } + } while ((v = v->next)); + } + + print_benchs(f->child[1]); + } +} +#endif + +static void print_functions(const CheckasmFunc *const f) { + if (f) { + print_functions(f->child[0]); + printf("%s\n", f->name); + print_functions(f->child[1]); + } +} + +#define is_digit(x) ((x) >= '0' && (x) <= '9') + +/* ASCIIbetical sort except preserving natural order for numbers */ +static int cmp_func_names(const char *a, const char *b) { + const char *const start = a; + int ascii_diff, digit_diff; + + for (; !(ascii_diff = *(const unsigned char*)a - + *(const unsigned char*)b) && *a; a++, b++); + for (; is_digit(*a) && is_digit(*b); a++, b++); + + if (a > start && is_digit(a[-1]) && + (digit_diff = is_digit(*a) - is_digit(*b))) + { + return digit_diff; + } + + return ascii_diff; +} + +/* Perform a tree rotation in the specified direction and return the new root */ +static CheckasmFunc *rotate_tree(CheckasmFunc *const f, const int dir) { + CheckasmFunc *const r = f->child[dir^1]; + f->child[dir^1] = r->child[dir]; + r->child[dir] = f; + r->color = f->color; + f->color = 0; + return r; +} + +#define is_red(f) ((f) && !(f)->color) + +/* Balance a left-leaning red-black tree at the specified node 
*/ +static void balance_tree(CheckasmFunc **const root) { + CheckasmFunc *const f = *root; + + if (is_red(f->child[0]) && is_red(f->child[1])) { + f->color ^= 1; + f->child[0]->color = f->child[1]->color = 1; + } + else if (!is_red(f->child[0]) && is_red(f->child[1])) + *root = rotate_tree(f, 0); /* Rotate left */ + else if (is_red(f->child[0]) && is_red(f->child[0]->child[0])) + *root = rotate_tree(f, 1); /* Rotate right */ +} + +/* Get a node with the specified name, creating it if it doesn't exist */ +static CheckasmFunc *get_func(CheckasmFunc **const root, const char *const name) { + CheckasmFunc *f = *root; + + if (f) { + /* Search the tree for a matching node */ + const int cmp = cmp_func_names(name, f->name); + if (cmp) { + f = get_func(&f->child[cmp > 0], name); + + /* Rebalance the tree on the way up if a new node was inserted */ + if (!f->versions.func) + balance_tree(root); + } + } else { + /* Allocate and insert a new node into the tree */ + const size_t name_length = strlen(name) + 1; + f = *root = checkasm_malloc(offsetof(CheckasmFunc, name) + name_length); + memcpy(f->name, name, name_length); + } + + return f; +} + +checkasm_context checkasm_context_buf; + +/* Crash handling: attempt to catch crashes and handle them + * gracefully instead of just aborting abruptly. 
*/ +#ifdef _WIN32 +static LONG NTAPI signal_handler(EXCEPTION_POINTERS *const e) { + if (!state.catch_signals) + return EXCEPTION_CONTINUE_SEARCH; + + const char *err; + switch (e->ExceptionRecord->ExceptionCode) { + case EXCEPTION_FLT_DIVIDE_BY_ZERO: + case EXCEPTION_INT_DIVIDE_BY_ZERO: + err = "fatal arithmetic error"; + break; + case EXCEPTION_ILLEGAL_INSTRUCTION: + case EXCEPTION_PRIV_INSTRUCTION: + err = "illegal instruction"; + break; + case EXCEPTION_ACCESS_VIOLATION: + case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: + case EXCEPTION_DATATYPE_MISALIGNMENT: + case EXCEPTION_IN_PAGE_ERROR: + case EXCEPTION_STACK_OVERFLOW: + err = "segmentation fault"; + break; + default: + return EXCEPTION_CONTINUE_SEARCH; + } + state.catch_signals = 0; + checkasm_fail_func(err); + checkasm_load_context(); + return EXCEPTION_CONTINUE_EXECUTION; /* never reached, but shuts up gcc */ +} +#else +static void signal_handler(const int s) { + if (state.catch_signals) { + state.catch_signals = 0; + checkasm_fail_func(s == SIGFPE ? "fatal arithmetic error" : + s == SIGILL ? "illegal instruction" : + "segmentation fault"); + checkasm_load_context(); + } else { + /* fall back to the default signal handler */ + static const struct sigaction default_sa = { .sa_handler = SIG_DFL }; + sigaction(s, &default_sa, NULL); + raise(s); + } +} +#endif + +/* Compares a string with a wildcard pattern. 
*/ +static int wildstrcmp(const char *str, const char *pattern) { + const char *wild = strchr(pattern, '*'); + if (wild) { + const size_t len = wild - pattern; + if (strncmp(str, pattern, len)) return 1; + while (*++wild == '*'); + if (!*wild) return 0; + str += len; + while (*str && wildstrcmp(str, wild)) str++; + return !*str; + } + return strcmp(str, pattern); +} + +/* Perform tests and benchmarks for the specified + * cpu flag if supported by the host */ +static void check_cpu_flag(const char *const name, unsigned flag) { + const unsigned old_cpu_flag = state.cpu_flag; + + flag |= old_cpu_flag; + dav1d_set_cpu_flags_mask(flag); + state.cpu_flag = dav1d_get_cpu_flags(); + + if (!flag || state.cpu_flag != old_cpu_flag) { + state.cpu_flag_name = name; + for (int i = 0; tests[i].func; i++) { + if (state.test_pattern && wildstrcmp(tests[i].name, state.test_pattern)) + continue; + xor128_srand(state.seed); + state.current_test_name = tests[i].name; + tests[i].func(); + } + } +} + +/* Print the name of the current CPU flag, but only do it once */ +static void print_cpu_name(void) { + if (state.cpu_flag_name) { + color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name); + state.cpu_flag_name = NULL; + } +} + +static unsigned get_seed(void) { +#ifdef _WIN32 + LARGE_INTEGER i; + QueryPerformanceCounter(&i); + return i.LowPart; +#elif defined(__APPLE__) + return (unsigned) mach_absolute_time(); +#else + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (unsigned) (1000000000ULL * ts.tv_sec + ts.tv_nsec); +#endif +} + +int main(int argc, char *argv[]) { /* options are consumed from argv[1] onwards; any argument that is not a known option is parsed as the rng seed */ + state.seed = get_seed(); + + while (argc > 1) { + if (!strncmp(argv[1], "--help", 6) || !strcmp(argv[1], "-h")) { + fprintf(stderr, + "checkasm [options] <random seed>\n" + " <random seed> Numeric value to seed the rng\n" + "Options:\n" + " --test=<test_name> Test only <test_name>\n" + " --function=<pattern> -f Test only the functions matching <pattern>\n" + " --bench -b Benchmark the tested functions\n" + " --list-functions List available functions\n" + " --list-tests 
List available tests\n" + " --bench-c -c Benchmark the C-only functions\n" + " --verbose -v Print failures verbosely\n"); + return 0; + } else if (!strcmp(argv[1], "--bench-c") || !strcmp(argv[1], "-c")) { + state.bench_c = 1; + } else if (!strcmp(argv[1], "--bench") || !strcmp(argv[1], "-b")) { +#ifndef readtime + fprintf(stderr, + "checkasm: --bench is not supported on your system\n"); + return 1; +#endif + state.bench = 1; + } else if (!strncmp(argv[1], "--test=", 7)) { + state.test_pattern = argv[1] + 7; + } else if (!strcmp(argv[1], "-t")) { + state.test_pattern = argc > 2 ? argv[2] : ""; /* operand exists only when argc > 2; otherwise argv[2] is the terminating NULL */ + argc--; + argv++; + } else if (!strncmp(argv[1], "--function=", 11)) { + state.function_pattern = argv[1] + 11; + } else if (!strcmp(argv[1], "-f")) { + state.function_pattern = argc > 2 ? argv[2] : ""; /* same operand check as -t */ + argc--; + argv++; + } else if (!strcmp(argv[1], "--list-functions")) { + state.function_listing = 1; + } else if (!strcmp(argv[1], "--list-tests")) { + for (int i = 0; tests[i].name; i++) + printf("%s\n", tests[i].name); + return 0; + } else if (!strcmp(argv[1], "--verbose") || !strcmp(argv[1], "-v")) { + state.verbose = 1; + } else { + state.seed = (unsigned) strtoul(argv[1], NULL, 10); + } + + argc--; + argv++; + } + +#if TRIM_DSP_FUNCTIONS + fprintf(stderr, "checkasm: reference functions unavailable\n"); + return 0; +#endif + + dav1d_init_cpu(); + +#ifdef _WIN32 +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + AddVectoredExceptionHandler(0, signal_handler); +#endif +#else + const struct sigaction sa = { + .sa_handler = signal_handler, + .sa_flags = SA_NODEFER, + }; + sigaction(SIGBUS, &sa, NULL); + sigaction(SIGFPE, &sa, NULL); + sigaction(SIGILL, &sa, NULL); + sigaction(SIGSEGV, &sa, NULL); +#endif + +#ifdef readtime + if (state.bench) { + static int testing = 0; + checkasm_save_context(); + if (!testing) { + checkasm_set_signal_handler_state(1); + testing = 1; + readtime(); + checkasm_set_signal_handler_state(0); + } else { + fprintf(stderr, "checkasm: unable 
to access cycle counter\n"); + return 1; + } + } +#endif + + int ret = 0; + + if (!state.function_listing) { +#if ARCH_X86_64 + void checkasm_warmup_avx2(void); + void checkasm_warmup_avx512(void); + const unsigned cpu_flags = dav1d_get_cpu_flags(); + if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX512ICL) + state.simd_warmup = checkasm_warmup_avx512; + else if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX2) + state.simd_warmup = checkasm_warmup_avx2; + checkasm_simd_warmup(); +#endif +#if ARCH_X86 + unsigned checkasm_init_x86(char *name); + char name[48]; + const unsigned cpuid = checkasm_init_x86(name); + for (size_t len = strlen(name); len && name[len-1] == ' '; len--) + name[len-1] = '\0'; /* trim trailing whitespace */ + fprintf(stderr, "checkasm: %s (%08X) using random seed %u\n", name, cpuid, state.seed); +#else + fprintf(stderr, "checkasm: using random seed %u\n", state.seed); +#endif + } + + check_cpu_flag(NULL, 0); + + if (state.function_listing) { + print_functions(state.funcs); + } else { + for (int i = 0; cpus[i].flag; i++) + check_cpu_flag(cpus[i].name, cpus[i].flag); + if (!state.num_checked) { + fprintf(stderr, "checkasm: no tests to perform\n"); + } else if (state.num_failed) { + fprintf(stderr, "checkasm: %d of %d tests have failed\n", + state.num_failed, state.num_checked); + ret = 1; + } else { + fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked); +#ifdef readtime + if (state.bench) { + state.nop_time = measure_nop_time(); + printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10); + print_benchs(state.funcs); + } +#endif + } + } + + destroy_func_tree(state.funcs); + return ret; +} + +/* Decide whether or not the specified function needs to be tested and + * allocate/initialize data structures if needed. Returns a pointer to a + * reference function if the function should be tested, otherwise NULL */ +void *checkasm_check_func(void *const func, const char *const name, ...) 
{ + char name_buf[256]; + va_list arg; + + va_start(arg, name); + const int name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg); + va_end(arg); + + if (!func || name_length <= 0 || (size_t)name_length >= sizeof(name_buf) || + (state.function_pattern && wildstrcmp(name_buf, state.function_pattern))) + { + return NULL; + } + + state.current_func = get_func(&state.funcs, name_buf); + + if (state.function_listing) /* Save function names without running tests */ + return NULL; + + state.funcs->color = 1; + CheckasmFuncVersion *v = &state.current_func->versions; + void *ref = func; + + if (v->func) { + CheckasmFuncVersion *prev; + do { + /* Only test functions that haven't already been tested */ + if (v->func == func) + return NULL; + + if (v->ok) + ref = v->func; + + prev = v; + } while ((v = v->next)); + + v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); + } + + v->func = func; + v->ok = 1; + v->cpu = state.cpu_flag; + state.current_func_ver = v; + xor128_srand(state.seed); + + if (state.cpu_flag || state.bench_c) + state.num_checked++; + + return ref; +} + +/* Decide whether or not the current function needs to be benchmarked */ +int checkasm_bench_func(void) { + return !state.num_failed && state.bench; +} + +/* Indicate that the current test has failed, return whether verbose printing + * is requested. */ +int checkasm_fail_func(const char *const msg, ...) 
{ + if (state.current_func_ver && state.current_func_ver->cpu && + state.current_func_ver->ok) + { + va_list arg; + + print_cpu_name(); + fprintf(stderr, " %s_%s (", state.current_func->name, + cpu_suffix(state.current_func_ver->cpu)); + va_start(arg, msg); + vfprintf(stderr, msg, arg); + va_end(arg); + fprintf(stderr, ")\n"); + + state.current_func_ver->ok = 0; + state.num_failed++; + } + return state.verbose; +} + +/* Update benchmark results of the current function */ +void checkasm_update_bench(const int iterations, const uint64_t cycles) { + state.current_func_ver->iterations += iterations; + state.current_func_ver->cycles += cycles; +} + +/* Print the outcome of all tests performed since + * the last time this function was called */ +void checkasm_report(const char *const name, ...) { + static int prev_checked, prev_failed; + static size_t max_length; + + if (state.num_checked > prev_checked) { + int pad_length = (int) max_length + 4; + va_list arg; + + print_cpu_name(); + pad_length -= fprintf(stderr, " - %s.", state.current_test_name); + va_start(arg, name); + pad_length -= vfprintf(stderr, name, arg); + va_end(arg); + fprintf(stderr, "%*c", imax(pad_length, 0) + 2, '['); + + if (state.num_failed == prev_failed) + color_printf(COLOR_GREEN, "OK"); + else + color_printf(COLOR_RED, "FAILED"); + fprintf(stderr, "]\n"); + + prev_checked = state.num_checked; + prev_failed = state.num_failed; + } else if (!state.cpu_flag) { + /* Calculate the amount of padding required + * to make the output vertically aligned */ + size_t length = strlen(state.current_test_name); + va_list arg; + + va_start(arg, name); + length += vsnprintf(NULL, 0, name, arg); + va_end(arg); + + if (length > max_length) + max_length = length; + } +} + +void checkasm_set_signal_handler_state(const int enabled) { + state.catch_signals = enabled; +} + +static int check_err(const char *const file, const int line, + const char *const name, const int w, const int h, + int *const err) +{ + if (*err) + 
return 0; + if (!checkasm_fail_func("%s:%d", file, line)) + return 1; + *err = 1; + fprintf(stderr, "%s (%dx%d):\n", name, w, h); + return 0; +} + +#define DEF_CHECKASM_CHECK_FUNC(type, fmt) \ +int checkasm_check_##type(const char *const file, const int line, \ + const type *buf1, ptrdiff_t stride1, \ + const type *buf2, ptrdiff_t stride2, \ + const int w, int h, const char *const name, \ + const int align_w, const int align_h, \ + const int padding) \ +{ \ + int aligned_w = (w + align_w - 1) & ~(align_w - 1); \ + int aligned_h = (h + align_h - 1) & ~(align_h - 1); \ + int err = 0; \ + stride1 /= sizeof(*buf1); \ + stride2 /= sizeof(*buf2); \ + int y = 0; \ + for (y = 0; y < h; y++) \ + if (memcmp(&buf1[y*stride1], &buf2[y*stride2], w*sizeof(*buf1))) \ + break; \ + if (y != h) { \ + if (check_err(file, line, name, w, h, &err)) \ + return 1; \ + for (y = 0; y < h; y++) { \ + for (int x = 0; x < w; x++) \ + fprintf(stderr, " " fmt, buf1[x]); \ + fprintf(stderr, " "); \ + for (int x = 0; x < w; x++) \ + fprintf(stderr, " " fmt, buf2[x]); \ + fprintf(stderr, " "); \ + for (int x = 0; x < w; x++) \ + fprintf(stderr, "%c", buf1[x] != buf2[x] ? 
'x' : '.'); \ + buf1 += stride1; \ + buf2 += stride2; \ + fprintf(stderr, "\n"); \ + } \ + buf1 -= h*stride1; \ + buf2 -= h*stride2; \ + } \ + for (y = -padding; y < 0; y++) \ + if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \ + (w + 2*padding)*sizeof(*buf1))) { \ + if (check_err(file, line, name, w, h, &err)) \ + return 1; \ + fprintf(stderr, " overwrite above\n"); \ + break; \ + } \ + for (y = aligned_h; y < aligned_h + padding; y++) \ + if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \ + (w + 2*padding)*sizeof(*buf1))) { \ + if (check_err(file, line, name, w, h, &err)) \ + return 1; \ + fprintf(stderr, " overwrite below\n"); \ + break; \ + } \ + for (y = 0; y < h; y++) \ + if (memcmp(&buf1[y*stride1 - padding], &buf2[y*stride2 - padding], \ + padding*sizeof(*buf1))) { \ + if (check_err(file, line, name, w, h, &err)) \ + return 1; \ + fprintf(stderr, " overwrite left\n"); \ + break; \ + } \ + for (y = 0; y < h; y++) \ + if (memcmp(&buf1[y*stride1 + aligned_w], &buf2[y*stride2 + aligned_w], \ + padding*sizeof(*buf1))) { \ + if (check_err(file, line, name, w, h, &err)) \ + return 1; \ + fprintf(stderr, " overwrite right\n"); \ + break; \ + } \ + return err; \ +} + +DEF_CHECKASM_CHECK_FUNC(int8_t, "%4d") +DEF_CHECKASM_CHECK_FUNC(int16_t, "%6d") +DEF_CHECKASM_CHECK_FUNC(int32_t, "%9d") +DEF_CHECKASM_CHECK_FUNC(uint8_t, "%02x") +DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x") +DEF_CHECKASM_CHECK_FUNC(uint32_t, "%08x") + +#if ARCH_X86_64 +void checkasm_simd_warmup(void) +{ + if (state.simd_warmup) + state.simd_warmup(); +} +#endif diff --git a/media/video/av1/checkasm.h b/media/video/av1/checkasm.h new file mode 100644 index 00000000..29c1dbe2 --- /dev/null +++ b/media/video/av1/checkasm.h @@ -0,0 +1,379 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_TESTS_CHECKASM_CHECKASM_H +#define DAV1D_TESTS_CHECKASM_CHECKASM_H + +#include "config.h" + +#include +#include + +#if ARCH_X86_64 && defined(_WIN32) +/* setjmp/longjmp on 64-bit Windows will try to use SEH to unwind the stack, + * which doesn't work for assembly functions without unwind information. 
*/ +#include +#define checkasm_context CONTEXT +#define checkasm_save_context() RtlCaptureContext(&checkasm_context_buf) +#define checkasm_load_context() RtlRestoreContext(&checkasm_context_buf, NULL) +#else +#include +#define checkasm_context jmp_buf +#define checkasm_save_context() setjmp(checkasm_context_buf) +#define checkasm_load_context() longjmp(checkasm_context_buf, 1) +#endif + +#include "include/common/attributes.h" +#include "include/common/bitdepth.h" +#include "include/common/intops.h" + +int xor128_rand(void); +#define rnd xor128_rand + +#define decl_check_bitfns(name) \ +name##_8bpc(void); \ +name##_16bpc(void) + +void checkasm_check_msac(void); +void checkasm_check_refmvs(void); +decl_check_bitfns(void checkasm_check_cdef); +decl_check_bitfns(void checkasm_check_filmgrain); +decl_check_bitfns(void checkasm_check_ipred); +decl_check_bitfns(void checkasm_check_itx); +decl_check_bitfns(void checkasm_check_loopfilter); +decl_check_bitfns(void checkasm_check_looprestoration); +decl_check_bitfns(void checkasm_check_mc); + +void *checkasm_check_func(void *func, const char *name, ...); +int checkasm_bench_func(void); +int checkasm_fail_func(const char *msg, ...); +void checkasm_update_bench(int iterations, uint64_t cycles); +void checkasm_report(const char *name, ...); +void checkasm_set_signal_handler_state(int enabled); +extern checkasm_context checkasm_context_buf; + +/* float compare utilities */ +int float_near_ulp(float a, float b, unsigned max_ulp); +int float_near_abs_eps(float a, float b, float eps); +int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp); +int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp, + int len); +int float_near_abs_eps_array(const float *a, const float *b, float eps, + int len); +int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps, + unsigned max_ulp, int len); + +#define BENCH_RUNS (1 << 12) /* Trade-off between accuracy and speed */ + +/* Decide whether or 
not the specified function needs to be tested */ +#define check_func(func, ...)\ + (func_ref = checkasm_check_func((func_new = func), __VA_ARGS__)) + +/* Declare the function prototype. The first argument is the return value, + * the remaining arguments are the function parameters. Naming parameters + * is optional. */ +#define declare_func(ret, ...)\ + declare_new(ret, __VA_ARGS__)\ + void *func_ref, *func_new;\ + typedef ret func_type(__VA_ARGS__);\ + checkasm_save_context() + +/* Indicate that the current test has failed */ +#define fail() checkasm_fail_func("%s:%d", __FILE__, __LINE__) + +/* Print the test outcome */ +#define report checkasm_report + +/* Call the reference function */ +#define call_ref(...)\ + (checkasm_set_signal_handler_state(1),\ + ((func_type *)func_ref)(__VA_ARGS__));\ + checkasm_set_signal_handler_state(0) + +#if HAVE_ASM +#if ARCH_X86 +#if defined(_MSC_VER) && !defined(__clang__) +#include +#define readtime() (_mm_lfence(), __rdtsc()) +#else +static inline uint64_t readtime(void) { + uint32_t eax, edx; + __asm__ __volatile__("lfence\nrdtsc" : "=a"(eax), "=d"(edx)); + return (((uint64_t)edx) << 32) | eax; +} +#define readtime readtime +#endif +#elif (ARCH_AARCH64 || ARCH_ARM) && defined(__APPLE__) +#include +#define readtime() mach_absolute_time() +#elif ARCH_AARCH64 +#ifdef _MSC_VER +#include +#define readtime() (_InstructionSynchronizationBarrier(), ReadTimeStampCounter()) +#else +static inline uint64_t readtime(void) { + uint64_t cycle_counter; + /* This requires enabling user mode access to the cycle counter (which + * can only be done from kernel space). + * This could also read cntvct_el0 instead of pmccntr_el0; that register + * might also be readable (depending on kernel version), but it has much + * worse precision (it's a fixed 50 MHz timer). 
*/ + __asm__ __volatile__("isb\nmrs %0, pmccntr_el0" + : "=r"(cycle_counter) + :: "memory"); + return cycle_counter; +} +#define readtime readtime +#endif +#elif ARCH_ARM && !defined(_MSC_VER) && __ARM_ARCH >= 7 +static inline uint64_t readtime(void) { + uint32_t cycle_counter; + /* This requires enabling user mode access to the cycle counter (which + * can only be done from kernel space). */ + __asm__ __volatile__("isb\nmrc p15, 0, %0, c9, c13, 0" + : "=r"(cycle_counter) + :: "memory"); + return cycle_counter; +} +#define readtime readtime +#elif ARCH_PPC64LE +static inline uint64_t readtime(void) { + uint32_t tbu, tbl, temp; + + __asm__ __volatile__( + "1:\n" + "mfspr %2,269\n" + "mfspr %0,268\n" + "mfspr %1,269\n" + "cmpw %2,%1\n" + "bne 1b\n" + : "=r"(tbl), "=r"(tbu), "=r"(temp) + : + : "cc"); + + return (((uint64_t)tbu) << 32) | (uint64_t)tbl; +} +#define readtime readtime +#endif + +/* Verifies that clobbered callee-saved registers + * are properly saved and restored */ +void checkasm_checked_call(void *func, ...); + +#if ARCH_X86_64 +/* YMM and ZMM registers on x86 are turned off to save power when they haven't + * been used for some period of time. When they are used there will be a + * "warmup" period during which performance will be reduced and inconsistent + * which is problematic when trying to benchmark individual functions. We can + * work around this by periodically issuing "dummy" instructions that uses + * those registers to keep them powered on. */ +void checkasm_simd_warmup(void); + +/* The upper 32 bits of 32-bit data types are undefined when passed as function + * parameters. In practice those bits usually end up being zero which may hide + * certain bugs, such as using a register containing undefined bits as a pointer + * offset, so we want to intentionally clobber those bits with junk to expose + * any issues. 
The following set of macros automatically calculates a bitmask + * specifying which parameters should have their upper halves clobbered. */ +#ifdef _WIN32 +/* Integer and floating-point parameters share "register slots". */ +#define IGNORED_FP_ARGS 0 +#else +/* Up to 8 floating-point parameters are passed in XMM registers, which are + * handled orthogonally from integer parameters passed in GPR registers. */ +#define IGNORED_FP_ARGS 8 +#endif +#ifdef HAVE_C11_GENERIC +#define clobber_type(arg) _Generic((void (*)(void*, arg))NULL,\ + void (*)(void*, int32_t ): clobber_mask |= 1 << mpos++,\ + void (*)(void*, uint32_t): clobber_mask |= 1 << mpos++,\ + void (*)(void*, float ): mpos += (fp_args++ >= IGNORED_FP_ARGS),\ + void (*)(void*, double ): mpos += (fp_args++ >= IGNORED_FP_ARGS),\ + default: mpos++) +#define init_clobber_mask(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, ...)\ + unsigned clobber_mask = 0;\ + {\ + int mpos = 0, fp_args = 0;\ + clobber_type(a); clobber_type(b); clobber_type(c); clobber_type(d);\ + clobber_type(e); clobber_type(f); clobber_type(g); clobber_type(h);\ + clobber_type(i); clobber_type(j); clobber_type(k); clobber_type(l);\ + clobber_type(m); clobber_type(n); clobber_type(o); clobber_type(p);\ + } +#else +/* Skip parameter clobbering on compilers without support for _Generic() */ +#define init_clobber_mask(...) 
unsigned clobber_mask = 0 +#endif +#define declare_new(ret, ...)\ + ret (*checked_call)(__VA_ARGS__, int, int, int, int, int, int, int,\ + int, int, int, int, int, int, int, int, int,\ + void*, unsigned) =\ + (void*)checkasm_checked_call;\ + init_clobber_mask(__VA_ARGS__, void*, void*, void*, void*,\ + void*, void*, void*, void*, void*, void*,\ + void*, void*, void*, void*, void*); +#define call_new(...)\ + (checkasm_set_signal_handler_state(1),\ + checkasm_simd_warmup(),\ + checked_call(__VA_ARGS__, 16, 15, 14, 13, 12, 11, 10, 9, 8,\ + 7, 6, 5, 4, 3, 2, 1, func_new, clobber_mask));\ + checkasm_set_signal_handler_state(0) +#elif ARCH_X86_32 +#define declare_new(ret, ...)\ + ret (*checked_call)(void *, __VA_ARGS__, int, int, int, int, int, int,\ + int, int, int, int, int, int, int, int, int) =\ + (void *)checkasm_checked_call; +#define call_new(...)\ + (checkasm_set_signal_handler_state(1),\ + checked_call(func_new, __VA_ARGS__, 15, 14, 13, 12,\ + 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1));\ + checkasm_set_signal_handler_state(0) +#elif ARCH_ARM +/* Use a dummy argument, to offset the real parameters by 2, not only 1. + * This makes sure that potential 8-byte-alignment of parameters is kept + * the same even when the extra parameters have been removed. 
*/ +void checkasm_checked_call_vfp(void *func, int dummy, ...); +#define declare_new(ret, ...)\ + ret (*checked_call)(void *, int dummy, __VA_ARGS__,\ + int, int, int, int, int, int, int, int,\ + int, int, int, int, int, int, int) =\ + (void *)checkasm_checked_call_vfp; +#define call_new(...)\ + (checkasm_set_signal_handler_state(1),\ + checked_call(func_new, 0, __VA_ARGS__, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0));\ + checkasm_set_signal_handler_state(0) +#elif ARCH_AARCH64 && !defined(__APPLE__) +void checkasm_stack_clobber(uint64_t clobber, ...); +#define declare_new(ret, ...)\ + ret (*checked_call)(void *, int, int, int, int, int, int, int,\ + __VA_ARGS__, int, int, int, int, int, int, int, int,\ + int, int, int, int, int, int, int) =\ + (void *)checkasm_checked_call; +#define CLOB (UINT64_C(0xdeadbeefdeadbeef)) +#define call_new(...)\ + (checkasm_set_signal_handler_state(1),\ + checkasm_stack_clobber(CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\ + CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\ + CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\ + CLOB, CLOB, CLOB, CLOB, CLOB),\ + checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__,\ + 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0));\ + checkasm_set_signal_handler_state(0) +#else +#define declare_new(ret, ...) +#define call_new(...)\ + (checkasm_set_signal_handler_state(1),\ + ((func_type *)func_new)(__VA_ARGS__));\ + checkasm_set_signal_handler_state(0) +#endif +#else /* HAVE_ASM */ +#define declare_new(ret, ...) 
+/* Call the function */ +#define call_new(...)\ + (checkasm_set_signal_handler_state(1),\ + ((func_type *)func_new)(__VA_ARGS__));\ + checkasm_set_signal_handler_state(0) +#endif /* HAVE_ASM */ + +/* Benchmark the function */ +#ifdef readtime +#define bench_new(...)\ + do {\ + if (checkasm_bench_func()) {\ + func_type *const tfunc = func_new;\ + checkasm_set_signal_handler_state(1);\ + uint64_t tsum = 0;\ + int tcount = 0;\ + for (int ti = 0; ti < BENCH_RUNS; ti++) {\ + uint64_t t = readtime();\ + int talt = 0; (void)talt;\ + tfunc(__VA_ARGS__);\ + talt = 1;\ + tfunc(__VA_ARGS__);\ + talt = 0;\ + tfunc(__VA_ARGS__);\ + talt = 1;\ + tfunc(__VA_ARGS__);\ + t = readtime() - t;\ + if (t*tcount <= tsum*4 && ti > 0) {\ + tsum += t;\ + tcount++;\ + }\ + }\ + checkasm_set_signal_handler_state(0);\ + checkasm_update_bench(tcount, tsum);\ + } else {\ + const int talt = 0; (void)talt;\ + call_new(__VA_ARGS__);\ + }\ + } while (0) +#else +#define bench_new(...) do {} while (0) +#endif + +/* Alternates between two pointers. Intended to be used within bench_new() + * calls for functions which modifies their input buffer(s) to ensure that + * throughput, and not latency, is measured. */ +#define alternate(a, b) (talt ? 
(b) : (a)) + +#define ROUND_UP(x,a) (((x)+((a)-1)) & ~((a)-1)) +#define PIXEL_RECT(name, w, h) \ + ALIGN_STK_64(pixel, name##_buf, ((h)+32)*(ROUND_UP(w,64)+64) + 64,); \ + ptrdiff_t name##_stride = sizeof(pixel)*(ROUND_UP(w,64)+64); \ + (void)name##_stride; \ + pixel *name = name##_buf + (ROUND_UP(w,64)+64)*16 + 64 + +#define CLEAR_PIXEL_RECT(name) \ + memset(name##_buf, 0x99, sizeof(name##_buf)) \ + +#define DECL_CHECKASM_CHECK_FUNC(type) \ +int checkasm_check_##type(const char *const file, const int line, \ + const type *const buf1, const ptrdiff_t stride1, \ + const type *const buf2, const ptrdiff_t stride2, \ + const int w, const int h, const char *const name, \ + const int align_w, const int align_h, \ + const int padding) + +DECL_CHECKASM_CHECK_FUNC(int8_t); +DECL_CHECKASM_CHECK_FUNC(int16_t); +DECL_CHECKASM_CHECK_FUNC(int32_t); +DECL_CHECKASM_CHECK_FUNC(uint8_t); +DECL_CHECKASM_CHECK_FUNC(uint16_t); +DECL_CHECKASM_CHECK_FUNC(uint32_t); + +#define CONCAT(a,b) a ## b + +#define checkasm_check2(prefix, ...) CONCAT(checkasm_check_, prefix)(__FILE__, __LINE__, __VA_ARGS__) +#define checkasm_check(prefix, ...) checkasm_check2(prefix, __VA_ARGS__, 0, 0, 0) + +#ifdef BITDEPTH +#define checkasm_check_pixel(...) checkasm_check(PIXEL_TYPE, __VA_ARGS__) +#define checkasm_check_pixel_padded(...) checkasm_check2(PIXEL_TYPE, __VA_ARGS__, 1, 1, 8) +#define checkasm_check_pixel_padded_align(...) checkasm_check2(PIXEL_TYPE, __VA_ARGS__, 8) +#define checkasm_check_coef(...) checkasm_check(COEF_TYPE, __VA_ARGS__) +#endif + +#endif /* DAV1D_TESTS_CHECKASM_CHECKASM_H */ diff --git a/media/video/av1/config.h b/media/video/av1/config.h new file mode 100644 index 00000000..912f77e4 --- /dev/null +++ b/media/video/av1/config.h @@ -0,0 +1,45 @@ +/* + * Autogenerated by the Meson build system. + * Do not edit, your changes will be lost. 
+ */ + +#pragma once + +#define ARCH_AARCH64 0 + +#define ARCH_ARM 0 + +#define ARCH_PPC64LE 1 + +#define ARCH_X86 0 + +#define ARCH_X86_32 0 + +#define ARCH_X86_64 0 + +#define CONFIG_16BPC 1 + +#define CONFIG_8BPC 1 + +#define CONFIG_LOG 1 + +#define ENDIANNESS_BIG 0 + +#define HAVE_ASM 1 + +#define HAVE_C11_GENERIC 1 + +#define HAVE_CLOCK_GETTIME 1 + +#define HAVE_DLSYM 1 + +#define HAVE_GETAUXVAL 1 + +#define HAVE_POSIX_MEMALIGN 1 + +#define HAVE_PTHREAD_GETAFFINITY_NP 1 + +#define HAVE_UNISTD_H 1 + +#define TRIM_DSP_FUNCTIONS 0 + diff --git a/media/video/av1/include/common/attributes.h b/media/video/av1/include/common/attributes.h new file mode 100644 index 00000000..c176b1cc --- /dev/null +++ b/media/video/av1/include/common/attributes.h @@ -0,0 +1,199 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_COMMON_ATTRIBUTES_H +#define DAV1D_COMMON_ATTRIBUTES_H + +#include "config.h" + +#include +#include + +#ifndef __has_attribute +#define __has_attribute(x) 0 +#endif + +#ifndef __has_feature +#define __has_feature(x) 0 +#endif + +#ifdef __GNUC__ +#define ATTR_ALIAS __attribute__((may_alias)) +#define ATTR_FORMAT_PRINTF(fmt, attr) __attribute__((__format__(__printf__, fmt, attr))) +#define COLD __attribute__((cold)) +#else +#define ATTR_ALIAS +#define ATTR_FORMAT_PRINTF(fmt, attr) +#define COLD +#endif + +#if ARCH_X86_64 +/* x86-64 needs 32- and 64-byte alignment for AVX2 and AVX-512. */ +#define ALIGN_64_VAL 64 +#define ALIGN_32_VAL 32 +#define ALIGN_16_VAL 16 +#elif ARCH_X86_32 || ARCH_ARM || ARCH_AARCH64 || ARCH_PPC64LE +/* ARM doesn't benefit from anything more than 16-byte alignment. */ +#define ALIGN_64_VAL 16 +#define ALIGN_32_VAL 16 +#define ALIGN_16_VAL 16 +#else +/* No need for extra alignment on platforms without assembly. */ +#define ALIGN_64_VAL 8 +#define ALIGN_32_VAL 8 +#define ALIGN_16_VAL 8 +#endif + +/* + * API for variables, struct members (ALIGN()) like: + * uint8_t var[1][2][3][4] + * becomes: + * ALIGN(uint8_t var[1][2][3][4], alignment). 
+ */ +#ifdef _MSC_VER +#define ALIGN(ll, a) \ + __declspec(align(a)) ll +#else +#define ALIGN(line, align) \ + line __attribute__((aligned(align))) +#endif + +/* + * API for stack alignment (ALIGN_STK_$align()) of variables like: + * uint8_t var[1][2][3][4] + * becomes: + * ALIGN_STK_$align(uint8_t, var, 1, [2][3][4]) + */ +#define ALIGN_STK_64(type, var, sz1d, sznd) \ + ALIGN(type var[sz1d]sznd, ALIGN_64_VAL) +#define ALIGN_STK_32(type, var, sz1d, sznd) \ + ALIGN(type var[sz1d]sznd, ALIGN_32_VAL) +#define ALIGN_STK_16(type, var, sz1d, sznd) \ + ALIGN(type var[sz1d]sznd, ALIGN_16_VAL) + +/* + * Forbid inlining of a function: + * static NOINLINE void func() {} + */ +#ifdef _MSC_VER +#define NOINLINE __declspec(noinline) +#elif __has_attribute(noclone) +#define NOINLINE __attribute__((noinline, noclone)) +#else +#define NOINLINE __attribute__((noinline)) +#endif + +#ifdef _MSC_VER +#define ALWAYS_INLINE __forceinline +#else +#define ALWAYS_INLINE __attribute__((always_inline)) inline +#endif +/* +#if (defined(__ELF__) || defined(__MACH__) || (defined(_WIN32) && defined(__clang__))) && __has_attribute(visibility) +#define EXTERN extern __attribute__((visibility("hidden"))) +#else */ +#define EXTERN extern +//#endif + +#ifdef __clang__ +#define NO_SANITIZE(x) __attribute__((no_sanitize(x))) +#else +#define NO_SANITIZE(x) +#endif + +#if defined(NDEBUG) && (defined(__GNUC__) || defined(__clang__)) +#undef assert +#define assert(x) do { if (!(x)) __builtin_unreachable(); } while (0) +#elif defined(NDEBUG) && defined(_MSC_VER) +#undef assert +#define assert __assume +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__) +# define dav1d_uninit(x) x=x +#else +# define dav1d_uninit(x) x +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#include + +static inline int ctz(const unsigned int mask) { + unsigned long idx; + _BitScanForward(&idx, mask); + return idx; +} + +static inline int clz(const unsigned int mask) { + unsigned long 
leading_zero = 0; + _BitScanReverse(&leading_zero, mask); + return (31 - leading_zero); +} + +#ifdef _WIN64 +static inline int clzll(const unsigned long long mask) { + unsigned long leading_zero = 0; + _BitScanReverse64(&leading_zero, mask); + return (63 - leading_zero); +} +#else /* _WIN64 */ +static inline int clzll(const unsigned long long mask) { + if (mask >> 32) + return clz((unsigned)(mask >> 32)); + else + return clz((unsigned)mask) + 32; +} +#endif /* _WIN64 */ +#else /* !_MSC_VER */ +static inline int ctz(const unsigned int mask) { + return __builtin_ctz(mask); +} + +static inline int clz(const unsigned int mask) { + return __builtin_clz(mask); +} + +static inline int clzll(const unsigned long long mask) { + return __builtin_clzll(mask); +} +#endif /* !_MSC_VER */ + +#ifndef static_assert +#define CHECK_OFFSET(type, field, name) \ + struct check_##type##_##field { int x[(name == offsetof(type, field)) ? 1 : -1]; } +#else +#define CHECK_OFFSET(type, field, name) \ + static_assert(name == offsetof(type, field), #field) +#endif + +#ifdef _MSC_VER +#define PACKED(...) __pragma(pack(push, 1)) __VA_ARGS__ __pragma(pack(pop)) +#else +#define PACKED(...) __VA_ARGS__ __attribute__((__packed__)) +#endif + +#endif /* DAV1D_COMMON_ATTRIBUTES_H */ diff --git a/media/video/av1/include/common/bitdepth.h b/media/video/av1/include/common/bitdepth.h new file mode 100644 index 00000000..88a822aa --- /dev/null +++ b/media/video/av1/include/common/bitdepth.h @@ -0,0 +1,93 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef DAV1D_COMMON_BITDEPTH_H +#define DAV1D_COMMON_BITDEPTH_H + +#include +#include + +#include "common/attributes.h" + +#if !defined(BITDEPTH) +typedef void pixel; +typedef void coef; +#define HIGHBD_DECL_SUFFIX /* nothing */ +#define HIGHBD_CALL_SUFFIX /* nothing */ +#define HIGHBD_TAIL_SUFFIX /* nothing */ +#elif BITDEPTH == 8 +typedef uint8_t pixel; +typedef int16_t coef; +#define PIXEL_TYPE uint8_t +#define COEF_TYPE int16_t +#define pixel_copy memcpy +#define pixel_set memset +#define iclip_pixel iclip_u8 +#define PIX_HEX_FMT "%02x" +#define bitfn(x) x##_8bpc +#define BF(x, suffix) x##_8bpc_##suffix +#define PXSTRIDE(x) (x) +#define highbd_only(x) +#define HIGHBD_DECL_SUFFIX /* nothing */ +#define HIGHBD_CALL_SUFFIX /* nothing */ +#define HIGHBD_TAIL_SUFFIX /* nothing */ +#define bitdepth_from_max(x) 8 +#define BITDEPTH_MAX 0xff +#elif BITDEPTH == 16 +typedef uint16_t pixel; +typedef int32_t coef; +#define PIXEL_TYPE uint16_t +#define COEF_TYPE int32_t +#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1) +static inline void pixel_set(pixel *const dst, const int val, const int num) { + for (int n = 0; n < num; n++) + dst[n] = val; +} +#define PIX_HEX_FMT "%03x" +#define iclip_pixel(x) iclip(x, 0, bitdepth_max) +#define HIGHBD_DECL_SUFFIX , const int bitdepth_max +#define HIGHBD_CALL_SUFFIX , f->bitdepth_max +#define HIGHBD_TAIL_SUFFIX , bitdepth_max +#define bitdepth_from_max(bitdepth_max) (32 - clz(bitdepth_max)) +#define BITDEPTH_MAX bitdepth_max +#define bitfn(x) x##_16bpc +#define BF(x, suffix) x##_16bpc_##suffix +static inline ptrdiff_t PXSTRIDE(const ptrdiff_t x) { + assert(!(x & 1)); + return x >> 1; +} +#define highbd_only(x) x +#else +#error invalid value for bitdepth +#endif +#define bytefn(x) bitfn(x) + +#define bitfn_decls(name, ...) 
\ +name##_8bpc(__VA_ARGS__); \ +name##_16bpc(__VA_ARGS__) + +#endif /* DAV1D_COMMON_BITDEPTH_H */ diff --git a/media/video/av1/include/common/dump.h b/media/video/av1/include/common/dump.h new file mode 100644 index 00000000..9ffab6a4 --- /dev/null +++ b/media/video/av1/include/common/dump.h @@ -0,0 +1,92 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef DAV1D_COMMON_DUMP_H +#define DAV1D_COMMON_DUMP_H + +#include +#include +#include + +#include "common/bitdepth.h" + +static inline void append_plane_to_file(const pixel *buf, ptrdiff_t stride, + int w, int h, const char *const file) +{ + FILE *const f = fopen(file, "ab"); + while (h--) { + fwrite(buf, w * sizeof(pixel), 1, f); + buf += PXSTRIDE(stride); + } + fclose(f); +} + +static inline void hex_fdump(FILE *out, const pixel *buf, ptrdiff_t stride, + int w, int h, const char *what) +{ + fprintf(out, "%s\n", what); + while (h--) { + int x; + for (x = 0; x < w; x++) + fprintf(out, " " PIX_HEX_FMT, buf[x]); + buf += PXSTRIDE(stride); + fprintf(out, "\n"); + } +} + +static inline void hex_dump(const pixel *buf, ptrdiff_t stride, + int w, int h, const char *what) +{ + hex_fdump(stdout, buf, stride, w, h, what); +} + +static inline void coef_dump(const coef *buf, const int w, const int h, + const int len, const char *what) +{ + int y; + printf("%s\n", what); + for (y = 0; y < h; y++) { + int x; + for (x = 0; x < w; x++) + printf(" %*d", len, buf[x]); + buf += w; + printf("\n"); + } +} + +static inline void ac_dump(const int16_t *buf, int w, int h, const char *what) +{ + printf("%s\n", what); + while (h--) { + for (int x = 0; x < w; x++) + printf(" %03d", buf[x]); + buf += w; + printf("\n"); + } +} + +#endif /* DAV1D_COMMON_DUMP_H */ diff --git a/media/video/av1/include/common/intops.h b/media/video/av1/include/common/intops.h new file mode 100644 index 00000000..2d21998b --- /dev/null +++ b/media/video/av1/include/common/intops.h @@ -0,0 +1,84 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
+ * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_COMMON_INTOPS_H +#define DAV1D_COMMON_INTOPS_H + +#include + +#include "common/attributes.h" + +static inline int imax(const int a, const int b) { + return a > b ? a : b; +} + +static inline int imin(const int a, const int b) { + return a < b ? a : b; +} + +static inline unsigned umax(const unsigned a, const unsigned b) { + return a > b ? a : b; +} + +static inline unsigned umin(const unsigned a, const unsigned b) { + return a < b ? a : b; +} + +static inline int iclip(const int v, const int min, const int max) { + return v < min ? min : v > max ? max : v; +} + +static inline int iclip_u8(const int v) { + return iclip(v, 0, 255); +} + +static inline int apply_sign(const int v, const int s) { + return s < 0 ? -v : v; +} + +static inline int apply_sign64(const int v, const int64_t s) { + return s < 0 ? 
-v : v; +} + +static inline int ulog2(const unsigned v) { + return 31 - clz(v); +} + +static inline int u64log2(const uint64_t v) { + return 63 - clzll(v); +} + +static inline unsigned inv_recenter(const unsigned r, const unsigned v) { + if (v > (r << 1)) + return v; + else if ((v & 1) == 0) + return (v >> 1) + r; + else + return r - ((v + 1) >> 1); +} + +#endif /* DAV1D_COMMON_INTOPS_H */ diff --git a/media/video/av1/include/common/validate.h b/media/video/av1/include/common/validate.h new file mode 100644 index 00000000..3096f3db --- /dev/null +++ b/media/video/av1/include/common/validate.h @@ -0,0 +1,59 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_COMMON_VALIDATE_H +#define DAV1D_COMMON_VALIDATE_H + +#include +#include + +#if defined(NDEBUG) +#define debug_abort() +#else +#define debug_abort abort +#endif + +#define validate_input_or_ret_with_msg(x, r, ...) \ + if (!(x)) { \ + fprintf(stderr, "Input validation check \'%s\' failed in %s!\n", \ + #x, __func__); \ + fprintf(stderr, __VA_ARGS__); \ + debug_abort(); \ + return r; \ + } + +#define validate_input_or_ret(x, r) \ + if (!(x)) { \ + fprintf(stderr, "Input validation check \'%s\' failed in %s!\n", \ + #x, __func__); \ + debug_abort(); \ + return r; \ + } + +#define validate_input(x) validate_input_or_ret(x, ) + +#endif /* DAV1D_COMMON_VALIDATE_H */ diff --git a/media/video/av1/include/dav1d/common.h b/media/video/av1/include/dav1d/common.h new file mode 100644 index 00000000..8685b4f0 --- /dev/null +++ b/media/video/av1/include/dav1d/common.h @@ -0,0 +1,86 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_COMMON_H +#define DAV1D_COMMON_H + +#include <errno.h> +#include <stddef.h> +#include <stdint.h> + +#ifndef DAV1D_API + #if defined _WIN32 + #if defined DAV1D_BUILDING_DLL + #define DAV1D_API __declspec(dllexport) + #else + #define DAV1D_API + #endif + #else + #if __GNUC__ >= 4 + #define DAV1D_API __attribute__ ((visibility ("default"))) + #else + #define DAV1D_API + #endif + #endif +#endif + +#if EPERM > 0 +#define DAV1D_ERR(e) (-(e)) ///< Negate POSIX error code. +#else +#define DAV1D_ERR(e) (e) +#endif + +/** + * A reference-counted object wrapper for a user-configurable pointer. + */ +typedef struct Dav1dUserData { + const uint8_t *data; ///< data pointer + struct Dav1dRef *ref; ///< allocation origin +} Dav1dUserData; + +/** + * Input packet metadata which are copied from the input data used to + * decode each image into the matching structure of the output image + * returned back to the user.
Since these are metadata fields, they + * can be used for other purposes than the documented ones, they will + * still be passed from input data to output picture without being + * used internally. + */ +typedef struct Dav1dDataProps { + int64_t timestamp; ///< container timestamp of input data, INT64_MIN if unknown (default) + int64_t duration; ///< container duration of input data, 0 if unknown (default) + int64_t offset; ///< stream offset of input data, -1 if unknown (default) + size_t size; ///< packet size, default Dav1dData.sz + struct Dav1dUserData user_data; ///< user-configurable data, default NULL members +} Dav1dDataProps; + +/** + * Release reference to a Dav1dDataProps. + */ +DAV1D_API void dav1d_data_props_unref(Dav1dDataProps *props); + +#endif /* DAV1D_COMMON_H */ diff --git a/media/video/av1/include/dav1d/data.h b/media/video/av1/include/dav1d/data.h new file mode 100644 index 00000000..f945a042 --- /dev/null +++ b/media/video/av1/include/dav1d/data.h @@ -0,0 +1,109 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_DATA_H +#define DAV1D_DATA_H + +#include <stddef.h> +#include <stdint.h> + +#include "common.h" + +typedef struct Dav1dData { + const uint8_t *data; ///< data pointer + size_t sz; ///< data size + struct Dav1dRef *ref; ///< allocation origin + Dav1dDataProps m; ///< user provided metadata passed to the output picture +} Dav1dData; + +/** + * Allocate data. + * + * @param data Input context. + * @param sz Size of the data that should be allocated. + * + * @return Pointer to the allocated buffer on success. NULL on error. + */ +DAV1D_API uint8_t * dav1d_data_create(Dav1dData *data, size_t sz); + +/** + * Wrap an existing data array. + * + * @param data Input context. + * @param buf The data to be wrapped. + * @param sz Size of the data. + * @param free_callback Function to be called when we release our last + * reference to this data. In this callback, $buf will be + * the $buf argument to this function, and $cookie will + * be the $cookie input argument to this function. + * @param cookie Opaque parameter passed to free_callback(). + * + * @return 0 on success. A negative DAV1D_ERR value on error. + */ +DAV1D_API int dav1d_data_wrap(Dav1dData *data, const uint8_t *buf, size_t sz, + void (*free_callback)(const uint8_t *buf, void *cookie), + void *cookie); + +/** + * Wrap a user-provided data pointer into a reference counted object. + * + * data->m.user_data field will be initialized to wrap the provided $user_data + * pointer.
+ * + * $free_callback will be called on the same thread that released the last + * reference. If frame threading is used, make sure $free_callback is + * thread-safe. + * + * @param data Input context. + * @param user_data The user data to be wrapped. + * @param free_callback Function to be called when we release our last + * reference to this data. In this callback, $user_data + * will be the $user_data argument to this function, and + * $cookie will be the $cookie input argument to this + * function. + * @param cookie Opaque parameter passed to $free_callback. + * + * @return 0 on success. A negative DAV1D_ERR value on error. + */ +DAV1D_API int dav1d_data_wrap_user_data(Dav1dData *data, + const uint8_t *user_data, + void (*free_callback)(const uint8_t *user_data, + void *cookie), + void *cookie); + +/** + * Free the data reference. + * + * The reference count for data->m.user_data will be decremented (if it has been + * initialized with dav1d_data_wrap_user_data). The $data object will be memset + * to 0. + * + * @param data Input context. + */ +DAV1D_API void dav1d_data_unref(Dav1dData *data); + +#endif /* DAV1D_DATA_H */ diff --git a/media/video/av1/include/dav1d/dav1d.h b/media/video/av1/include/dav1d/dav1d.h new file mode 100644 index 00000000..da71e58e --- /dev/null +++ b/media/video/av1/include/dav1d/dav1d.h @@ -0,0 +1,309 @@ +/* + * Copyright © 2018-2021, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_H +#define DAV1D_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <errno.h> +#include <stdarg.h> + +#include "common.h" +#include "picture.h" +#include "data.h" +#include "version.h" + +typedef struct Dav1dContext Dav1dContext; +typedef struct Dav1dRef Dav1dRef; + +#define DAV1D_MAX_THREADS 256 +#define DAV1D_MAX_FRAME_DELAY 256 + +typedef struct Dav1dLogger { + void *cookie; ///< Custom data to pass to the callback. + /** + * Logger callback. May be NULL to disable logging. + * + * @param cookie Custom pointer passed to all calls. + * @param format The vprintf compatible format string. + * @param ap List of arguments referenced by the format string.
+ */ + void (*callback)(void *cookie, const char *format, va_list ap); +} Dav1dLogger; + +enum Dav1dInloopFilterType { + DAV1D_INLOOPFILTER_NONE = 0, + DAV1D_INLOOPFILTER_DEBLOCK = 1 << 0, + DAV1D_INLOOPFILTER_CDEF = 1 << 1, + DAV1D_INLOOPFILTER_RESTORATION = 1 << 2, + DAV1D_INLOOPFILTER_ALL = DAV1D_INLOOPFILTER_DEBLOCK | + DAV1D_INLOOPFILTER_CDEF | + DAV1D_INLOOPFILTER_RESTORATION, +}; + +typedef struct Dav1dSettings { + int n_threads; ///< number of threads (0 = number of logical cores in host system, default 0) + int max_frame_delay; ///< Set to 1 for low-latency decoding (0 = ceil(sqrt(n_threads)), default 0) + int apply_grain; ///< whether to apply film grain on output frames (default 1) + int operating_point; ///< select an operating point for scalable AV1 bitstreams (0 - 31, default 0) + int all_layers; ///< output all spatial layers of a scalable AV1 bitstream (default 1) + unsigned frame_size_limit; ///< maximum frame size, in pixels (0 = unlimited, default 0) + Dav1dPicAllocator allocator; ///< Picture allocator callback. + Dav1dLogger logger; ///< Logger callback. + int strict_std_compliance; ///< whether to abort decoding on standard compliance violations + ///< that don't affect actual bitstream decoding (e.g. inconsistent + ///< or invalid metadata, default 0) + int output_invisible_frames; ///< output invisibly coded frames (in coding order) in addition + ///< to all visible frames. Because of show-existing-frame, this + ///< means some frames may appear twice (once when coded, + ///< once when shown, default 0) + enum Dav1dInloopFilterType inloop_filters; ///< postfilters to enable during decoding (default + ///< DAV1D_INLOOPFILTER_ALL) + uint8_t reserved[20]; ///< reserved for future use +} Dav1dSettings; + +/** + * Get library version. + */ +DAV1D_API const char *dav1d_version(void); + +/** + * Initialize settings to default values. + * + * @param s Input settings context.
+ */ +DAV1D_API void dav1d_default_settings(Dav1dSettings *s); + +/** + * Allocate and open a decoder instance. + * + * @param c_out The decoder instance to open. *c_out will be set to the + * allocated context. + * @param s Input settings context. + * + * @note The context must be freed using dav1d_close() when decoding is + * finished. + * + * @return 0 on success, or < 0 (a negative DAV1D_ERR code) on error. + */ +DAV1D_API int dav1d_open(Dav1dContext **c_out, const Dav1dSettings *s); + +/** + * Parse a Sequence Header OBU from bitstream data. + * + * @param out Output Sequence Header. + * @param buf The data to be parsed. + * @param sz Size of the data. + * + * @return + * 0: Success, and out is filled with the parsed Sequence Header + * OBU parameters. + * DAV1D_ERR(ENOENT): No Sequence Header OBUs were found in the buffer. + * other negative DAV1D_ERR codes: Invalid data in the buffer, invalid passed-in + * arguments, and other errors during parsing. + * + * @note It is safe to feed this function data containing other OBUs than a + * Sequence Header, as they will simply be ignored. If there is more than + * one Sequence Header OBU present, only the last will be returned. + */ +DAV1D_API int dav1d_parse_sequence_header(Dav1dSequenceHeader *out, + const uint8_t *buf, const size_t sz); + +/** + * Feed bitstream data to the decoder. + * + * @param c Input decoder instance. + * @param in Input bitstream data. On success, ownership of the reference is + * passed to the library. + * + * @return + * 0: Success, and the data was consumed. + * DAV1D_ERR(EAGAIN): The data can't be consumed. dav1d_get_picture() should + * be called to get one or more frames before the function + * can consume new data. + * other negative DAV1D_ERR codes: Error during decoding or because of invalid + * passed-in arguments. + */ +DAV1D_API int dav1d_send_data(Dav1dContext *c, Dav1dData *in); + +/** + * Return a decoded picture. + * + * @param c Input decoder instance.
+ * @param out Output frame. The caller assumes ownership of the returned + * reference. + * + * @return + * 0: Success, and a frame is returned. + * DAV1D_ERR(EAGAIN): Not enough data to output a frame. dav1d_send_data() + * should be called with new input. + * other negative DAV1D_ERR codes: Error during decoding or because of invalid + * passed-in arguments. + * + * @note To drain buffered frames from the decoder (i.e. on end of stream), + * call this function until it returns DAV1D_ERR(EAGAIN). + * + * @code{.c} + * Dav1dData data = { 0 }; + * Dav1dPicture p = { 0 }; + * int res; + * + * read_data(&data); + * do { + * res = dav1d_send_data(c, &data); + * // Keep going even if the function can't consume the current data + * packet. It eventually will after one or more frames have been + * returned in this loop. + * if (res < 0 && res != DAV1D_ERR(EAGAIN)) + * free_and_abort(); + * res = dav1d_get_picture(c, &p); + * if (res < 0) { + * if (res != DAV1D_ERR(EAGAIN)) + * free_and_abort(); + * } else + * output_and_unref_picture(&p); + * // Stay in the loop as long as there's data to consume. + * } while (data.sz || read_data(&data) == SUCCESS); + * + * // Handle EOS by draining all buffered frames. + * do { + * res = dav1d_get_picture(c, &p); + * if (res < 0) { + * if (res != DAV1D_ERR(EAGAIN)) + * free_and_abort(); + * } else + * output_and_unref_picture(&p); + * } while (res == 0); + * @endcode + */ +DAV1D_API int dav1d_get_picture(Dav1dContext *c, Dav1dPicture *out); + +/** + * Apply film grain to a previously decoded picture. If the picture contains no + * film grain metadata, then this function merely returns a new reference. + * + * @param c Input decoder instance. + * @param out Output frame. The caller assumes ownership of the returned + * reference. + * @param in Input frame. No ownership is transferred. + * + * @return + * 0: Success, and a frame is returned. 
+ * other negative DAV1D_ERR codes: Error due to lack of memory or because of + * invalid passed-in arguments. + * + * @note If `Dav1dSettings.apply_grain` is true, film grain was already applied + * by `dav1d_get_picture`, and so calling this function leads to double + * application of film grain. Users should only call this when needed. + */ +DAV1D_API int dav1d_apply_grain(Dav1dContext *c, Dav1dPicture *out, + const Dav1dPicture *in); + +/** + * Close a decoder instance and free all associated memory. + * + * @param c_out The decoder instance to close. *c_out will be set to NULL. + */ +DAV1D_API void dav1d_close(Dav1dContext **c_out); + +/** + * Flush all delayed frames in decoder and clear internal decoder state, + * to be used when seeking. + * + * @param c Input decoder instance. + * + * @note Decoding will start only after a valid sequence header OBU is + * delivered to dav1d_send_data(). + * + */ +DAV1D_API void dav1d_flush(Dav1dContext *c); + +enum Dav1dEventFlags { + /** + * The last returned picture contains a reference to a new Sequence Header, + * either because it's the start of a new coded sequence, or the decoder was + * flushed before it was generated. + */ + DAV1D_EVENT_FLAG_NEW_SEQUENCE = 1 << 0, + /** + * The last returned picture contains a reference to a Sequence Header with + * new operating parameters information for the current coded sequence. + */ + DAV1D_EVENT_FLAG_NEW_OP_PARAMS_INFO = 1 << 1, +}; + +/** + * Fetch a combination of DAV1D_EVENT_FLAG_* event flags generated by the decoding + * process. + * + * @param c Input decoder instance. + * @param flags Where to write the flags. + * + * @return 0 on success, or < 0 (a negative DAV1D_ERR code) on error. + * + * @note Calling this function will clear all the event flags currently stored in + * the decoder. 
+ * + */ +DAV1D_API int dav1d_get_event_flags(Dav1dContext *c, enum Dav1dEventFlags *flags); + +/** + * Retrieve the user-provided metadata associated with the input data packet + * for the last decoding error reported to the user, i.e. a negative return + * value (not EAGAIN) from dav1d_send_data() or dav1d_get_picture(). + * + * @param c Input decoder instance. + * @param out Output Dav1dDataProps. On success, the caller assumes ownership of + * the returned reference. + * + * @return 0 on success, or < 0 (a negative DAV1D_ERR code) on error. + */ +DAV1D_API int dav1d_get_decode_error_data_props(Dav1dContext *c, Dav1dDataProps *out); + +/** + * Get the decoder delay, which is the number of internally buffered frames, not + * including reference frames. + * This value is guaranteed to be >= 1 and <= max_frame_delay. + * + * @param s Input settings context. + * + * @return Decoder frame delay on success, or < 0 (a negative DAV1D_ERR code) on + * error. + * + * @note The returned delay is valid only for a Dav1dContext initialized with the + * provided Dav1dSettings. + */ +DAV1D_API int dav1d_get_frame_delay(const Dav1dSettings *s); + +# ifdef __cplusplus +} +# endif + +#endif /* DAV1D_H */ diff --git a/media/video/av1/include/dav1d/headers.h b/media/video/av1/include/dav1d/headers.h new file mode 100644 index 00000000..e2f7aa07 --- /dev/null +++ b/media/video/av1/include/dav1d/headers.h @@ -0,0 +1,435 @@ +/* + * Copyright © 2018-2020, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_HEADERS_H +#define DAV1D_HEADERS_H + +#include <stdint.h> +#include <stddef.h> + +// Constants from Section 3.
"Symbols and abbreviated terms" +#define DAV1D_MAX_CDEF_STRENGTHS 8 +#define DAV1D_MAX_OPERATING_POINTS 32 +#define DAV1D_MAX_TILE_COLS 64 +#define DAV1D_MAX_TILE_ROWS 64 +#define DAV1D_MAX_SEGMENTS 8 +#define DAV1D_NUM_REF_FRAMES 8 +#define DAV1D_PRIMARY_REF_NONE 7 +#define DAV1D_REFS_PER_FRAME 7 +#define DAV1D_TOTAL_REFS_PER_FRAME (DAV1D_REFS_PER_FRAME + 1) + +enum Dav1dObuType { + DAV1D_OBU_SEQ_HDR = 1, + DAV1D_OBU_TD = 2, + DAV1D_OBU_FRAME_HDR = 3, + DAV1D_OBU_TILE_GRP = 4, + DAV1D_OBU_METADATA = 5, + DAV1D_OBU_FRAME = 6, + DAV1D_OBU_REDUNDANT_FRAME_HDR = 7, + DAV1D_OBU_PADDING = 15, +}; + +enum Dav1dTxfmMode { + DAV1D_TX_4X4_ONLY, + DAV1D_TX_LARGEST, + DAV1D_TX_SWITCHABLE, + DAV1D_N_TX_MODES, +}; + +enum Dav1dFilterMode { + DAV1D_FILTER_8TAP_REGULAR, + DAV1D_FILTER_8TAP_SMOOTH, + DAV1D_FILTER_8TAP_SHARP, + DAV1D_N_SWITCHABLE_FILTERS, + DAV1D_FILTER_BILINEAR = DAV1D_N_SWITCHABLE_FILTERS, + DAV1D_N_FILTERS, + DAV1D_FILTER_SWITCHABLE = DAV1D_N_FILTERS, +}; + +enum Dav1dAdaptiveBoolean { + DAV1D_OFF = 0, + DAV1D_ON = 1, + DAV1D_ADAPTIVE = 2, +}; + +enum Dav1dRestorationType { + DAV1D_RESTORATION_NONE, + DAV1D_RESTORATION_SWITCHABLE, + DAV1D_RESTORATION_WIENER, + DAV1D_RESTORATION_SGRPROJ, +}; + +enum Dav1dWarpedMotionType { + DAV1D_WM_TYPE_IDENTITY, + DAV1D_WM_TYPE_TRANSLATION, + DAV1D_WM_TYPE_ROT_ZOOM, + DAV1D_WM_TYPE_AFFINE, +}; + +typedef struct Dav1dWarpedMotionParams { + enum Dav1dWarpedMotionType type; + int32_t matrix[6]; + union { + struct { + int16_t alpha, beta, gamma, delta; + } p; + int16_t abcd[4]; + } u; +} Dav1dWarpedMotionParams; + +enum Dav1dPixelLayout { + DAV1D_PIXEL_LAYOUT_I400, ///< monochrome + DAV1D_PIXEL_LAYOUT_I420, ///< 4:2:0 planar + DAV1D_PIXEL_LAYOUT_I422, ///< 4:2:2 planar + DAV1D_PIXEL_LAYOUT_I444, ///< 4:4:4 planar +}; + +enum Dav1dFrameType { + DAV1D_FRAME_TYPE_KEY = 0, ///< Key Intra frame + DAV1D_FRAME_TYPE_INTER = 1, ///< Inter frame + DAV1D_FRAME_TYPE_INTRA = 2, ///< Non key Intra frame + DAV1D_FRAME_TYPE_SWITCH = 3, ///< 
Switch Inter frame +}; + +enum Dav1dColorPrimaries { + DAV1D_COLOR_PRI_BT709 = 1, + DAV1D_COLOR_PRI_UNKNOWN = 2, + DAV1D_COLOR_PRI_BT470M = 4, + DAV1D_COLOR_PRI_BT470BG = 5, + DAV1D_COLOR_PRI_BT601 = 6, + DAV1D_COLOR_PRI_SMPTE240 = 7, + DAV1D_COLOR_PRI_FILM = 8, + DAV1D_COLOR_PRI_BT2020 = 9, + DAV1D_COLOR_PRI_XYZ = 10, + DAV1D_COLOR_PRI_SMPTE431 = 11, + DAV1D_COLOR_PRI_SMPTE432 = 12, + DAV1D_COLOR_PRI_EBU3213 = 22, + DAV1D_COLOR_PRI_RESERVED = 255, +}; + +enum Dav1dTransferCharacteristics { + DAV1D_TRC_BT709 = 1, + DAV1D_TRC_UNKNOWN = 2, + DAV1D_TRC_BT470M = 4, + DAV1D_TRC_BT470BG = 5, + DAV1D_TRC_BT601 = 6, + DAV1D_TRC_SMPTE240 = 7, + DAV1D_TRC_LINEAR = 8, + DAV1D_TRC_LOG100 = 9, ///< logarithmic (100:1 range) + DAV1D_TRC_LOG100_SQRT10 = 10, ///< logarithmic (100*sqrt(10):1 range) + DAV1D_TRC_IEC61966 = 11, + DAV1D_TRC_BT1361 = 12, + DAV1D_TRC_SRGB = 13, + DAV1D_TRC_BT2020_10BIT = 14, + DAV1D_TRC_BT2020_12BIT = 15, + DAV1D_TRC_SMPTE2084 = 16, ///< PQ + DAV1D_TRC_SMPTE428 = 17, + DAV1D_TRC_HLG = 18, ///< hybrid log/gamma (BT.2100 / ARIB STD-B67) + DAV1D_TRC_RESERVED = 255, +}; + +enum Dav1dMatrixCoefficients { + DAV1D_MC_IDENTITY = 0, + DAV1D_MC_BT709 = 1, + DAV1D_MC_UNKNOWN = 2, + DAV1D_MC_FCC = 4, + DAV1D_MC_BT470BG = 5, + DAV1D_MC_BT601 = 6, + DAV1D_MC_SMPTE240 = 7, + DAV1D_MC_SMPTE_YCGCO = 8, + DAV1D_MC_BT2020_NCL = 9, + DAV1D_MC_BT2020_CL = 10, + DAV1D_MC_SMPTE2085 = 11, + DAV1D_MC_CHROMAT_NCL = 12, ///< Chromaticity-derived + DAV1D_MC_CHROMAT_CL = 13, + DAV1D_MC_ICTCP = 14, + DAV1D_MC_RESERVED = 255, +}; + +enum Dav1dChromaSamplePosition { + DAV1D_CHR_UNKNOWN = 0, + DAV1D_CHR_VERTICAL = 1, ///< Horizontally co-located with luma(0, 0) + ///< sample, between two vertical samples + DAV1D_CHR_COLOCATED = 2, ///< Co-located with luma(0, 0) sample +}; + +typedef struct Dav1dContentLightLevel { + int max_content_light_level; + int max_frame_average_light_level; +} Dav1dContentLightLevel; + +typedef struct Dav1dMasteringDisplay { + ///< 0.16 fixed point + uint16_t
primaries[3][2]; + ///< 0.16 fixed point + uint16_t white_point[2]; + ///< 24.8 fixed point + uint32_t max_luminance; + ///< 18.14 fixed point + uint32_t min_luminance; +} Dav1dMasteringDisplay; + +typedef struct Dav1dITUTT35 { + uint8_t country_code; + uint8_t country_code_extension_byte; + size_t payload_size; + uint8_t *payload; +} Dav1dITUTT35; + +typedef struct Dav1dSequenceHeader { + /** + * Stream profile, 0 for 8-10 bits/component 4:2:0 or monochrome; + * 1 for 8-10 bits/component 4:4:4; 2 for 4:2:2 at any bits/component, + * or 12 bits/component at any chroma subsampling. + */ + int profile; + /** + * Maximum dimensions for this stream. In non-scalable streams, these + * are often the actual dimensions of the stream, although that is not + * a normative requirement. + */ + int max_width, max_height; + enum Dav1dPixelLayout layout; ///< format of the picture + enum Dav1dColorPrimaries pri; ///< color primaries (av1) + enum Dav1dTransferCharacteristics trc; ///< transfer characteristics (av1) + enum Dav1dMatrixCoefficients mtrx; ///< matrix coefficients (av1) + enum Dav1dChromaSamplePosition chr; ///< chroma sample position (av1) + /** + * 0, 1 and 2 mean 8, 10 or 12 bits/component, respectively. This is not + * exactly the same as 'hbd' from the spec; the spec's hbd distinguishes + * between 8 (0) and 10-12 (1) bits/component, and another element + * (twelve_bit) to distinguish between 10 and 12 bits/component. To get + * the spec's hbd, use !!our_hbd, and to get twelve_bit, use hbd == 2. + */ + int hbd; + /** + * Pixel data uses JPEG pixel range ([0,255] for 8bits) instead of + * MPEG pixel range ([16,235] for 8bits luma, [16,240] for 8bits chroma). 
+ */ + int color_range; + + int num_operating_points; + struct Dav1dSequenceHeaderOperatingPoint { + int major_level, minor_level; + int initial_display_delay; + int idc; + int tier; + int decoder_model_param_present; + int display_model_param_present; + } operating_points[DAV1D_MAX_OPERATING_POINTS]; + + int still_picture; + int reduced_still_picture_header; + int timing_info_present; + int num_units_in_tick; + int time_scale; + int equal_picture_interval; + unsigned num_ticks_per_picture; + int decoder_model_info_present; + int encoder_decoder_buffer_delay_length; + int num_units_in_decoding_tick; + int buffer_removal_delay_length; + int frame_presentation_delay_length; + int display_model_info_present; + int width_n_bits, height_n_bits; + int frame_id_numbers_present; + int delta_frame_id_n_bits; + int frame_id_n_bits; + int sb128; + int filter_intra; + int intra_edge_filter; + int inter_intra; + int masked_compound; + int warped_motion; + int dual_filter; + int order_hint; + int jnt_comp; + int ref_frame_mvs; + enum Dav1dAdaptiveBoolean screen_content_tools; + enum Dav1dAdaptiveBoolean force_integer_mv; + int order_hint_n_bits; + int super_res; + int cdef; + int restoration; + int ss_hor, ss_ver, monochrome; + int color_description_present; + int separate_uv_delta_q; + int film_grain_present; + + // Dav1dSequenceHeaders of the same sequence are required to be + // bit-identical until this offset. See 7.5 "Ordering of OBUs": + // Within a particular coded video sequence, the contents of + // sequence_header_obu must be bit-identical each time the + // sequence header appears except for the contents of + // operating_parameters_info. 
+ struct Dav1dSequenceHeaderOperatingParameterInfo { + int decoder_buffer_delay; + int encoder_buffer_delay; + int low_delay_mode; + } operating_parameter_info[DAV1D_MAX_OPERATING_POINTS]; +} Dav1dSequenceHeader; + +typedef struct Dav1dSegmentationData { + int delta_q; + int delta_lf_y_v, delta_lf_y_h, delta_lf_u, delta_lf_v; + int ref; + int skip; + int globalmv; +} Dav1dSegmentationData; + +typedef struct Dav1dSegmentationDataSet { + Dav1dSegmentationData d[DAV1D_MAX_SEGMENTS]; + int preskip; + int last_active_segid; +} Dav1dSegmentationDataSet; + +typedef struct Dav1dLoopfilterModeRefDeltas { + int mode_delta[2 /* is_zeromv */]; + int ref_delta[DAV1D_TOTAL_REFS_PER_FRAME]; +} Dav1dLoopfilterModeRefDeltas; + +typedef struct Dav1dFilmGrainData { + unsigned seed; + int num_y_points; + uint8_t y_points[14][2 /* value, scaling */]; + int chroma_scaling_from_luma; + int num_uv_points[2]; + uint8_t uv_points[2][10][2 /* value, scaling */]; + int scaling_shift; + int ar_coeff_lag; + int8_t ar_coeffs_y[24]; + int8_t ar_coeffs_uv[2][25 + 3 /* padding for alignment purposes */]; + uint64_t ar_coeff_shift; + int grain_scale_shift; + int uv_mult[2]; + int uv_luma_mult[2]; + int uv_offset[2]; + int overlap_flag; + int clip_to_restricted_range; +} Dav1dFilmGrainData; + +typedef struct Dav1dFrameHeader { + struct { + Dav1dFilmGrainData data; + int present, update; + } film_grain; ///< film grain parameters + enum Dav1dFrameType frame_type; ///< type of the picture + int width[2 /* { coded_width, superresolution_upscaled_width } */], height; + int frame_offset; ///< frame number + int temporal_id; ///< temporal id of the frame for SVC + int spatial_id; ///< spatial id of the frame for SVC + + int show_existing_frame; + int existing_frame_idx; + int frame_id; + int frame_presentation_delay; + int show_frame; + int showable_frame; + int error_resilient_mode; + int disable_cdf_update; + int allow_screen_content_tools; + int force_integer_mv; + int frame_size_override; + int 
primary_ref_frame; + int buffer_removal_time_present; + struct Dav1dFrameHeaderOperatingPoint { + int buffer_removal_time; + } operating_points[DAV1D_MAX_OPERATING_POINTS]; + int refresh_frame_flags; + int render_width, render_height; + struct { + int width_scale_denominator; + int enabled; + } super_res; + int have_render_size; + int allow_intrabc; + int frame_ref_short_signaling; + int refidx[DAV1D_REFS_PER_FRAME]; + int hp; + enum Dav1dFilterMode subpel_filter_mode; + int switchable_motion_mode; + int use_ref_frame_mvs; + int refresh_context; + struct { + int uniform; + unsigned n_bytes; + int min_log2_cols, max_log2_cols, log2_cols, cols; + int min_log2_rows, max_log2_rows, log2_rows, rows; + uint16_t col_start_sb[DAV1D_MAX_TILE_COLS + 1]; + uint16_t row_start_sb[DAV1D_MAX_TILE_ROWS + 1]; + int update; + } tiling; + struct { + int yac; + int ydc_delta; + int udc_delta, uac_delta, vdc_delta, vac_delta; + int qm, qm_y, qm_u, qm_v; + } quant; + struct { + int enabled, update_map, temporal, update_data; + Dav1dSegmentationDataSet seg_data; + int lossless[DAV1D_MAX_SEGMENTS], qidx[DAV1D_MAX_SEGMENTS]; + } segmentation; + struct { + struct { + int present; + int res_log2; + } q; + struct { + int present; + int res_log2; + int multi; + } lf; + } delta; + int all_lossless; + struct { + int level_y[2 /* dir */]; + int level_u, level_v; + int mode_ref_delta_enabled; + int mode_ref_delta_update; + Dav1dLoopfilterModeRefDeltas mode_ref_deltas; + int sharpness; + } loopfilter; + struct { + int damping; + int n_bits; + int y_strength[DAV1D_MAX_CDEF_STRENGTHS]; + int uv_strength[DAV1D_MAX_CDEF_STRENGTHS]; + } cdef; + struct { + enum Dav1dRestorationType type[3 /* plane */]; + int unit_size[2 /* y, uv */]; + } restoration; + enum Dav1dTxfmMode txfm_mode; + int switchable_comp_refs; + int skip_mode_allowed, skip_mode_enabled, skip_mode_refs[2]; + int warp_motion; + int reduced_txtp_set; + Dav1dWarpedMotionParams gmv[DAV1D_REFS_PER_FRAME]; +} Dav1dFrameHeader; + +#endif /* 
DAV1D_HEADERS_H */ diff --git a/media/video/av1/include/dav1d/picture.h b/media/video/av1/include/dav1d/picture.h new file mode 100644 index 00000000..2eb0b62e --- /dev/null +++ b/media/video/av1/include/dav1d/picture.h @@ -0,0 +1,144 @@ +/* + * Copyright © 2018-2020, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_PICTURE_H +#define DAV1D_PICTURE_H + +#include <stddef.h> +#include <stdint.h> + +#include "common.h" +#include "headers.h" + +/* Number of bytes to align AND pad picture memory buffers by, so that SIMD + * implementations can over-read by a few bytes, and use aligned read/write + * instructions.
*/ +#define DAV1D_PICTURE_ALIGNMENT 64 + +typedef struct Dav1dPictureParameters { + int w; ///< width (in pixels) + int h; ///< height (in pixels) + enum Dav1dPixelLayout layout; ///< format of the picture + int bpc; ///< bits per pixel component (8 or 10) +} Dav1dPictureParameters; + +typedef struct Dav1dPicture { + Dav1dSequenceHeader *seq_hdr; + Dav1dFrameHeader *frame_hdr; + + /** + * Pointers to planar image data (Y is [0], U is [1], V is [2]). The data + * should be bytes (for 8 bpc) or words (for 10 bpc). In case of words + * containing 10 bpc image data, the pixels should be located in the LSB + * bits, so that values range between [0, 1023]; the upper bits should be + * zero'ed out. + */ + void *data[3]; + + /** + * Number of bytes between 2 lines in data[] for luma [0] or chroma [1]. + */ + ptrdiff_t stride[2]; + + Dav1dPictureParameters p; + Dav1dDataProps m; + + /** + * High Dynamic Range Content Light Level metadata applying to this picture, + * as defined in section 5.8.3 and 6.7.3 + */ + Dav1dContentLightLevel *content_light; + /** + * High Dynamic Range Mastering Display Color Volume metadata applying to + * this picture, as defined in section 5.8.4 and 6.7.4 + */ + Dav1dMasteringDisplay *mastering_display; + /** + * ITU-T T.35 metadata as defined in section 5.8.2 and 6.7.2 + */ + Dav1dITUTT35 *itut_t35; + + uintptr_t reserved[4]; ///< reserved for future use + + struct Dav1dRef *frame_hdr_ref; ///< Dav1dFrameHeader allocation origin + struct Dav1dRef *seq_hdr_ref; ///< Dav1dSequenceHeader allocation origin + struct Dav1dRef *content_light_ref; ///< Dav1dContentLightLevel allocation origin + struct Dav1dRef *mastering_display_ref; ///< Dav1dMasteringDisplay allocation origin + struct Dav1dRef *itut_t35_ref; ///< Dav1dITUTT35 allocation origin + uintptr_t reserved_ref[4]; ///< reserved for future use + struct Dav1dRef *ref; ///< Frame data allocation origin + + void *allocator_data; ///< pointer managed by the allocator +} Dav1dPicture; + +typedef 
struct Dav1dPicAllocator { + void *cookie; ///< custom data to pass to the allocator callbacks. + /** + * Allocate the picture buffer based on the Dav1dPictureParameters. + * + * The data[0], data[1] and data[2] must be DAV1D_PICTURE_ALIGNMENT byte + * aligned and with a pixel width/height multiple of 128 pixels. Any + * allocated memory area should also be padded by DAV1D_PICTURE_ALIGNMENT + * bytes. + * data[1] and data[2] must share the same stride[1]. + * + * This function will be called on the main thread (the thread which calls + * dav1d_get_picture()). + * + * @param pic The picture to allocate the buffer for. The callback needs to + * fill the picture data[0], data[1], data[2], stride[0] and + * stride[1]. + * The allocator can fill the pic allocator_data pointer with + * a custom pointer that will be passed to + * release_picture_callback(). + * @param cookie Custom pointer passed to all calls. + * + * @note No fields other than data, stride and allocator_data must be filled + * by this callback. + * @return 0 on success. A negative DAV1D_ERR value on error. + */ + int (*alloc_picture_callback)(Dav1dPicture *pic, void *cookie); + /** + * Release the picture buffer. + * + * If frame threading is used, this function may be called by the main + * thread (the thread which calls dav1d_get_picture()) or any of the frame + * threads and thus must be thread-safe. If frame threading is not used, + * this function will only be called on the main thread. + * + * @param pic The picture that was filled by alloc_picture_callback(). + * @param cookie Custom pointer passed to all calls. + */ + void (*release_picture_callback)(Dav1dPicture *pic, void *cookie); +} Dav1dPicAllocator; + +/** + * Release reference to a picture. 
+ */ +DAV1D_API void dav1d_picture_unref(Dav1dPicture *p); + +#endif /* DAV1D_PICTURE_H */ diff --git a/media/video/av1/include/dav1d/version.h b/media/video/av1/include/dav1d/version.h new file mode 100644 index 00000000..b25f9cb1 --- /dev/null +++ b/media/video/av1/include/dav1d/version.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2019, VideoLAN and dav1d authors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef DAV1D_VERSION_H +#define DAV1D_VERSION_H + +#define DAV1D_API_VERSION_MAJOR 6 +#define DAV1D_API_VERSION_MINOR 7 +#define DAV1D_API_VERSION_PATCH 0 + +#endif /* DAV1D_VERSION_H */ diff --git a/media/video/av1/src/cdef.h b/media/video/av1/src/cdef.h new file mode 100644 index 00000000..07c84d9f --- /dev/null +++ b/media/video/av1/src/cdef.h @@ -0,0 +1,71 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef DAV1D_SRC_CDEF_H +#define DAV1D_SRC_CDEF_H + +#include +#include + +#include "common/bitdepth.h" + +enum CdefEdgeFlags { + CDEF_HAVE_LEFT = 1 << 0, + CDEF_HAVE_RIGHT = 1 << 1, + CDEF_HAVE_TOP = 1 << 2, + CDEF_HAVE_BOTTOM = 1 << 3, +}; + +#ifdef BITDEPTH +typedef const pixel (*const_left_pixel_row_2px)[2]; +#else +typedef const void *const_left_pixel_row_2px; +#endif + +// CDEF operates entirely on pre-filter data; if bottom/right edges are +// present (according to $edges), then the pre-filter data is located in +// $dst. However, the edge pixels above $dst may be post-filter, so in +// order to get access to pre-filter top pixels, use $top. +#define decl_cdef_fn(name) \ +void (name)(pixel *dst, ptrdiff_t stride, const_left_pixel_row_2px left, \ + const pixel *top, const pixel *bottom, \ + int pri_strength, int sec_strength, \ + int dir, int damping, enum CdefEdgeFlags edges HIGHBD_DECL_SUFFIX) +typedef decl_cdef_fn(*cdef_fn); + +#define decl_cdef_dir_fn(name) \ +int (name)(const pixel *dst, ptrdiff_t dst_stride, unsigned *var HIGHBD_DECL_SUFFIX) +typedef decl_cdef_dir_fn(*cdef_dir_fn); + +typedef struct Dav1dCdefDSPContext { + cdef_dir_fn dir; + cdef_fn fb[3 /* 444/luma, 422, 420 */]; +} Dav1dCdefDSPContext; + +bitfn_decls(void dav1d_cdef_dsp_init, Dav1dCdefDSPContext *c); + +#endif /* DAV1D_SRC_CDEF_H */ diff --git a/media/video/av1/src/cdef_tmpl.c b/media/video/av1/src/cdef_tmpl.c new file mode 100644 index 00000000..84993aeb --- /dev/null +++ b/media/video/av1/src/cdef_tmpl.c @@ -0,0 +1,157 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" + +#include +#include + +#include "common/intops.h" + +#include "src/cdef.h" +#include "src/tables.h" + +static int cdef_find_dir_c(const pixel *img, const ptrdiff_t stride, + unsigned *const var HIGHBD_DECL_SUFFIX) +{ + const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8; + printf("bitdepth_max = %d, bitdepth_min_8 = %d\n", bitdepth_max, bitdepth_min_8); + int partial_sum_hv[2][8] = { { 0 } }; + int partial_sum_diag[2][15] = { { 0 } }; + int partial_sum_alt[4][11] = { { 0 } }; + + for (int y = 0; y < 8; y++) { + printf("img: y: %d : %p -> \t", y, img); + for (int x = 0; x < 8; x++) { + const int px = (img[x] >> bitdepth_min_8) - 128; + printf("%04x/%04x ", img[x], px); + + partial_sum_diag[0][ y + x ] += px; + partial_sum_alt [0][ y + (x >> 1)] += px; + partial_sum_hv [0][ y ] += px; + partial_sum_alt [1][3 + y - (x >> 1)] += px; + partial_sum_diag[1][7 + y - x ] += px; + partial_sum_alt [2][3 - (y >> 1) + x ] += px; + partial_sum_hv [1][ x ] += px; + partial_sum_alt [3][ (y >> 1) + x ] += px; + } + printf("\n"); + img += PXSTRIDE(stride); + } + + printf("partial_sum_alt : \n"); + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 11; x++) { + printf("%08x ", partial_sum_alt[y][x]); + } + printf("\n"); + } + printf("\n"); + + unsigned cost[8] = { 0 }; + for (int n = 0; n < 8; n++) { + cost[2] += partial_sum_hv[0][n] * partial_sum_hv[0][n]; + cost[6] += partial_sum_hv[1][n] * partial_sum_hv[1][n]; + } + cost[2] *= 105; + cost[6] *= 105; + + static const uint16_t div_table[7] = { 840, 420, 280, 210, 168, 140, 120 }; + for (int n = 0; n < 7; n++) { + const int d = div_table[n]; + printf("n: %d\n", n); + int t = partial_sum_diag[0][14 - n]; + printf("partial_sum_diag[0][14 - %d] = %d/%08x, partial_sum_diag[0][14 - %d]^2 = %d/%08x\n", n, t, t , n, t * t, t * t); + t = (partial_sum_diag[0][n] * partial_sum_diag[0][n] + + partial_sum_diag[0][14 - n] * partial_sum_diag[0][14 - n]) * d; + printf("t = %d/%08x, d = %d/%08x, t * d = 
%d/%08x\n", t, t, d, d, t * d, t * d); + cost[0] += t * d; + cost[4] += (partial_sum_diag[1][n] * partial_sum_diag[1][n] + + partial_sum_diag[1][14 - n] * partial_sum_diag[1][14 - n]) * d; + } + cost[0] += partial_sum_diag[0][7] * partial_sum_diag[0][7] * 105; + cost[4] += partial_sum_diag[1][7] * partial_sum_diag[1][7] * 105; + + printf("cost: \n"); + for (int y = 0; y < 8; y++) { + printf("%08x ", cost[y]); + } + printf("\n"); + + for (int n = 0; n < 4; n++) { + unsigned *const cost_ptr = &cost[n * 2 + 1]; + for (int m = 0; m < 5; m++) + *cost_ptr += partial_sum_alt[n][3 + m] * partial_sum_alt[n][3 + m]; + *cost_ptr *= 105; + for (int m = 0; m < 3; m++) { + const int d = div_table[2 * m + 1]; + *cost_ptr += (partial_sum_alt[n][m] * partial_sum_alt[n][m] + + partial_sum_alt[n][10 - m] * partial_sum_alt[n][10 - m]) * d; + } + } + printf("cost: \n"); + for (int y = 0; y < 8; y++) { + printf("%08x ", cost[y]); + } + printf("\n"); + + int best_dir = 0; + unsigned best_cost = cost[0]; + for (int n = 1; n < 8; n++) { + if (cost[n] > best_cost) { + best_cost = cost[n]; + best_dir = n; + } + } + + *var = (best_cost - (cost[best_dir ^ 4])) >> 10; + return best_dir; +} + +#if HAVE_ASM +#if ARCH_AARCH64 || ARCH_ARM +#include "src/arm/cdef.h" +#elif ARCH_PPC64LE +#include "src/ppc/cdef.h" +#elif ARCH_X86 +#include "src/x86/cdef.h" +#endif +#endif + +COLD void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) { + c->dir = cdef_find_dir_c; + +#if HAVE_ASM +#if ARCH_AARCH64 || ARCH_ARM + cdef_dsp_init_arm(c); +#elif ARCH_PPC64LE + cdef_dsp_init_ppc(c); +#elif ARCH_X86 + cdef_dsp_init_x86(c); +#endif +#endif +} diff --git a/media/video/av1/src/cpu.c b/media/video/av1/src/cpu.c new file mode 100644 index 00000000..d24148c3 --- /dev/null +++ b/media/video/av1/src/cpu.c @@ -0,0 +1,101 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "config.h" + +#include + +#include "src/cpu.h" +#include "src/log.h" + +#ifdef _WIN32 +#include +#elif defined(__APPLE__) +#include +#include +#else +#include +#include +#endif + +#ifdef HAVE_PTHREAD_NP_H +#include +#endif +#if defined(__FreeBSD__) +#define cpu_set_t cpuset_t +#endif + +unsigned dav1d_cpu_flags = 0U; +unsigned dav1d_cpu_flags_mask = ~0U; + +COLD void dav1d_init_cpu(void) { +#if HAVE_ASM && !__has_feature(memory_sanitizer) +// memory sanitizer is inherently incompatible with asm +#if ARCH_AARCH64 || ARCH_ARM + dav1d_cpu_flags = dav1d_get_cpu_flags_arm(); +#elif ARCH_PPC64LE + dav1d_cpu_flags = dav1d_get_cpu_flags_ppc(); +#elif ARCH_X86 + dav1d_cpu_flags = dav1d_get_cpu_flags_x86(); +#endif +#endif +} + +COLD void dav1d_set_cpu_flags_mask(const unsigned mask) { + dav1d_cpu_flags_mask = mask; +} + +COLD int dav1d_num_logical_processors(Dav1dContext *const c) { +#ifdef _WIN32 +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + GROUP_AFFINITY affinity; + if (GetThreadGroupAffinity(GetCurrentThread(), &affinity)) { + int num_processors = 1; + while (affinity.Mask &= affinity.Mask - 1) + num_processors++; + return num_processors; + } +#else + SYSTEM_INFO system_info; + GetNativeSystemInfo(&system_info); + return system_info.dwNumberOfProcessors; +#endif +#elif defined(HAVE_PTHREAD_GETAFFINITY_NP) && defined(CPU_COUNT) + cpu_set_t affinity; + if (!pthread_getaffinity_np(pthread_self(), sizeof(affinity), &affinity)) + return CPU_COUNT(&affinity); +#elif defined(__APPLE__) + int num_processors; + size_t length = sizeof(num_processors); + if (!sysctlbyname("hw.logicalcpu", &num_processors, &length, NULL, 0)) + return num_processors; +#elif defined(_SC_NPROCESSORS_ONLN) + return (int)sysconf(_SC_NPROCESSORS_ONLN); +#endif + if (c) + dav1d_log(c, "Unable to detect thread count, defaulting to single-threaded mode\n"); + return 1; +} diff --git a/media/video/av1/src/cpu.h b/media/video/av1/src/cpu.h new file mode 100644 index 
00000000..978f2bf0 --- /dev/null +++ b/media/video/av1/src/cpu.h @@ -0,0 +1,105 @@ +/* + * Copyright © 2018-2022, VideoLAN and dav1d authors + * Copyright © 2018-2022, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * Return the CPU feature flags currently in effect.
 *
 * The base value is dav1d_cpu_flags restricted by dav1d_cpu_flags_mask.
 * When TRIM_DSP_FUNCTIONS is enabled, flags implied by the compiler's
 * predefined target macros (or by HAVE_SVP64 on PPC64LE) are OR-ed in
 * afterwards, so they are set regardless of the mask.
 */
static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;

#if TRIM_DSP_FUNCTIONS
/* Since this function is inlined, unconditionally setting a flag here will
 * enable dead code elimination in the calling function. */
#if ARCH_AARCH64 || ARCH_ARM
#if defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32) || ARCH_AARCH64
    flags |= DAV1D_ARM_CPU_FLAG_NEON;
#endif
#elif ARCH_PPC64LE
/* VSX and SVP64 are independent of each other: both may be set. */
#if defined(__VSX__)
    flags |= DAV1D_PPC_CPU_FLAG_VSX;
#endif
#if defined(HAVE_SVP64)
    flags |= DAV1D_PPC_CPU_FLAG_SVP64;
#endif
#elif ARCH_X86
/* Only the first matching tier of this #if/#elif ladder applies; each tier
 * also sets the flags of every tier below it. */
#if defined(__AVX512F__) && defined(__AVX512CD__) && \
    defined(__AVX512BW__) && defined(__AVX512DQ__) && \
    defined(__AVX512VL__) && defined(__AVX512VNNI__) && \
    defined(__AVX512IFMA__) && defined(__AVX512VBMI__) && \
    defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__) && \
    defined(__AVX512BITALG__) && defined(__GFNI__) && \
    defined(__VAES__) && defined(__VPCLMULQDQ__)
    flags |= DAV1D_X86_CPU_FLAG_AVX512ICL |
             DAV1D_X86_CPU_FLAG_AVX2 |
             DAV1D_X86_CPU_FLAG_SSE41 |
             DAV1D_X86_CPU_FLAG_SSSE3 |
             DAV1D_X86_CPU_FLAG_SSE2;
#elif defined(__AVX2__)
    flags |= DAV1D_X86_CPU_FLAG_AVX2 |
             DAV1D_X86_CPU_FLAG_SSE41 |
             DAV1D_X86_CPU_FLAG_SSSE3 |
             DAV1D_X86_CPU_FLAG_SSE2;
#elif defined(__SSE4_1__) || defined(__AVX__)
    flags |= DAV1D_X86_CPU_FLAG_SSE41 |
             DAV1D_X86_CPU_FLAG_SSSE3 |
             DAV1D_X86_CPU_FLAG_SSE2;
#elif defined(__SSSE3__)
    flags |= DAV1D_X86_CPU_FLAG_SSSE3 |
             DAV1D_X86_CPU_FLAG_SSE2;
#elif ARCH_X86_64 || defined(__SSE2__) || \
      (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
    flags |= DAV1D_X86_CPU_FLAG_SSE2;
#endif
#endif
#endif

    return flags;
}
+ */ + +#ifndef DAV1D_SRC_ENV_H +#define DAV1D_SRC_ENV_H + +#include +#include +#include + +#include "src/levels.h" +//#include "src/refmvs.h" +#include "src/tables.h" + +typedef struct BlockContext { + uint8_t ALIGN(mode[32], 8); + uint8_t ALIGN(lcoef[32], 8); + uint8_t ALIGN(ccoef[2][32], 8); + uint8_t ALIGN(seg_pred[32], 8); + uint8_t ALIGN(skip[32], 8); + uint8_t ALIGN(skip_mode[32], 8); + uint8_t ALIGN(intra[32], 8); + uint8_t ALIGN(comp_type[32], 8); + int8_t ALIGN(ref[2][32], 8); // -1 means intra + uint8_t ALIGN(filter[2][32], 8); // 3 means unset + int8_t ALIGN(tx_intra[32], 8); + int8_t ALIGN(tx[32], 8); + uint8_t ALIGN(tx_lpf_y[32], 8); + uint8_t ALIGN(tx_lpf_uv[32], 8); + uint8_t ALIGN(partition[16], 8); + uint8_t ALIGN(uvmode[32], 8); + uint8_t ALIGN(pal_sz[32], 8); +} BlockContext; + +static inline int get_intra_ctx(const BlockContext *const a, + const BlockContext *const l, + const int yb4, const int xb4, + const int have_top, const int have_left) +{ + if (have_left) { + if (have_top) { + const int ctx = l->intra[yb4] + a->intra[xb4]; + return ctx + (ctx == 2); + } else + return l->intra[yb4] * 2; + } else { + return have_top ? 
a->intra[xb4] * 2 : 0; + } +} + +static inline int get_tx_ctx(const BlockContext *const a, + const BlockContext *const l, + const TxfmInfo *const max_tx, + const int yb4, const int xb4) +{ + return (l->tx_intra[yb4] >= max_tx->lh) + (a->tx_intra[xb4] >= max_tx->lw); +} + +static inline int get_partition_ctx(const BlockContext *const a, + const BlockContext *const l, + const enum BlockLevel bl, + const int yb8, const int xb8) +{ + return ((a->partition[xb8] >> (4 - bl)) & 1) + + (((l->partition[yb8] >> (4 - bl)) & 1) << 1); +} + +static inline unsigned gather_left_partition_prob(const uint16_t *const in, + const enum BlockLevel bl) +{ + unsigned out = in[PARTITION_H - 1] - in[PARTITION_H]; + // Exploit the fact that cdfs for PARTITION_SPLIT, PARTITION_T_TOP_SPLIT, + // PARTITION_T_BOTTOM_SPLIT and PARTITION_T_LEFT_SPLIT are neighbors. + out += in[PARTITION_SPLIT - 1] - in[PARTITION_T_LEFT_SPLIT]; + if (bl != BL_128X128) + out += in[PARTITION_H4 - 1] - in[PARTITION_H4]; + return out; +} + +static inline unsigned gather_top_partition_prob(const uint16_t *const in, + const enum BlockLevel bl) +{ + // Exploit the fact that cdfs for PARTITION_V, PARTITION_SPLIT and + // PARTITION_T_TOP_SPLIT are neighbors. + unsigned out = in[PARTITION_V - 1] - in[PARTITION_T_TOP_SPLIT]; + // Exploit the facts that cdfs for PARTITION_T_LEFT_SPLIT and + // PARTITION_T_RIGHT_SPLIT are neighbors, the probability for + // PARTITION_V4 is always zero, and the probability for + // PARTITION_T_RIGHT_SPLIT is zero in 128x128 blocks. + out += in[PARTITION_T_LEFT_SPLIT - 1]; + if (bl != BL_128X128) + out += in[PARTITION_V4 - 1] - in[PARTITION_T_RIGHT_SPLIT]; + return out; +} + +static inline enum TxfmType get_uv_inter_txtp(const TxfmInfo *const uvt_dim, + const enum TxfmType ytxtp) +{ + if (uvt_dim->max == TX_32X32) + return ytxtp == IDTX ? 
IDTX : DCT_DCT; + if (uvt_dim->min == TX_16X16 && + ((1 << ytxtp) & ((1 << H_FLIPADST) | (1 << V_FLIPADST) | + (1 << H_ADST) | (1 << V_ADST)))) + { + return DCT_DCT; + } + + return ytxtp; +} + +static inline int get_filter_ctx(const BlockContext *const a, + const BlockContext *const l, + const int comp, const int dir, const int ref, + const int yb4, const int xb4) +{ + const int a_filter = (a->ref[0][xb4] == ref || a->ref[1][xb4] == ref) ? + a->filter[dir][xb4] : DAV1D_N_SWITCHABLE_FILTERS; + const int l_filter = (l->ref[0][yb4] == ref || l->ref[1][yb4] == ref) ? + l->filter[dir][yb4] : DAV1D_N_SWITCHABLE_FILTERS; + + if (a_filter == l_filter) { + return comp * 4 + a_filter; + } else if (a_filter == DAV1D_N_SWITCHABLE_FILTERS) { + return comp * 4 + l_filter; + } else if (l_filter == DAV1D_N_SWITCHABLE_FILTERS) { + return comp * 4 + a_filter; + } else { + return comp * 4 + DAV1D_N_SWITCHABLE_FILTERS; + } +} + +static inline int get_comp_ctx(const BlockContext *const a, + const BlockContext *const l, + const int yb4, const int xb4, + const int have_top, const int have_left) +{ + if (have_top) { + if (have_left) { + if (a->comp_type[xb4]) { + if (l->comp_type[yb4]) { + return 4; + } else { + // 4U means intra (-1) or bwd (>= 4) + return 2 + ((unsigned)l->ref[0][yb4] >= 4U); + } + } else if (l->comp_type[yb4]) { + // 4U means intra (-1) or bwd (>= 4) + return 2 + ((unsigned)a->ref[0][xb4] >= 4U); + } else { + return (l->ref[0][yb4] >= 4) ^ (a->ref[0][xb4] >= 4); + } + } else { + return a->comp_type[xb4] ? 3 : a->ref[0][xb4] >= 4; + } + } else if (have_left) { + return l->comp_type[yb4] ? 
3 : l->ref[0][yb4] >= 4; + } else { + return 1; + } +} + +static inline int get_comp_dir_ctx(const BlockContext *const a, + const BlockContext *const l, + const int yb4, const int xb4, + const int have_top, const int have_left) +{ +#define has_uni_comp(edge, off) \ + ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4)) + + if (have_top && have_left) { + const int a_intra = a->intra[xb4], l_intra = l->intra[yb4]; + + if (a_intra && l_intra) return 2; + if (a_intra || l_intra) { + const BlockContext *const edge = a_intra ? l : a; + const int off = a_intra ? yb4 : xb4; + + if (edge->comp_type[off] == COMP_INTER_NONE) return 2; + return 1 + 2 * has_uni_comp(edge, off); + } + + const int a_comp = a->comp_type[xb4] != COMP_INTER_NONE; + const int l_comp = l->comp_type[yb4] != COMP_INTER_NONE; + const int a_ref0 = a->ref[0][xb4], l_ref0 = l->ref[0][yb4]; + + if (!a_comp && !l_comp) { + return 1 + 2 * ((a_ref0 >= 4) == (l_ref0 >= 4)); + } else if (!a_comp || !l_comp) { + const BlockContext *const edge = a_comp ? a : l; + const int off = a_comp ? xb4 : yb4; + + if (!has_uni_comp(edge, off)) return 1; + return 3 + ((a_ref0 >= 4) == (l_ref0 >= 4)); + } else { + const int a_uni = has_uni_comp(a, xb4), l_uni = has_uni_comp(l, yb4); + + if (!a_uni && !l_uni) return 0; + if (!a_uni || !l_uni) return 2; + return 3 + ((a_ref0 == 4) == (l_ref0 == 4)); + } + } else if (have_top || have_left) { + const BlockContext *const edge = have_left ? l : a; + const int off = have_left ? 
/*
 * Difference poc0 - poc1 reduced to `order_hint_n_bits` bits and
 * interpreted as a signed value of that width. Returns 0 when
 * order_hint_n_bits is 0.
 */
static inline int get_poc_diff(const int order_hint_n_bits,
                               const int poc0, const int poc1)
{
    if (order_hint_n_bits == 0)
        return 0;

    const int sign_bit = 1 << (order_hint_n_bits - 1);
    const int delta = poc0 - poc1;
    const int low_bits = delta & (sign_bit - 1);

    /* A set top bit makes the n-bit value negative. */
    return (delta & sign_bit) ? low_bits - sign_bit : low_bits;
}
3 : 0; + + return imin(a_ctx + l_ctx, 5); +} + +#define av1_get_ref_2_ctx av1_get_bwd_ref_ctx +#define av1_get_ref_3_ctx av1_get_fwd_ref_ctx +#define av1_get_ref_4_ctx av1_get_fwd_ref_1_ctx +#define av1_get_ref_5_ctx av1_get_fwd_ref_2_ctx +#define av1_get_ref_6_ctx av1_get_bwd_ref_1_ctx +#define av1_get_uni_p_ctx av1_get_ref_ctx +#define av1_get_uni_p2_ctx av1_get_fwd_ref_2_ctx + +static inline int av1_get_ref_ctx(const BlockContext *const a, + const BlockContext *const l, + const int yb4, const int xb4, + int have_top, int have_left) +{ + int cnt[2] = { 0 }; + + if (have_top && !a->intra[xb4]) { + cnt[a->ref[0][xb4] >= 4]++; + if (a->comp_type[xb4]) cnt[a->ref[1][xb4] >= 4]++; + } + + if (have_left && !l->intra[yb4]) { + cnt[l->ref[0][yb4] >= 4]++; + if (l->comp_type[yb4]) cnt[l->ref[1][yb4] >= 4]++; + } + + return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2; +} + +static inline int av1_get_fwd_ref_ctx(const BlockContext *const a, + const BlockContext *const l, + const int yb4, const int xb4, + const int have_top, const int have_left) +{ + int cnt[4] = { 0 }; + + if (have_top && !a->intra[xb4]) { + if (a->ref[0][xb4] < 4) cnt[a->ref[0][xb4]]++; + if (a->comp_type[xb4] && a->ref[1][xb4] < 4) cnt[a->ref[1][xb4]]++; + } + + if (have_left && !l->intra[yb4]) { + if (l->ref[0][yb4] < 4) cnt[l->ref[0][yb4]]++; + if (l->comp_type[yb4] && l->ref[1][yb4] < 4) cnt[l->ref[1][yb4]]++; + } + + cnt[0] += cnt[1]; + cnt[2] += cnt[3]; + + return cnt[0] == cnt[2] ? 1 : cnt[0] < cnt[2] ? 
0 : 2; +} + +static inline int av1_get_fwd_ref_1_ctx(const BlockContext *const a, + const BlockContext *const l, + const int yb4, const int xb4, + const int have_top, const int have_left) +{ + int cnt[2] = { 0 }; + + if (have_top && !a->intra[xb4]) { + if (a->ref[0][xb4] < 2) cnt[a->ref[0][xb4]]++; + if (a->comp_type[xb4] && a->ref[1][xb4] < 2) cnt[a->ref[1][xb4]]++; + } + + if (have_left && !l->intra[yb4]) { + if (l->ref[0][yb4] < 2) cnt[l->ref[0][yb4]]++; + if (l->comp_type[yb4] && l->ref[1][yb4] < 2) cnt[l->ref[1][yb4]]++; + } + + return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2; +} + +static inline int av1_get_fwd_ref_2_ctx(const BlockContext *const a, + const BlockContext *const l, + const int yb4, const int xb4, + const int have_top, const int have_left) +{ + int cnt[2] = { 0 }; + + if (have_top && !a->intra[xb4]) { + if ((a->ref[0][xb4] ^ 2U) < 2) cnt[a->ref[0][xb4] - 2]++; + if (a->comp_type[xb4] && (a->ref[1][xb4] ^ 2U) < 2) cnt[a->ref[1][xb4] - 2]++; + } + + if (have_left && !l->intra[yb4]) { + if ((l->ref[0][yb4] ^ 2U) < 2) cnt[l->ref[0][yb4] - 2]++; + if (l->comp_type[yb4] && (l->ref[1][yb4] ^ 2U) < 2) cnt[l->ref[1][yb4] - 2]++; + } + + return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2; +} + +static inline int av1_get_bwd_ref_ctx(const BlockContext *const a, + const BlockContext *const l, + const int yb4, const int xb4, + const int have_top, const int have_left) +{ + int cnt[3] = { 0 }; + + if (have_top && !a->intra[xb4]) { + if (a->ref[0][xb4] >= 4) cnt[a->ref[0][xb4] - 4]++; + if (a->comp_type[xb4] && a->ref[1][xb4] >= 4) cnt[a->ref[1][xb4] - 4]++; + } + + if (have_left && !l->intra[yb4]) { + if (l->ref[0][yb4] >= 4) cnt[l->ref[0][yb4] - 4]++; + if (l->comp_type[yb4] && l->ref[1][yb4] >= 4) cnt[l->ref[1][yb4] - 4]++; + } + + cnt[1] += cnt[0]; + + return cnt[2] == cnt[1] ? 1 : cnt[1] < cnt[2] ? 
0 : 2; +} + +static inline int av1_get_bwd_ref_1_ctx(const BlockContext *const a, + const BlockContext *const l, + const int yb4, const int xb4, + const int have_top, const int have_left) +{ + int cnt[3] = { 0 }; + + if (have_top && !a->intra[xb4]) { + if (a->ref[0][xb4] >= 4) cnt[a->ref[0][xb4] - 4]++; + if (a->comp_type[xb4] && a->ref[1][xb4] >= 4) cnt[a->ref[1][xb4] - 4]++; + } + + if (have_left && !l->intra[yb4]) { + if (l->ref[0][yb4] >= 4) cnt[l->ref[0][yb4] - 4]++; + if (l->comp_type[yb4] && l->ref[1][yb4] >= 4) cnt[l->ref[1][yb4] - 4]++; + } + + return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2; +} + +static inline int av1_get_uni_p1_ctx(const BlockContext *const a, + const BlockContext *const l, + const int yb4, const int xb4, + const int have_top, const int have_left) +{ + int cnt[3] = { 0 }; + + if (have_top && !a->intra[xb4]) { + if (a->ref[0][xb4] - 1U < 3) cnt[a->ref[0][xb4] - 1]++; + if (a->comp_type[xb4] && a->ref[1][xb4] - 1U < 3) cnt[a->ref[1][xb4] - 1]++; + } + + if (have_left && !l->intra[yb4]) { + if (l->ref[0][yb4] - 1U < 3) cnt[l->ref[0][yb4] - 1]++; + if (l->comp_type[yb4] && l->ref[1][yb4] - 1U < 3) cnt[l->ref[1][yb4] - 1]++; + } + + cnt[1] += cnt[2]; + + return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2; +} + +static inline int get_drl_context(const refmvs_candidate *const ref_mv_stack, + const int ref_idx) +{ + if (ref_mv_stack[ref_idx].weight >= 640) + return ref_mv_stack[ref_idx + 1].weight < 640; + + return ref_mv_stack[ref_idx + 1].weight < 640 ? 
2 : 0; +} + +static inline unsigned get_cur_frame_segid(const int by, const int bx, + const int have_top, + const int have_left, + int *const seg_ctx, + const uint8_t *cur_seg_map, + const ptrdiff_t stride) +{ + cur_seg_map += bx + by * stride; + if (have_left && have_top) { + const int l = cur_seg_map[-1]; + const int a = cur_seg_map[-stride]; + const int al = cur_seg_map[-(stride + 1)]; + + if (l == a && al == l) *seg_ctx = 2; + else if (l == a || al == l || a == al) *seg_ctx = 1; + else *seg_ctx = 0; + return a == al ? a : l; + } else { + *seg_ctx = 0; + return have_left ? cur_seg_map[-1] : have_top ? cur_seg_map[-stride] : 0; + } +} + +static inline void fix_int_mv_precision(mv *const mv) { + mv->x = (mv->x - (mv->x >> 15) + 3) & ~7U; + mv->y = (mv->y - (mv->y >> 15) + 3) & ~7U; +} + +static inline void fix_mv_precision(const Dav1dFrameHeader *const hdr, + mv *const mv) +{ + if (hdr->force_integer_mv) { + fix_int_mv_precision(mv); + } else if (!hdr->hp) { + mv->x = (mv->x - (mv->x >> 15)) & ~1U; + mv->y = (mv->y - (mv->y >> 15)) & ~1U; + } +} + +static inline mv get_gmv_2d(const Dav1dWarpedMotionParams *const gmv, + const int bx4, const int by4, + const int bw4, const int bh4, + const Dav1dFrameHeader *const hdr) +{ + switch (gmv->type) { + case DAV1D_WM_TYPE_ROT_ZOOM: + assert(gmv->matrix[5] == gmv->matrix[2]); + assert(gmv->matrix[4] == -gmv->matrix[3]); + // fall-through + default: + case DAV1D_WM_TYPE_AFFINE: { + const int x = bx4 * 4 + bw4 * 2 - 1; + const int y = by4 * 4 + bh4 * 2 - 1; + const int xc = (gmv->matrix[2] - (1 << 16)) * x + + gmv->matrix[3] * y + gmv->matrix[0]; + const int yc = (gmv->matrix[5] - (1 << 16)) * y + + gmv->matrix[4] * x + gmv->matrix[1]; + const int shift = 16 - (3 - !hdr->hp); + const int round = (1 << shift) >> 1; + mv res = (mv) { + .y = apply_sign(((abs(yc) + round) >> shift) << !hdr->hp, yc), + .x = apply_sign(((abs(xc) + round) >> shift) << !hdr->hp, xc), + }; + if (hdr->force_integer_mv) + fix_int_mv_precision(&res); + 
return res; + } + case DAV1D_WM_TYPE_TRANSLATION: { + mv res = (mv) { + .y = gmv->matrix[0] >> 13, + .x = gmv->matrix[1] >> 13, + }; + if (hdr->force_integer_mv) + fix_int_mv_precision(&res); + return res; + } + case DAV1D_WM_TYPE_IDENTITY: + return (mv) { .x = 0, .y = 0 }; + } +} + +#endif /* DAV1D_SRC_ENV_H */ diff --git a/media/video/av1/src/internal.h b/media/video/av1/src/internal.h new file mode 100644 index 00000000..a387cdb2 --- /dev/null +++ b/media/video/av1/src/internal.h @@ -0,0 +1,374 @@ +/* + * Copyright © 2018-2021, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef DAV1D_SRC_INTERNAL_H +#define DAV1D_SRC_INTERNAL_H + +#include + +#include "dav1d/data.h" + +typedef struct Dav1dFrameContext Dav1dFrameContext; +typedef struct Dav1dTileState Dav1dTileState; +typedef struct Dav1dTaskContext Dav1dTaskContext; +typedef struct Dav1dTask Dav1dTask; + +#include "common/attributes.h" + +#include "src/cdef.h" +#include "src/picture.h" +#include "src/thread.h" + +typedef struct Dav1dDSPContext { + Dav1dCdefDSPContext cdef; +} Dav1dDSPContext; + +struct Dav1dTileGroup { + Dav1dData data; + int start, end; +}; + +enum TaskType { + DAV1D_TASK_TYPE_INIT, + DAV1D_TASK_TYPE_INIT_CDF, + DAV1D_TASK_TYPE_TILE_ENTROPY, + DAV1D_TASK_TYPE_ENTROPY_PROGRESS, + DAV1D_TASK_TYPE_TILE_RECONSTRUCTION, + DAV1D_TASK_TYPE_DEBLOCK_COLS, + DAV1D_TASK_TYPE_DEBLOCK_ROWS, + DAV1D_TASK_TYPE_CDEF, + DAV1D_TASK_TYPE_SUPER_RESOLUTION, + DAV1D_TASK_TYPE_LOOP_RESTORATION, + DAV1D_TASK_TYPE_RECONSTRUCTION_PROGRESS, + DAV1D_TASK_TYPE_FG_PREP, + DAV1D_TASK_TYPE_FG_APPLY, +}; + +struct Dav1dContext { + Dav1dFrameContext *fc; + unsigned n_fc; + + Dav1dTaskContext *tc; + unsigned n_tc; + + // cache of OBUs that make up a single frame before we submit them + // to a frame worker to be decoded + struct Dav1dTileGroup *tile; + int n_tile_data_alloc; + int n_tile_data; + int n_tiles; + Dav1dMemPool *seq_hdr_pool; + Dav1dRef *seq_hdr_ref; + Dav1dSequenceHeader *seq_hdr; + Dav1dMemPool *frame_hdr_pool; + Dav1dRef *frame_hdr_ref; + Dav1dFrameHeader *frame_hdr; + + Dav1dRef *content_light_ref; + Dav1dContentLightLevel *content_light; + Dav1dRef *mastering_display_ref; + Dav1dMasteringDisplay *mastering_display; + Dav1dRef *itut_t35_ref; + Dav1dITUTT35 *itut_t35; + + // decoded output picture queue + Dav1dData in; + Dav1dThreadPicture out, cache; + // dummy is a pointer to prevent compiler errors about atomic_load() + // not taking const arguments + atomic_int flush_mem, *flush; + struct { + Dav1dThreadPicture *out_delayed; + unsigned next; + } frame_thread; + + // task 
threading (refer to tc[] for per_thread thingies) + struct TaskThreadData { + pthread_mutex_t lock; + pthread_cond_t cond; + atomic_uint first; + unsigned cur; + // This is used for delayed reset of the task cur pointer when + // such operation is needed but the thread doesn't enter a critical + // section (typically when executing the next sbrow task locklessly). + // See src/thread_task.c:reset_task_cur(). + atomic_uint reset_task_cur; + atomic_int cond_signaled; + struct { + int exec; + pthread_cond_t cond; + const Dav1dPicture *in; + Dav1dPicture *out; + enum TaskType type; + atomic_int progress[2]; /* [0]=started, [1]=completed */ + union { + struct { + ALIGN(uint8_t scaling_8bpc[3][256], 64); + }; + struct { + ALIGN(uint8_t scaling_16bpc[3][4096], 64); + }; + }; + } delayed_fg; + int inited; + } task_thread; + + // reference/entropy state + Dav1dMemPool *segmap_pool; + Dav1dMemPool *refmvs_pool; + struct { + Dav1dThreadPicture p; + Dav1dRef *segmap; + Dav1dRef *refmvs; + unsigned refpoc[7]; + } refs[8]; + Dav1dMemPool *cdf_pool; + + Dav1dDSPContext dsp[3 /* 8, 10, 12 bits/component */]; + //Dav1dRefmvsDSPContext refmvs_dsp; + + Dav1dPicAllocator allocator; + int apply_grain; + int operating_point; + unsigned operating_point_idc; + int all_layers; + int max_spatial_id; + unsigned frame_size_limit; + int strict_std_compliance; + int output_invisible_frames; + enum Dav1dInloopFilterType inloop_filters; + int drain; + enum PictureFlags frame_flags; + enum Dav1dEventFlags event_flags; + Dav1dDataProps cached_error_props; + int cached_error; + + Dav1dLogger logger; + + Dav1dMemPool *picture_pool; +}; + +struct Dav1dTask { + unsigned frame_idx; // frame thread id + enum TaskType type; // task work + int sby; // sbrow + + // task dependencies + int recon_progress, deblock_progress; + int deps_skip; + struct Dav1dTask *next; // only used in task queue +}; + +struct Dav1dFrameContext { + Dav1dRef *seq_hdr_ref; + Dav1dSequenceHeader *seq_hdr; + Dav1dRef *frame_hdr_ref; 
+ Dav1dFrameHeader *frame_hdr; + Dav1dThreadPicture refp[7]; + Dav1dPicture cur; // during block coding / reconstruction + Dav1dThreadPicture sr_cur; // after super-resolution upscaling + Dav1dRef *mvs_ref; + Dav1dRef *ref_mvs_ref[7]; + Dav1dRef *cur_segmap_ref, *prev_segmap_ref; + uint8_t *cur_segmap; + const uint8_t *prev_segmap; + unsigned refpoc[7], refrefpoc[7][7]; + uint8_t gmv_warp_allowed[7]; + struct Dav1dTileGroup *tile; + int n_tile_data_alloc; + int n_tile_data; + + // for scalable references + struct ScalableMotionParams { + int scale; // if no scaling, this is 0 + int step; + } svc[7][2 /* x, y */]; + int resize_step[2 /* y, uv */], resize_start[2 /* y, uv */]; + + const Dav1dContext *c; + Dav1dTileState *ts; + int n_ts; + const Dav1dDSPContext *dsp; + + int ipred_edge_sz; + pixel *ipred_edge[3]; + ptrdiff_t b4_stride; + int w4, h4, bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step, sr_sb128w; + uint16_t dq[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */]; + int a_sz /* w*tile_rows */; + uint8_t jnt_weights[7][7]; + int bitdepth_max; + + struct { + int next_tile_row[2 /* 0: reconstruction, 1: entropy */]; + int entropy_progress; + atomic_int deblock_progress; // in sby units + atomic_uint *frame_progress, *copy_lpf_progress; + // indexed using t->by * f->b4_stride + t->bx + struct CodedBlockInfo { + int16_t eob[3 /* plane */]; + uint8_t txtp[3 /* plane */]; + } *cbi; + // indexed using (t->by >> 1) * (f->b4_stride >> 1) + (t->bx >> 1) + uint16_t (*pal)[3 /* plane */][8 /* idx */]; + // iterated over inside tile state + uint8_t *pal_idx; + coef *cf; + int prog_sz; + int pal_sz, pal_idx_sz, cf_sz; + // start offsets per tile + int *tile_start_off; + } frame_thread; + + struct { + pthread_cond_t cond; + struct TaskThreadData *ttd; + struct Dav1dTask *tasks, *tile_tasks[2], init_task; + int num_tasks, num_tile_tasks; + int init_done; + int done[2]; + int retval; + int update_set; // whether we need to update CDF reference + atomic_int error; + int 
task_counter; + struct Dav1dTask *task_head, *task_tail; + // Points to the task directly before the cur pointer in the queue. + // This cur pointer is theoretical here, we actually keep track of the + // "prev_t" variable. This is needed to not lose the tasks in + // [head;cur-1] when picking one for execution. + struct Dav1dTask *task_cur_prev; + } task_thread; + + // threading (refer to tc[] for per-thread things) + struct FrameTileThreadData { + int (*lowest_pixel_mem)[7][2]; + int lowest_pixel_mem_sz; + } tile_thread; +}; + +struct Dav1dTileState { + + struct { + int col_start, col_end, row_start, row_end; // in 4px units + int col, row; // in tile units + } tiling; + + // in sby units, TILE_ERROR after a decoding error + atomic_int progress[2 /* 0: reconstruction, 1: entropy */]; + struct { + uint8_t *pal_idx; + coef *cf; + } frame_thread[2 /* 0: reconstruction, 1: entropy */]; + + // in fullpel units, [0] = Y, [1] = UV, used for progress requirements + // each entry is one tile-sbrow; middle index is refidx + int (*lowest_pixel)[7][2]; + + uint16_t dqmem[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */]; + const uint16_t (*dq)[3][2]; + int last_qidx; + + int8_t last_delta_lf[4]; + uint8_t lflvlmem[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */]; + const uint8_t (*lflvl)[4][8][2]; + +}; + +struct Dav1dTaskContext { + const Dav1dContext *c; + const Dav1dFrameContext *f; + Dav1dTileState *ts; + int bx, by; + ALIGN(union, 64) { + int16_t cf_8bpc [32 * 32]; + int32_t cf_16bpc[32 * 32]; + }; + // FIXME types can be changed to pixel (and dynamically allocated) + // which would make copy/assign operations slightly faster?
+ uint16_t al_pal[2 /* a/l */][32 /* bx/y4 */][3 /* plane */][8 /* palette_idx */]; + uint8_t pal_sz_uv[2 /* a/l */][32 /* bx4/by4 */]; + uint8_t txtp_map[32 * 32]; // inter-only + ALIGN(union, 64) { + struct { + union { + uint8_t lap_8bpc [128 * 32]; + uint16_t lap_16bpc[128 * 32]; + struct { + int16_t compinter[2][128 * 128]; + uint8_t seg_mask[128 * 128]; + }; + }; + union { + // stride=192 for non-SVC, or 320 for SVC + uint8_t emu_edge_8bpc [320 * (256 + 7)]; + uint16_t emu_edge_16bpc[320 * (256 + 7)]; + }; + }; + struct { + union { + uint8_t levels[32 * 34]; + struct { + uint8_t pal_order[64][8]; + uint8_t pal_ctx[64]; + }; + }; + int16_t ac[32 * 32]; + uint8_t pal_idx[2 * 64 * 64]; + uint16_t pal[3 /* plane */][8 /* palette_idx */]; + ALIGN(union, 64) { + struct { + uint8_t interintra_8bpc[64 * 64]; + uint8_t edge_8bpc[257]; + }; + struct { + uint16_t interintra_16bpc[64 * 64]; + uint16_t edge_16bpc[257]; + }; + }; + }; + } scratch; + + Dav1dWarpedMotionParams warpmv; + int top_pre_cdef_toggle; + int8_t *cur_sb_cdef_idx_ptr; + // for chroma sub8x8, we need to know the filter for all 4 subblocks in + // a 4x4 area, but the top/left one can go out of cache already, so this + // keeps it accessible + + struct { + int pass; + } frame_thread; + struct { + struct thread_data td; + struct TaskThreadData *ttd; + struct FrameTileThreadData *fttd; + int flushed; + int die; + } task_thread; +}; + +#endif /* DAV1D_SRC_INTERNAL_H */ diff --git a/media/video/av1/src/levels.h b/media/video/av1/src/levels.h new file mode 100644 index 00000000..0f510e9f --- /dev/null +++ b/media/video/av1/src/levels.h @@ -0,0 +1,289 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef DAV1D_SRC_LEVELS_H +#define DAV1D_SRC_LEVELS_H + +#include + +#include "dav1d/headers.h" +#include "common/attributes.h" + +enum ObuMetaType { + OBU_META_HDR_CLL = 1, + OBU_META_HDR_MDCV = 2, + OBU_META_SCALABILITY = 3, + OBU_META_ITUT_T35 = 4, + OBU_META_TIMECODE = 5, +}; + +enum TxfmSize { + TX_4X4, + TX_8X8, + TX_16X16, + TX_32X32, + TX_64X64, + N_TX_SIZES, +}; + +enum BlockLevel { + BL_128X128, + BL_64X64, + BL_32X32, + BL_16X16, + BL_8X8, + N_BL_LEVELS, +}; + +enum RectTxfmSize { + RTX_4X8 = N_TX_SIZES, + RTX_8X4, + RTX_8X16, + RTX_16X8, + RTX_16X32, + RTX_32X16, + RTX_32X64, + RTX_64X32, + RTX_4X16, + RTX_16X4, + RTX_8X32, + RTX_32X8, + RTX_16X64, + RTX_64X16, + N_RECT_TX_SIZES +}; + +enum TxfmType { + DCT_DCT, // DCT in both horizontal and vertical + ADST_DCT, // ADST in vertical, DCT in horizontal + DCT_ADST, // DCT in vertical, ADST in horizontal + ADST_ADST, // ADST in both directions + FLIPADST_DCT, + DCT_FLIPADST, + FLIPADST_FLIPADST, + ADST_FLIPADST, + FLIPADST_ADST, + IDTX, + V_DCT, + H_DCT, + V_ADST, + H_ADST, + V_FLIPADST, + H_FLIPADST, + N_TX_TYPES, + WHT_WHT = N_TX_TYPES, + N_TX_TYPES_PLUS_LL, +}; + +enum TxClass { + TX_CLASS_2D, + TX_CLASS_H, + TX_CLASS_V, +}; + +enum IntraPredMode { + DC_PRED, + VERT_PRED, + HOR_PRED, + DIAG_DOWN_LEFT_PRED, + DIAG_DOWN_RIGHT_PRED, + VERT_RIGHT_PRED, + HOR_DOWN_PRED, + HOR_UP_PRED, + VERT_LEFT_PRED, + SMOOTH_PRED, + SMOOTH_V_PRED, + SMOOTH_H_PRED, + PAETH_PRED, + N_INTRA_PRED_MODES, + CFL_PRED = N_INTRA_PRED_MODES, + N_UV_INTRA_PRED_MODES, + N_IMPL_INTRA_PRED_MODES = N_UV_INTRA_PRED_MODES, + LEFT_DC_PRED = DIAG_DOWN_LEFT_PRED, + TOP_DC_PRED, + DC_128_PRED, + Z1_PRED, + Z2_PRED, + Z3_PRED, + FILTER_PRED = N_INTRA_PRED_MODES, +}; + +enum InterIntraPredMode { + II_DC_PRED, + II_VERT_PRED, + II_HOR_PRED, + II_SMOOTH_PRED, + N_INTER_INTRA_PRED_MODES, +}; + +enum BlockPartition { + PARTITION_NONE, // [ ] <-. 
+ PARTITION_H, // [-] | + PARTITION_V, // [|] | + PARTITION_SPLIT, // [+] --' + PARTITION_T_TOP_SPLIT, // [⊥] i.e. split top, H bottom + PARTITION_T_BOTTOM_SPLIT, // [т] i.e. H top, split bottom + PARTITION_T_LEFT_SPLIT, // [-|] i.e. split left, V right + PARTITION_T_RIGHT_SPLIT, // [|-] i.e. V left, split right + PARTITION_H4, // [Ⲷ] + PARTITION_V4, // [Ⲽ] + N_PARTITIONS, + N_SUB8X8_PARTITIONS = PARTITION_T_TOP_SPLIT, +}; + +enum BlockSize { + BS_128x128, + BS_128x64, + BS_64x128, + BS_64x64, + BS_64x32, + BS_64x16, + BS_32x64, + BS_32x32, + BS_32x16, + BS_32x8, + BS_16x64, + BS_16x32, + BS_16x16, + BS_16x8, + BS_16x4, + BS_8x32, + BS_8x16, + BS_8x8, + BS_8x4, + BS_4x16, + BS_4x8, + BS_4x4, + N_BS_SIZES, +}; + +enum Filter2d { // order is horizontal, vertical + FILTER_2D_8TAP_REGULAR, + FILTER_2D_8TAP_REGULAR_SMOOTH, + FILTER_2D_8TAP_REGULAR_SHARP, + FILTER_2D_8TAP_SHARP_REGULAR, + FILTER_2D_8TAP_SHARP_SMOOTH, + FILTER_2D_8TAP_SHARP, + FILTER_2D_8TAP_SMOOTH_REGULAR, + FILTER_2D_8TAP_SMOOTH, + FILTER_2D_8TAP_SMOOTH_SHARP, + FILTER_2D_BILINEAR, + N_2D_FILTERS, +}; + +enum MVJoint { + MV_JOINT_ZERO, + MV_JOINT_H, + MV_JOINT_V, + MV_JOINT_HV, + N_MV_JOINTS, +}; + +enum InterPredMode { + NEARESTMV, + NEARMV, + GLOBALMV, + NEWMV, + N_INTER_PRED_MODES, +}; + +enum DRL_PROXIMITY { + NEAREST_DRL, + NEARER_DRL, + NEAR_DRL, + NEARISH_DRL +}; + +enum CompInterPredMode { + NEARESTMV_NEARESTMV, + NEARMV_NEARMV, + NEARESTMV_NEWMV, + NEWMV_NEARESTMV, + NEARMV_NEWMV, + NEWMV_NEARMV, + GLOBALMV_GLOBALMV, + NEWMV_NEWMV, + N_COMP_INTER_PRED_MODES, +}; + +enum CompInterType { + COMP_INTER_NONE, + COMP_INTER_WEIGHTED_AVG, + COMP_INTER_AVG, + COMP_INTER_SEG, + COMP_INTER_WEDGE, +}; + +enum InterIntraType { + INTER_INTRA_NONE, + INTER_INTRA_BLEND, + INTER_INTRA_WEDGE, +}; + +typedef union mv { + struct { + int16_t y, x; + }; + uint32_t n; +} mv; + +enum MotionMode { + MM_TRANSLATION, + MM_OBMC, + MM_WARP, +}; + +#define QINDEX_RANGE 256 + +typedef struct Av1Block { + uint8_t bl, bs, bp; 
+ uint8_t intra, seg_id, skip_mode, skip, uvtx; + union { + struct { + uint8_t y_mode, uv_mode, tx, pal_sz[2]; + int8_t y_angle, uv_angle, cfl_alpha[2]; + }; // intra + struct { + union { + struct { + union mv mv[2]; + uint8_t wedge_idx, mask_sign, interintra_mode; + }; + struct { + union mv mv2d; + int16_t matrix[4]; + }; + }; + uint8_t comp_type, inter_mode, motion_mode, drl_idx; + int8_t ref[2]; + uint8_t max_ytx, filter2d, interintra_type, tx_split0; + uint16_t tx_split1; + }; // inter + }; +} Av1Block; + +#endif /* DAV1D_SRC_LEVELS_H */ diff --git a/media/video/av1/src/log.c b/media/video/av1/src/log.c new file mode 100644 index 00000000..de6776a6 --- /dev/null +++ b/media/video/av1/src/log.c @@ -0,0 +1,57 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include +#include + +#include "dav1d/dav1d.h" + +#include "common/validate.h" + +#include "src/internal.h" +#include "src/log.h" + +#if CONFIG_LOG +COLD void dav1d_log_default_callback(void *const cookie, + const char *const format, va_list ap) +{ + vfprintf(stderr, format, ap); +} + +COLD void dav1d_log(Dav1dContext *const c, const char *const format, ...) { + validate_input(c != NULL); + + if (!c->logger.callback) + return; + + va_list ap; + va_start(ap, format); + c->logger.callback(c->logger.cookie, format, ap); + va_end(ap); +} +#endif diff --git a/media/video/av1/src/log.h b/media/video/av1/src/log.h new file mode 100644 index 00000000..df32de7f --- /dev/null +++ b/media/video/av1/src/log.h @@ -0,0 +1,47 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_SRC_LOG_H +#define DAV1D_SRC_LOG_H + +#include "config.h" + +#include + +#include "dav1d/dav1d.h" + +#include "common/attributes.h" + +#if CONFIG_LOG +#define dav1d_log dav1d_log +void dav1d_log_default_callback(void *cookie, const char *format, va_list ap); +void dav1d_log(Dav1dContext *c, const char *format, ...) ATTR_FORMAT_PRINTF(2, 3); +#else +#define dav1d_log_default_callback NULL +#define dav1d_log(...) do { } while(0) +#endif + +#endif /* DAV1D_SRC_LOG_H */ diff --git a/media/video/av1/src/mem.h b/media/video/av1/src/mem.h new file mode 100644 index 00000000..41ae47a2 --- /dev/null +++ b/media/video/av1/src/mem.h @@ -0,0 +1,103 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_SRC_MEM_H +#define DAV1D_SRC_MEM_H + +#include + +#if defined(HAVE_ALIGNED_MALLOC) || defined(HAVE_MEMALIGN) +#include +#endif + +#include "common/attributes.h" + +#include "src/thread.h" + +typedef struct Dav1dMemPoolBuffer { + void *data; + struct Dav1dMemPoolBuffer *next; +} Dav1dMemPoolBuffer; + +typedef struct Dav1dMemPool { + pthread_mutex_t lock; + Dav1dMemPoolBuffer *buf; + int ref_cnt; + int end; +} Dav1dMemPool; + +void dav1d_mem_pool_push(Dav1dMemPool *pool, Dav1dMemPoolBuffer *buf); +Dav1dMemPoolBuffer *dav1d_mem_pool_pop(Dav1dMemPool *pool, size_t size); +int dav1d_mem_pool_init(Dav1dMemPool **pool); +void dav1d_mem_pool_end(Dav1dMemPool *pool); + +/* + * Allocate align-byte aligned memory. The return value can be released + * by calling the dav1d_free_aligned() function. 
+ */ +static inline void *dav1d_alloc_aligned(size_t sz, size_t align) { + assert(!(align & (align - 1))); +#ifdef HAVE_POSIX_MEMALIGN + void *ptr; + if (posix_memalign(&ptr, align, sz)) return NULL; + return ptr; +#elif defined(HAVE_ALIGNED_MALLOC) + return _aligned_malloc(sz, align); +#elif defined(HAVE_MEMALIGN) + return memalign(align, sz); +#else +#error Missing aligned alloc implementation +#endif +} + +static inline void dav1d_free_aligned(void* ptr) { +#ifdef HAVE_POSIX_MEMALIGN + free(ptr); +#elif defined(HAVE_ALIGNED_MALLOC) + _aligned_free(ptr); +#elif defined(HAVE_MEMALIGN) + free(ptr); +#endif +} + +static inline void dav1d_freep_aligned(void* ptr) { + void **mem = (void **) ptr; + if (*mem) { + dav1d_free_aligned(*mem); + *mem = NULL; + } +} + +static inline void freep(void *ptr) { + void **mem = (void **) ptr; + if (*mem) { + free(*mem); + *mem = NULL; + } +} + +#endif /* DAV1D_SRC_MEM_H */ diff --git a/media/video/av1/src/picture.h b/media/video/av1/src/picture.h new file mode 100644 index 00000000..154c85a0 --- /dev/null +++ b/media/video/av1/src/picture.h @@ -0,0 +1,109 @@ +/* + * Copyright © 2018-2021, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_SRC_PICTURE_H +#define DAV1D_SRC_PICTURE_H + +#include + +#include "src/thread.h" +#include "dav1d/picture.h" + +#include "src/thread_data.h" +#include "src/ref.h" + +enum PlaneType { + PLANE_TYPE_Y, + PLANE_TYPE_UV, + PLANE_TYPE_BLOCK, + PLANE_TYPE_ALL, +}; + +enum PictureFlags { + PICTURE_FLAG_NEW_SEQUENCE = 1 << 0, + PICTURE_FLAG_NEW_OP_PARAMS_INFO = 1 << 1, + PICTURE_FLAG_NEW_TEMPORAL_UNIT = 1 << 2, +}; + +typedef struct Dav1dThreadPicture { + Dav1dPicture p; + int visible; + // This can be set for inter frames, non-key intra frames, or for invisible + // keyframes that have not yet been made visible using the show-existing-frame + // mechanism. + int showable; + enum PictureFlags flags; + // [0] block data (including segmentation map and motion vectors) + // [1] pixel data + atomic_uint *progress; +} Dav1dThreadPicture; + +typedef struct Dav1dPictureBuffer { + void *data; + struct Dav1dPictureBuffer *next; +} Dav1dPictureBuffer; + +/* + * Allocate a picture with custom border size. + */ +int dav1d_thread_picture_alloc(Dav1dContext *c, Dav1dFrameContext *f, const int bpc); + +/** + * Allocate a picture with identical metadata to an existing picture. 
+ * The width is a separate argument so this function can be used for + * super-res, where the width changes, but everything else is the same. + * For the more typical use case of allocating a new image of the same + * dimensions, use src->p.w as width. + */ +int dav1d_picture_alloc_copy(Dav1dContext *c, Dav1dPicture *dst, const int w, + const Dav1dPicture *src); + +/** + * Create a copy of a picture. + */ +void dav1d_picture_ref(Dav1dPicture *dst, const Dav1dPicture *src); +void dav1d_thread_picture_ref(Dav1dThreadPicture *dst, + const Dav1dThreadPicture *src); +void dav1d_thread_picture_move_ref(Dav1dThreadPicture *dst, + Dav1dThreadPicture *src); +void dav1d_thread_picture_unref(Dav1dThreadPicture *p); + +/** + * Move a picture reference. + */ +void dav1d_picture_move_ref(Dav1dPicture *dst, Dav1dPicture *src); + +int dav1d_default_picture_alloc(Dav1dPicture *p, void *cookie); +void dav1d_default_picture_release(Dav1dPicture *p, void *cookie); +void dav1d_picture_unref_internal(Dav1dPicture *p); + +/** + * Get event flags from picture flags. + */ +enum Dav1dEventFlags dav1d_picture_get_event_flags(const Dav1dThreadPicture *p); + +#endif /* DAV1D_SRC_PICTURE_H */ diff --git a/media/video/av1/src/ppc/cdef.h b/media/video/av1/src/ppc/cdef.h new file mode 100644 index 00000000..a94ad22a --- /dev/null +++ b/media/video/av1/src/ppc/cdef.h @@ -0,0 +1,67 @@ +/* + * Copyright © 2019, Luca Barbato + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "common/bitdepth.h" +#include "common/intops.h" + +#include "src/cdef.h" +#include "src/cpu.h" + +/* +#define cdef_svp64_fn(w, h) \ +void cdef_filter_block_##w##x##h##_svp64(pixel *const dst, \ + const ptrdiff_t dst_stride, \ + const pixel (*left)[2], \ + const pixel *const top, \ + const pixel *const bottom, \ + const int pri_strength, \ + const int sec_strength, \ + const int dir, \ + const int damping, \ + const enum CdefEdgeFlags edges) + +cdef_svp64_fn(4, 4); +cdef_svp64_fn(4, 8); +cdef_svp64_fn(8, 8);*/ + +int cdef_find_dir_svp64(const pixel *img, const ptrdiff_t stride, + unsigned *const var HIGHBD_DECL_SUFFIX); + +int cdef_find_dir_svp64_real(const pixel *img, const ptrdiff_t stride, + unsigned *const var HIGHBD_DECL_SUFFIX); + +static ALWAYS_INLINE void cdef_dsp_init_ppc(Dav1dCdefDSPContext *const c) { + const unsigned flags = dav1d_get_cpu_flags(); + + if (!(flags & DAV1D_PPC_CPU_FLAG_SVP64)) return; + +#ifdef HAVE_SVP64 + c->dir = cdef_find_dir_svp64; +#endif + +} diff --git a/media/video/av1/src/ppc/cdef_tmpl_svp64.c b/media/video/av1/src/ppc/cdef_tmpl_svp64.c new file mode 100644 index 00000000..266c98a1 --- /dev/null +++ 
b/media/video/av1/src/ppc/cdef_tmpl_svp64.c @@ -0,0 +1,103 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" + +#include + +#include "common/intops.h" + +#include "src/ppc/cdef.h" +#include "src/tables.h" + +int cdef_find_dir_svp64(const pixel *img, const ptrdiff_t stride, + unsigned *const var HIGHBD_DECL_SUFFIX) +{ + const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8; + int partial_sum_hv[2][8] = { { 0 } }; + int partial_sum_diag[2][15] = { { 0 } }; + int partial_sum_alt[4][11] = { { 0 } }; + + for (int y = 0; y < 8; y++) { + for (int x = 0; x < 8; x++) { + const int px = (img[x] >> bitdepth_min_8) - 128; + + partial_sum_diag[0][ y + x ] += px; + partial_sum_alt [0][ y + (x >> 1)] += px; + partial_sum_hv [0][ y ] += px; + partial_sum_alt [1][3 + y - (x >> 1)] += px; + partial_sum_diag[1][7 + y - x ] += px; + partial_sum_alt [2][3 - (y >> 1) + x ] += px; + partial_sum_hv [1][ x ] += px; + partial_sum_alt [3][ (y >> 1) + x ] += px; + } + img += PXSTRIDE(stride); + } + + unsigned cost[8] = { 0 }; +/* for (int n = 0; n < 8; n++) { + cost[2] += partial_sum_hv[0][n] * partial_sum_hv[0][n]; + cost[6] += partial_sum_hv[1][n] * partial_sum_hv[1][n]; + } + cost[2] *= 105; + cost[6] *= 105; + + static const uint16_t div_table[7] = { 840, 420, 280, 210, 168, 140, 120 }; + for (int n = 0; n < 7; n++) { + const int d = div_table[n]; + cost[0] += (partial_sum_diag[0][n] * partial_sum_diag[0][n] + + partial_sum_diag[0][14 - n] * partial_sum_diag[0][14 - n]) * d; + cost[4] += (partial_sum_diag[1][n] * partial_sum_diag[1][n] + + partial_sum_diag[1][14 - n] * partial_sum_diag[1][14 - n]) * d; + } + cost[0] += partial_sum_diag[0][7] * partial_sum_diag[0][7] * 105; + cost[4] += partial_sum_diag[1][7] * partial_sum_diag[1][7] * 105; + + for (int n = 0; n < 4; n++) { + unsigned *const cost_ptr = &cost[n * 2 + 1]; + for (int m = 0; m < 5; m++) + *cost_ptr += partial_sum_alt[n][3 + m] * partial_sum_alt[n][3 + m]; + *cost_ptr *= 105; + for (int m = 0; m < 3; m++) { + const int d = div_table[2 * m + 1]; + *cost_ptr += (partial_sum_alt[n][m] * 
partial_sum_alt[n][m] + + partial_sum_alt[n][10 - m] * partial_sum_alt[n][10 - m]) * d; + } + } +*/ + int best_dir = 0; + unsigned best_cost = cost[0]; + for (int n = 1; n < 8; n++) { + if (cost[n] > best_cost) { + best_cost = cost[n]; + best_dir = n; + } + } + + *var = (best_cost - (cost[best_dir ^ 4])) >> 10; + return best_dir; +} diff --git a/media/video/av1/src/ppc/cdef_tmpl_svp64_real.c.in b/media/video/av1/src/ppc/cdef_tmpl_svp64_real.c.in new file mode 100644 index 00000000..adcb1c2e --- /dev/null +++ b/media/video/av1/src/ppc/cdef_tmpl_svp64_real.c.in @@ -0,0 +1,103 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
/*
 * Placeholder C version of cdef_find_dir_svp64_real.
 *
 * As written this function does no work: the locals below are initialised
 * but never read, *var is never written, and the return value is always 0.
 * The reference algorithm is kept inside the comment block so it can be
 * re-enabled piece by piece.
 * NOTE(review): presumably this .c.in file only serves as a template for
 * producing the hand-edited assembly of the same name - confirm.
 */
int cdef_find_dir_svp64_real(const pixel *img, const ptrdiff_t stride,
                             unsigned *const var HIGHBD_DECL_SUFFIX)
{
    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
    int partial_sum_hv[2][8] = { { 0 } };
    int partial_sum_diag[2][15] = { { 0 } };
    int partial_sum_alt[4][11] = { { 0 } };
/*
    for (int y = 0; y < 8; y++) {
        for (int x = 0; x < 8; x++) {
            const int px = (img[x] >> bitdepth_min_8) - 128;

            partial_sum_diag[0][     y       +  x      ] += px;
            partial_sum_alt [0][     y       + (x >> 1)] += px;
            partial_sum_hv  [0][     y                 ] += px;
            partial_sum_alt [1][3 +  y       - (x >> 1)] += px;
            partial_sum_diag[1][7 +  y       -  x      ] += px;
            partial_sum_alt [2][3 - (y >> 1) +  x      ] += px;
            partial_sum_hv  [1][                x      ] += px;
            partial_sum_alt [3][    (y >> 1) +  x      ] += px;
        }
        img += PXSTRIDE(stride);
    }

    unsigned cost[8] = { 0 };
    for (int n = 0; n < 8; n++) {
        cost[2] += partial_sum_hv[0][n] * partial_sum_hv[0][n];
        cost[6] += partial_sum_hv[1][n] * partial_sum_hv[1][n];
    }
    cost[2] *= 105;
    cost[6] *= 105;

    static const uint16_t div_table[7] = { 840, 420, 280, 210, 168, 140, 120 };
    for (int n = 0; n < 7; n++) {
        const int d = div_table[n];
        cost[0] += (partial_sum_diag[0][n]      * partial_sum_diag[0][n] +
                    partial_sum_diag[0][14 - n] * partial_sum_diag[0][14 - n]) * d;
        cost[4] += (partial_sum_diag[1][n]      * partial_sum_diag[1][n] +
                    partial_sum_diag[1][14 - n] * partial_sum_diag[1][14 - n]) * d;
    }
    cost[0] += partial_sum_diag[0][7] * partial_sum_diag[0][7] * 105;
    cost[4] += partial_sum_diag[1][7] * partial_sum_diag[1][7] * 105;

    for (int n = 0; n < 4; n++) {
        unsigned *const cost_ptr = &cost[n * 2 + 1];
        for (int m = 0; m < 5; m++)
            *cost_ptr += partial_sum_alt[n][3 + m] * partial_sum_alt[n][3 + m];
        *cost_ptr *= 105;
        for (int m = 0; m < 3; m++) {
            const int d = div_table[2 * m + 1];
            *cost_ptr += (partial_sum_alt[n][m]      * partial_sum_alt[n][m] +
                          partial_sum_alt[n][10 - m] * partial_sum_alt[n][10 - m]) * d;
        }
    }

    int best_dir = 0;
    unsigned best_cost = cost[0];
    for (int n = 1; n < 8; n++) {
        if (cost[n] > best_cost) {
            best_cost = cost[n];
            best_dir = n;
        }
    }

    *var = (best_cost - (cost[best_dir ^ 4])) >> 10;
    return best_dir;*/
    /* Stub result: direction 0, *var left untouched. */
    return 0;
}
        # Load 8x8 16-bit (halfword) elements from img_ptr, one row of 8 at a
        # time; img_ptr is advanced by stride between rows.
        setvl           0,0,8,0,1,1                     # Set VL to 8 elements
        sv.lha          *img, 0(img_ptr)                # Load 8 halfwords from (img_ptr)
        add             img_ptr, img_ptr, stride        # Advance img_ptr by stride
        sv.lha          *img + 8, 0(img_ptr)
        add             img_ptr, img_ptr, stride
        sv.lha          *img + 16, 0(img_ptr)
        add             img_ptr, img_ptr, stride
        sv.lha          *img + 24, 0(img_ptr)
        add             img_ptr, img_ptr, stride
        sv.lha          *img + 32, 0(img_ptr)
        add             img_ptr, img_ptr, stride
        sv.lha          *img + 40, 0(img_ptr)
        add             img_ptr, img_ptr, stride
        sv.lha          *img + 48, 0(img_ptr)
        add             img_ptr, img_ptr, stride
        sv.lha          *img + 56, 0(img_ptr)

        setvl           0,0,64,0,1,1                    # Set VL to 64 elements
        sv.sraw         *img, *img, bd                  # img[x] >> bitdepth_min_8
        sv.addi         *img, *img, -128                # px = (img[x] >> bitdepth_min_8) - 128

        # Zero psum registers for partial_sum_hv
        setvl           0,0,16,0,1,1                    # Set VL to 16 elements
        sv.ori          *psum, 0, 0

        # First do the horizontal partial sums:
        # partial_sum_hv[0][y] += px;
        # (scalar destination, one instruction per image row)
        setvl           0,0,8,0,1,1                     # Set VL to 8 elements
        sv.add/mr       psum+0, psum+0, *img+0
        sv.add/mr       psum+1, psum+1, *img+8
        sv.add/mr       psum+2, psum+2, *img+16
        sv.add/mr       psum+3, psum+3, *img+24
        sv.add/mr       psum+4, psum+4, *img+32
        sv.add/mr       psum+5, psum+5, *img+40
        sv.add/mr       psum+6, psum+6, *img+48
        sv.add/mr       psum+7, psum+7, *img+56

        # Next the vertical partial sums:
        # partial_sum_hv[1][x] += px;
        # (vector destination psum+8.., each image row is added in turn)
        sv.add/mr       *psum+8, *psum+8, *img+0
        sv.add/mr       *psum+8, *psum+8, *img+8
        sv.add/mr       *psum+8, *psum+8, *img+16
        sv.add/mr       *psum+8, *psum+8, *img+24
        sv.add/mr       *psum+8, *psum+8, *img+32
        sv.add/mr       *psum+8, *psum+8, *img+40
        sv.add/mr       *psum+8, *psum+8, *img+48
        sv.add/mr       *psum+8, *psum+8, *img+56

        # Zero cost registers
        setvl           0,0,8,0,1,1                     # Set VL to 8 elements
        sv.ori          *cost, 0, 0

        # cost[2] += partial_sum_hv[0][n] * partial_sum_hv[0][n];
        sv.maddld/mr    cost+2, *psum, *psum, cost+2
        # cost[6] += partial_sum_hv[1][n] * partial_sum_hv[1][n];
        sv.maddld/mr    cost+6, *psum+8, *psum+8, cost+6

        # cost[2] *= 105
        # cost[6] *= 105
        mulli           cost+2, cost+2, 105
        mulli           cost+6, cost+6, 105

        # We're done with partial_sum_hv values, we can reuse the registers
        # for partial_sum_diag
        # Zero psum registers for partial_sum_diag
        setvl           0,0,30,0,1,1                    # Set VL to 30 elements
        sv.ori          *psum, 0, 0

        setvl           0,0,8,0,1,1                     # Set VL to 8 elements
        # First row of diagonal partial sums:
        # partial_sum_diag[0][y + x] += px;
        sv.add/mr       *psum+0, *psum+0, *img+0
        sv.add/mr       *psum+1, *psum+1, *img+8
        sv.add/mr       *psum+2, *psum+2, *img+16
        sv.add/mr       *psum+3, *psum+3, *img+24
        sv.add/mr       *psum+4, *psum+4, *img+32
        sv.add/mr       *psum+5, *psum+5, *img+40
        sv.add/mr       *psum+6, *psum+6, *img+48
        sv.add/mr       *psum+7, *psum+7, *img+56

        # Second row of diagonal partial sums:
        # partial_sum_diag[1][7 + y - x] += px;
        sv.add/mr       *psum+15, *psum+15, *img+56
        sv.add/mr       *psum+16, *psum+16, *img+48
        sv.add/mr       *psum+17, *psum+17, *img+40
        sv.add/mr       *psum+18, *psum+18, *img+32
        sv.add/mr       *psum+19, *psum+19, *img+24
        sv.add/mr       *psum+20, *psum+20, *img+16
        sv.add/mr       *psum+21, *psum+21, *img+8
        sv.add/mr       *psum+22, *psum+22, *img+0
        # these were calculated correctly but in reverse order,
        # but since they're going to be used in a sum, order is not important.

        setvl           0,0,15,0,1,1                    # Set VL to 15 elements
        sv.ori          *tmp, 0, 0

        # cost[0] += (partial_sum_diag[0][n]      * partial_sum_diag[0][n] +
        #             partial_sum_diag[0][14 - n] * partial_sum_diag[0][14 - n]) * d;
        # Produce squares of all values
        sv.maddld/mr    *tmp, *psum+0, *psum+0, *tmp
        # Handle the first 8 elements in order, *includes* partial_sum_diag[0][7]!
        #setvl           0,0,8,0,1,1                     # Set VL to 8 elements
        #sv.mulld        *tmp, *tmp, *divt
        # Handle remaining 7 elements, in reverse order
        # NOTE(review): the sv.ori below copies divt over tmp (the squares
        # computed above) while the multiply is still commented out - looks
        # like work-in-progress experimentation with svstep/svindex.
        setvl           0,0,7,0,1,1                     # Set VL to 7 elements
        sv.svstep/mrr   *tmp2, 6, 1
        svindex         29,0b1,7,0,0,0,0
        sv.ori          *tmp, *divt, 0
        #sv.mulld        *tmp, *tmp, *divt
        # Now sum those up to cost[0] element
        #setvl           0,0,15,0,1,1                    # Set VL to 15 elements
        #sv.add/mr       cost+0, *tmp, cost+0

        # Similarly for cost[4]
        # cost[4] += (partial_sum_diag[1][n]      * partial_sum_diag[1][n] +
        #             partial_sum_diag[1][14 - n] * partial_sum_diag[1][14 - n]) * d;
        #sv.maddld/mr    *tmp, *psum+16, *psum+16, *tmp
        #sv.maddld/mr    *tmp, *psum+24, *psum+24, *tmp
        #sv.mulld        *tmp, *tmp, *divt
        #sv.add/mr       cost+4, *tmp, cost+4


        # Zero psum registers for partial_sum_alt, process half of 44
        #setvl           0,0,22,0,1,1                    # Set VL to 22 elements
        #sv.ori          psum, 0, 0

        # First row of alt partial sums:
        # partial_sum_alt [0][y + (x >> 1)] += px;
        # These are essentially calculated the following way:
        # horiz axis: x, vert axis: y, quantity of y + (x>>1):
        #
        # |   | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
        # | 0 | 0 | 0 | 1 | 1 | 2 | 2 | 3 | 3 |
        # | 1 | 1 | 1 | 2 | 2 | 3 | 3 | 4 | 4 |
        # | 2 | 2 | 2 | 3 | 3 | 4 | 4 | 5 | 5 |
        # | 3 | 3 | 3 | 4 | 4 | 5 | 5 | 6 | 6 |
        # | 4 | 4 | 4 | 5 | 5 | 6 | 6 | 7 | 7 |
        # | 5 | 5 | 5 | 6 | 6 | 7 | 7 | 8 | 8 |
        # | 6 | 6 | 6 | 7 | 7 | 8 | 8 | 9 | 9 |
        # | 7 | 7 | 7 | 8 | 8 | 9 | 9 | a | a |
        #
        # We calculate this in a similar manner to the diagonal
        # partial sums, but first we have to do pair-wise addition
        # on all the elements of the img matrix:
        #setvl           0,0,64,0,1,1                    # Set VL to 64 elements
        #svstep          2
        #sv.add          *img, *img, *img+1

        #setvl           0,0,8,0,1,1                     # Set VL to 8 elements
        #sv.add          *psum+0, *psum+0, *img+0
        #sv.add          *psum+0, *psum+0, *img+1
        #sv.add          *psum+1, *psum+1, *img+8
        #sv.add          *psum+1, *psum+1, *img+9


        #setvl           0,0,10,0,1,1                    # Set VL to 10 elements
        #sv.add/mr       *psum, *psum, *psum+1
#

        # NOTE(review): nothing above stores a direction into r3 or writes
        # through the var pointer (r5). At this point r3 is still img_ptr,
        # advanced seven times by stride, so the value the caller reads back
        # is not yet meaningful - work in progress.
        blr
        .long 0
        .byte 0,0,0,0,0,0,0,0
        .cfi_endproc
.LFE27:
        .size   cdef_find_dir_svp64_real,.-cdef_find_dir_svp64_real
        .ident  "GCC: (Debian 8.3.0-6) 8.3.0"
        .section        .note.GNU-stack,"",@progbits
+ */ + +#include "config.h" + +#include +#include +#include +#include + +#include "pypowersim_wrapper_common.h" + +#include "common/intops.h" + +#include "src/ppc/cdef.h" +#include "src/tables.h" + +int cdef_find_dir_svp64(const pixel *img, const ptrdiff_t stride, + unsigned *const var HIGHBD_DECL_SUFFIX) +{ + printf("img: %p, stride: %d, var: %p\n", img, stride, var); + // It cannot be the same pointer as the original function, as it is really a separate CPU/RAM + // we have to memcpy from input to this pointer, the address was chosen arbitrarily + uint64_t img_svp64 = 0x100000; + uint64_t var_svp64 = 0x200000; + + // Create the pypowersim_state + pypowersim_state_t *state = pypowersim_prepare(); + + // Change the relevant elements, mandatory: body + state->binary = PyBytes_FromStringAndSize((const char *)&cdef_find_dir_svp64_real, 1000); + + // Set GPR #3 to the img pointer + PyObject *img_address = PyLong_FromUnsignedLongLong(img_svp64); + PyList_SetItem(state->initial_regs, 3, img_address); + + // Set GPR #4 to the output pointer + PyObject *stride_svp64 = PyLong_FromUnsignedLongLong(stride); + PyList_SetItem(state->initial_regs, 4, stride_svp64); + + // Load data into PyPowersim buffer from real memory + for (int i=0; i < 8; i++) { + for (int j=0; j < 8; j += 4) { + PyObject *svp64_address = PyLong_FromUnsignedLongLong(img_svp64 + j*2); + uint64_t val = (uint64_t)(img[j + 0]) & 0xffff; + val |= ((uint64_t)(img[j + 1]) & 0xffff) << 16; + val |= ((uint64_t)(img[j + 2]) & 0xffff) << 32; + val |= ((uint64_t)(img[j + 3]) & 0xffff) << 48; + printf("img: %p -> %04x %04x %04x %04x\t val: %016lx -> %p\n", img + j, (uint16_t)img[j + 0], (uint16_t)img[j + 1], (uint16_t)img[j + 2], (uint16_t)img[j + 3], val, img_svp64 + j*2); +/* + uint64_t val = (uint64_t)(img[0]) & 0xff; + val |= ((uint64_t)(img[1]) & 0xff) << 8; + val |= ((uint64_t)(img[2]) & 0xff) << 16; + val |= ((uint64_t)(img[3]) & 0xff) << 24; + val |= ((uint64_t)(img[4]) & 0xff) << 32; + val |= 
((uint64_t)(img[5]) & 0xff) << 40; + val |= ((uint64_t)(img[6]) & 0xff) << 48; + val |= ((uint64_t)(img[7]) & 0xff) << 56; + printf("src: %p -> %02x %02x %02x %02x %02x %02x %02x %02x\t val: %016lx -> %p\n", img, (uint8_t)img[0], (uint8_t)img[1], (uint8_t)img[2], (uint8_t)img[3], (uint8_t)img[4], (uint8_t)img[5], (uint8_t)img[6], (uint8_t)img[7], val, img_svp64);*/ + PyObject *word = PyLong_FromUnsignedLongLong(val); + PyDict_SetItem(state->initial_mem, svp64_address, word); + } + img += stride/2; + img_svp64 += stride; + } + + // Set GPR #5 to the var pointer, and clear the address + PyObject *var_address = PyLong_FromUnsignedLongLong(var_svp64); + PyList_SetItem(state->initial_regs, 5, img_address); + { + PyObject *svp64_address = PyLong_FromUnsignedLongLong(var_svp64); + PyObject *word = PyLong_FromUnsignedLongLong(0); + PyDict_SetItem(state->initial_mem, svp64_address, word); + } + +#if BITDEPTH == 16 + const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8; + PyObject *bitdepth = PyLong_FromUnsignedLongLong(bitdepth_min_8); + PyList_SetItem(state->initial_regs, 6, bitdepth); +#endif + + // Prepare the arguments object for the call + pypowersim_prepareargs(state); + + // Call the function and get the resulting object + state->result_obj = PyObject_CallObject(state->simulator, state->args); + if (!state->result_obj) { + PyErr_Print(); + printf("Error invoking 'run_a_simulation'\n"); + pypowersim_finalize(state); + exit(1); + } + + // Get the GPRs from the result_obj + PyObject *final_regs = PyObject_GetAttrString(state->result_obj, "gpr"); + if (!final_regs) { + PyErr_Print(); + printf("Error getting final GPRs\n"); + pypowersim_finalize(state); + exit(1); + } + + PyObject *memobj = PyObject_GetAttrString(state->result_obj, "mem"); + if (!memobj) { + PyErr_Print(); + Py_DECREF(state->result_obj); + printf("Error getting mem object\n"); + } + + PyObject *mem = PyObject_GetAttrString(memobj, "mem"); + if (!mem) { + PyErr_Print(); + 
Py_DECREF(state->result_obj); + printf("Error getting mem dict\n"); + } + { + PyObject *svp64_address = PyLong_FromUnsignedLongLong((var_svp64)/8); + PyObject *pyval = PyDict_GetItem(mem, svp64_address); + uint64_t val = PyLong_AsUnsignedLongLong(pyval); + *var = (uint32_t) val; + printf("output: %p -> %08x\t val: %016lx -> %p\n", var, *var, val, var_svp64); + } + + // GPR #3 holds the return value as an integer + PyObject *key = PyLong_FromLongLong(3); + PyObject *itm = PyDict_GetItem(final_regs, key); + if (!itm) { + PyErr_Print(); + printf("Error getting GPR #3\n"); + pypowersim_finalize(state); + exit(1); + } + PyObject *value = PyObject_GetAttrString(itm, "value"); + if (!value) { + PyErr_Print(); + printf("Error getting value of GPR #3\n"); + pypowersim_finalize(state); + exit(1); + } + uint64_t val = PyLong_AsUnsignedLongLong(value); + + // Return value + return (uint32_t) val; +} diff --git a/media/video/av1/src/ppc/cpu.c b/media/video/av1/src/ppc/cpu.c new file mode 100644 index 00000000..b7fb7b39 --- /dev/null +++ b/media/video/av1/src/ppc/cpu.c @@ -0,0 +1,54 @@ +/* + * Copyright © 2019, VideoLAN and dav1d authors + * Copyright © 2019, Janne Grunau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "common/attributes.h" + +#include "src/ppc/cpu.h" + +#if (defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)) && ARCH_PPC64LE +#include +#define HAVE_AUX +#endif + +COLD unsigned dav1d_get_cpu_flags_ppc(void) { + unsigned flags = 0; +#if defined(HAVE_GETAUXVAL) && ARCH_PPC64LE + unsigned long hw_cap = getauxval(AT_HWCAP); +#elif defined(HAVE_ELF_AUX_INFO) && ARCH_PPC64LE + unsigned long hw_cap = 0; + elf_aux_info(AT_HWCAP, &hw_cap, sizeof(hw_cap)); +#endif +#ifdef HAVE_AUX + flags |= (hw_cap & PPC_FEATURE_HAS_VSX) ? DAV1D_PPC_CPU_FLAG_VSX : 0; +#endif +#ifdef HAVE_SVP64 + flags |= DAV1D_PPC_CPU_FLAG_SVP64; +#endif + return flags; +} diff --git a/media/video/av1/src/ppc/cpu.h b/media/video/av1/src/ppc/cpu.h new file mode 100644 index 00000000..1f371929 --- /dev/null +++ b/media/video/av1/src/ppc/cpu.h @@ -0,0 +1,38 @@ +/* + * Copyright © 2019, VideoLAN and dav1d authors + * Copyright © 2019, Janne Grunau + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. 
#ifndef DAV1D_SRC_PPC_CPU_H
#define DAV1D_SRC_PPC_CPU_H

/* PowerPC CPU feature bits; values are combined into one unsigned mask. */
enum CpuFlags {
    DAV1D_PPC_CPU_FLAG_VSX   = 0x1, /* bit 0 */
    DAV1D_PPC_CPU_FLAG_SVP64 = 0x2, /* bit 1 */
};

/* Returns the mask of DAV1D_PPC_CPU_FLAG_* bits detected for this build. */
unsigned dav1d_get_cpu_flags_ppc(void);

#endif /* DAV1D_SRC_PPC_CPU_H */
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef DAV1D_SRC_PPC_TYPES_H +#define DAV1D_SRC_PPC_TYPES_H + +#include +#undef pixel + +#define u8x16 vector unsigned char +#define i8x16 vector signed char +#define b8x16 vector bool char +#define u16x8 vector unsigned short +#define i16x8 vector signed short +#define b16x8 vector bool short +#define u32x4 vector unsigned int +#define i32x4 vector signed int +#define b32x4 vector bool int +#define u64x2 vector unsigned long long +#define i64x2 vector signed long long +#define b64x2 vector bool long long + +#define u8h_to_u16(v) ((u16x8) vec_mergeh((u8x16) v, vec_splat_u8(0))) +#define u8l_to_u16(v) ((u16x8) vec_mergel((u8x16) v, vec_splat_u8(0))) +#define u16h_to_i32(v) ((i32x4) vec_mergeh((u16x8) v, vec_splat_u16(0))) +#define i16h_to_i32(v) ((i32x4) vec_unpackh((i16x8)v)) +#define u16l_to_i32(v) ((i32x4) vec_mergel((u16x8) v, vec_splat_u16(0))) +#define i16l_to_i32(v) ((i32x4) vec_unpackl((i16x8)v)) + +#endif /* DAV1D_SRC_PPC_TYPES_H */ diff --git a/media/video/av1/src/ref.h b/media/video/av1/src/ref.h new file mode 100644 index 00000000..ec070a0a --- /dev/null +++ b/media/video/av1/src/ref.h @@ -0,0 +1,60 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_SRC_REF_H +#define DAV1D_SRC_REF_H + +#include "dav1d/dav1d.h" + +#include "src/mem.h" +#include "src/thread.h" + +#include +#include + +struct Dav1dRef { + void *data; + const void *const_data; + atomic_int ref_cnt; + int free_ref; + void (*free_callback)(const uint8_t *data, void *user_data); + void *user_data; +}; + +Dav1dRef *dav1d_ref_create(size_t size); +Dav1dRef *dav1d_ref_create_using_pool(Dav1dMemPool *pool, size_t size); +Dav1dRef *dav1d_ref_wrap(const uint8_t *ptr, + void (*free_callback)(const uint8_t *data, void *user_data), + void *user_data); +void dav1d_ref_dec(Dav1dRef **ref); +int dav1d_ref_is_writable(Dav1dRef *ref); + +static inline void dav1d_ref_inc(Dav1dRef *const ref) { + atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_relaxed); +} + +#endif /* DAV1D_SRC_REF_H */ diff --git a/media/video/av1/src/tables.c b/media/video/av1/src/tables.c new file mode 100644 index 00000000..9752f15c --- /dev/null +++ b/media/video/av1/src/tables.c @@ -0,0 +1,1013 @@ +/* + * Copyright © 2018-2021, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" + +#include + +#include "common/attributes.h" + +#include "src/levels.h" +#include "src/tables.h" + +const uint8_t dav1d_al_part_ctx[2][N_BL_LEVELS][N_PARTITIONS] = { + { + // partitions: + // none, h, v, split, tts, tbs, tls, trs, h4, v4 + { 0x00, 0x00, 0x10, -1, 0x00, 0x10, 0x10, 0x10, -1, -1 }, // bl128 + { 0x10, 0x10, 0x18, -1, 0x10, 0x18, 0x18, 0x18, 0x10, 0x1c }, // bl64 + { 0x18, 0x18, 0x1c, -1, 0x18, 0x1c, 0x1c, 0x1c, 0x18, 0x1e }, // bl32 + { 0x1c, 0x1c, 0x1e, -1, 0x1c, 0x1e, 0x1e, 0x1e, 0x1c, 0x1f }, // bl16 + { 0x1e, 0x1e, 0x1f, 0x1f, -1, -1, -1, -1, -1, -1 }, // bl8 + }, { + { 0x00, 0x10, 0x00, -1, 0x10, 0x10, 0x00, 0x10, -1, -1 }, // bl128 + { 0x10, 0x18, 0x10, -1, 0x18, 0x18, 0x10, 0x18, 0x1c, 0x10 }, // bl64 + { 0x18, 0x1c, 0x18, -1, 0x1c, 0x1c, 0x18, 0x1c, 0x1e, 0x18 }, // bl32 + { 0x1c, 0x1e, 0x1c, -1, 0x1e, 0x1e, 0x1c, 0x1e, 0x1f, 0x1c }, // bl16 + { 0x1e, 0x1f, 0x1e, 0x1f, -1, -1, -1, -1, -1, -1 }, // bl8 + } +}; + +const uint8_t /* enum BlockSize */ + dav1d_block_sizes[N_BL_LEVELS][N_PARTITIONS][2] = +{ + [BL_128X128] = { + [PARTITION_NONE] = { BS_128x128 }, + [PARTITION_H] = { BS_128x64 }, + [PARTITION_V] = { BS_64x128 }, + [PARTITION_T_TOP_SPLIT] = { BS_64x64, BS_128x64 }, + [PARTITION_T_BOTTOM_SPLIT] = { BS_128x64, BS_64x64 }, + [PARTITION_T_LEFT_SPLIT] = { BS_64x64, BS_64x128 }, + [PARTITION_T_RIGHT_SPLIT] = { BS_64x128, BS_64x64 }, + }, [BL_64X64] = { + [PARTITION_NONE] = { BS_64x64 }, + [PARTITION_H] = { BS_64x32 }, + [PARTITION_V] = { BS_32x64 }, + [PARTITION_T_TOP_SPLIT] = { BS_32x32, BS_64x32 }, + [PARTITION_T_BOTTOM_SPLIT] = { BS_64x32, BS_32x32 }, + [PARTITION_T_LEFT_SPLIT] = { BS_32x32, BS_32x64 }, + [PARTITION_T_RIGHT_SPLIT] = { BS_32x64, BS_32x32 }, + [PARTITION_H4] = { BS_64x16 }, + [PARTITION_V4] = { BS_16x64 }, + }, [BL_32X32] = { + [PARTITION_NONE] = { BS_32x32 }, + [PARTITION_H] = { BS_32x16 }, + [PARTITION_V] = { BS_16x32 }, + [PARTITION_T_TOP_SPLIT] = { BS_16x16, BS_32x16 }, + 
[PARTITION_T_BOTTOM_SPLIT] = { BS_32x16, BS_16x16 }, + [PARTITION_T_LEFT_SPLIT] = { BS_16x16, BS_16x32 }, + [PARTITION_T_RIGHT_SPLIT] = { BS_16x32, BS_16x16 }, + [PARTITION_H4] = { BS_32x8 }, + [PARTITION_V4] = { BS_8x32 }, + }, [BL_16X16] = { + [PARTITION_NONE] = { BS_16x16 }, + [PARTITION_H] = { BS_16x8 }, + [PARTITION_V] = { BS_8x16 }, + [PARTITION_T_TOP_SPLIT] = { BS_8x8, BS_16x8 }, + [PARTITION_T_BOTTOM_SPLIT] = { BS_16x8, BS_8x8 }, + [PARTITION_T_LEFT_SPLIT] = { BS_8x8, BS_8x16 }, + [PARTITION_T_RIGHT_SPLIT] = { BS_8x16, BS_8x8 }, + [PARTITION_H4] = { BS_16x4 }, + [PARTITION_V4] = { BS_4x16 }, + }, [BL_8X8] = { + [PARTITION_NONE] = { BS_8x8 }, + [PARTITION_H] = { BS_8x4 }, + [PARTITION_V] = { BS_4x8 }, + [PARTITION_SPLIT] = { BS_4x4 }, + } +}; + +const uint8_t dav1d_block_dimensions[N_BS_SIZES][4] = { + [BS_128x128] = { 32, 32, 5, 5 }, + [BS_128x64] = { 32, 16, 5, 4 }, + [BS_64x128] = { 16, 32, 4, 5 }, + [BS_64x64] = { 16, 16, 4, 4 }, + [BS_64x32] = { 16, 8, 4, 3 }, + [BS_64x16] = { 16, 4, 4, 2 }, + [BS_32x64] = { 8, 16, 3, 4 }, + [BS_32x32] = { 8, 8, 3, 3 }, + [BS_32x16] = { 8, 4, 3, 2 }, + [BS_32x8] = { 8, 2, 3, 1 }, + [BS_16x64] = { 4, 16, 2, 4 }, + [BS_16x32] = { 4, 8, 2, 3 }, + [BS_16x16] = { 4, 4, 2, 2 }, + [BS_16x8] = { 4, 2, 2, 1 }, + [BS_16x4] = { 4, 1, 2, 0 }, + [BS_8x32] = { 2, 8, 1, 3 }, + [BS_8x16] = { 2, 4, 1, 2 }, + [BS_8x8] = { 2, 2, 1, 1 }, + [BS_8x4] = { 2, 1, 1, 0 }, + [BS_4x16] = { 1, 4, 0, 2 }, + [BS_4x8] = { 1, 2, 0, 1 }, + [BS_4x4] = { 1, 1, 0, 0 }, +}; + +const TxfmInfo dav1d_txfm_dimensions[N_RECT_TX_SIZES] = { + [ TX_4X4] = { .w = 1, .h = 1, .lw = 0, .lh = 0, + .min = 0, .max = 0, .ctx = 0 }, + [ TX_8X8] = { .w = 2, .h = 2, .lw = 1, .lh = 1, + .min = 1, .max = 1, .sub = TX_4X4, .ctx = 1 }, + [ TX_16X16] = { .w = 4, .h = 4, .lw = 2, .lh = 2, + .min = 2, .max = 2, .sub = TX_8X8, .ctx = 2 }, + [ TX_32X32] = { .w = 8, .h = 8, .lw = 3, .lh = 3, + .min = 3, .max = 3, .sub = TX_16X16, .ctx = 3 }, + [ TX_64X64] = { .w = 16, .h = 16, .lw = 4, 
.lh = 4, + .min = 4, .max = 4, .sub = TX_32X32, .ctx = 4 }, + [RTX_4X8] = { .w = 1, .h = 2, .lw = 0, .lh = 1, + .min = 0, .max = 1, .sub = TX_4X4, .ctx = 1 }, + [RTX_8X4] = { .w = 2, .h = 1, .lw = 1, .lh = 0, + .min = 0, .max = 1, .sub = TX_4X4, .ctx = 1 }, + [RTX_8X16] = { .w = 2, .h = 4, .lw = 1, .lh = 2, + .min = 1, .max = 2, .sub = TX_8X8, .ctx = 2 }, + [RTX_16X8] = { .w = 4, .h = 2, .lw = 2, .lh = 1, + .min = 1, .max = 2, .sub = TX_8X8, .ctx = 2 }, + [RTX_16X32] = { .w = 4, .h = 8, .lw = 2, .lh = 3, + .min = 2, .max = 3, .sub = TX_16X16, .ctx = 3 }, + [RTX_32X16] = { .w = 8, .h = 4, .lw = 3, .lh = 2, + .min = 2, .max = 3, .sub = TX_16X16, .ctx = 3 }, + [RTX_32X64] = { .w = 8, .h = 16, .lw = 3, .lh = 4, + .min = 3, .max = 4, .sub = TX_32X32, .ctx = 4 }, + [RTX_64X32] = { .w = 16, .h = 8, .lw = 4, .lh = 3, + .min = 3, .max = 4, .sub = TX_32X32, .ctx = 4 }, + [RTX_4X16] = { .w = 1, .h = 4, .lw = 0, .lh = 2, + .min = 0, .max = 2, .sub = RTX_4X8, .ctx = 1 }, + [RTX_16X4] = { .w = 4, .h = 1, .lw = 2, .lh = 0, + .min = 0, .max = 2, .sub = RTX_8X4, .ctx = 1 }, + [RTX_8X32] = { .w = 2, .h = 8, .lw = 1, .lh = 3, + .min = 1, .max = 3, .sub = RTX_8X16, .ctx = 2 }, + [RTX_32X8] = { .w = 8, .h = 2, .lw = 3, .lh = 1, + .min = 1, .max = 3, .sub = RTX_16X8, .ctx = 2 }, + [RTX_16X64] = { .w = 4, .h = 16, .lw = 2, .lh = 4, + .min = 2, .max = 4, .sub = RTX_16X32, .ctx = 3 }, + [RTX_64X16] = { .w = 16, .h = 4, .lw = 4, .lh = 2, + .min = 2, .max = 4, .sub = RTX_32X16, .ctx = 3 }, +}; + +const uint8_t /* enum (Rect)TxfmSize */ + dav1d_max_txfm_size_for_bs[N_BS_SIZES][4 /* y, 420, 422, 444 */] = +{ + [BS_128x128] = { TX_64X64, TX_32X32, TX_32X32, TX_32X32 }, + [BS_128x64] = { TX_64X64, TX_32X32, TX_32X32, TX_32X32 }, + [BS_64x128] = { TX_64X64, TX_32X32, 0, TX_32X32 }, + [BS_64x64] = { TX_64X64, TX_32X32, TX_32X32, TX_32X32 }, + [BS_64x32] = { RTX_64X32, RTX_32X16, TX_32X32, TX_32X32 }, + [BS_64x16] = { RTX_64X16, RTX_32X8, RTX_32X16, RTX_32X16 }, + [BS_32x64] = { RTX_32X64, 
RTX_16X32, 0, TX_32X32 }, + [BS_32x32] = { TX_32X32, TX_16X16, RTX_16X32, TX_32X32 }, + [BS_32x16] = { RTX_32X16, RTX_16X8, TX_16X16, RTX_32X16 }, + [BS_32x8] = { RTX_32X8, RTX_16X4, RTX_16X8, RTX_32X8 }, + [BS_16x64] = { RTX_16X64, RTX_8X32, 0, RTX_16X32 }, + [BS_16x32] = { RTX_16X32, RTX_8X16, 0, RTX_16X32 }, + [BS_16x16] = { TX_16X16, TX_8X8, RTX_8X16, TX_16X16 }, + [BS_16x8] = { RTX_16X8, RTX_8X4, TX_8X8, RTX_16X8 }, + [BS_16x4] = { RTX_16X4, RTX_8X4, RTX_8X4, RTX_16X4 }, + [BS_8x32] = { RTX_8X32, RTX_4X16, 0, RTX_8X32 }, + [BS_8x16] = { RTX_8X16, RTX_4X8, 0, RTX_8X16 }, + [BS_8x8] = { TX_8X8, TX_4X4, RTX_4X8, TX_8X8 }, + [BS_8x4] = { RTX_8X4, TX_4X4, TX_4X4, RTX_8X4 }, + [BS_4x16] = { RTX_4X16, RTX_4X8, 0, RTX_4X16 }, + [BS_4x8] = { RTX_4X8, TX_4X4, 0, RTX_4X8 }, + [BS_4x4] = { TX_4X4, TX_4X4, TX_4X4, TX_4X4 }, +}; + +const uint8_t /* enum TxfmType */ + dav1d_txtp_from_uvmode[N_UV_INTRA_PRED_MODES] = +{ + [DC_PRED] = DCT_DCT, + [VERT_PRED] = ADST_DCT, + [HOR_PRED] = DCT_ADST, + [DIAG_DOWN_LEFT_PRED] = DCT_DCT, + [DIAG_DOWN_RIGHT_PRED] = ADST_ADST, + [VERT_RIGHT_PRED] = ADST_DCT, + [HOR_DOWN_PRED] = DCT_ADST, + [HOR_UP_PRED] = DCT_ADST, + [VERT_LEFT_PRED] = ADST_DCT, + [SMOOTH_PRED] = ADST_ADST, + [SMOOTH_V_PRED] = ADST_DCT, + [SMOOTH_H_PRED] = DCT_ADST, + [PAETH_PRED] = ADST_ADST, +}; + +const uint8_t /* enum InterPredMode */ + dav1d_comp_inter_pred_modes[N_COMP_INTER_PRED_MODES][2] = +{ + [NEARESTMV_NEARESTMV] = { NEARESTMV, NEARESTMV }, + [NEARMV_NEARMV] = { NEARMV, NEARMV }, + [NEWMV_NEWMV] = { NEWMV, NEWMV }, + [GLOBALMV_GLOBALMV] = { GLOBALMV, GLOBALMV }, + [NEWMV_NEARESTMV] = { NEWMV, NEARESTMV }, + [NEWMV_NEARMV] = { NEWMV, NEARMV }, + [NEARESTMV_NEWMV] = { NEARESTMV, NEWMV }, + [NEARMV_NEWMV] = { NEARMV, NEWMV }, +}; + +const uint8_t dav1d_partition_type_count[N_BL_LEVELS] = { + [BL_128X128] = N_PARTITIONS - 3, + [BL_64X64] = N_PARTITIONS - 1, + [BL_32X32] = N_PARTITIONS - 1, + [BL_16X16] = N_PARTITIONS - 1, + [BL_8X8] = N_SUB8X8_PARTITIONS - 1, +}; + 
+const uint8_t /* enum TxfmType */ dav1d_tx_types_per_set[40] = { + /* Intra2 */ + IDTX, DCT_DCT, ADST_ADST, ADST_DCT, DCT_ADST, + /* Intra1 */ + IDTX, DCT_DCT, V_DCT, H_DCT, ADST_ADST, ADST_DCT, DCT_ADST, + /* Inter2 */ + IDTX, V_DCT, H_DCT, DCT_DCT, ADST_DCT, DCT_ADST, FLIPADST_DCT, + DCT_FLIPADST, ADST_ADST, FLIPADST_FLIPADST, ADST_FLIPADST, FLIPADST_ADST, + /* Inter1 */ + IDTX, V_DCT, H_DCT, V_ADST, H_ADST, V_FLIPADST, H_FLIPADST, + DCT_DCT, ADST_DCT, DCT_ADST, FLIPADST_DCT, DCT_FLIPADST, + ADST_ADST, FLIPADST_FLIPADST, ADST_FLIPADST, FLIPADST_ADST, +}; + +const uint8_t dav1d_ymode_size_context[N_BS_SIZES] = { + [BS_128x128] = 3, + [BS_128x64] = 3, + [BS_64x128] = 3, + [BS_64x64] = 3, + [BS_64x32] = 3, + [BS_64x16] = 2, + [BS_32x64] = 3, + [BS_32x32] = 3, + [BS_32x16] = 2, + [BS_32x8 ] = 1, + [BS_16x64] = 2, + [BS_16x32] = 2, + [BS_16x16] = 2, + [BS_16x8 ] = 1, + [BS_16x4 ] = 0, + [BS_8x32 ] = 1, + [BS_8x16 ] = 1, + [BS_8x8 ] = 1, + [BS_8x4 ] = 0, + [BS_4x16 ] = 0, + [BS_4x8 ] = 0, + [BS_4x4 ] = 0, +}; + +const uint8_t dav1d_lo_ctx_offsets[3][5][5] = { + { /* w == h */ + { 0, 1, 6, 6, 21 }, + { 1, 6, 6, 21, 21 }, + { 6, 6, 21, 21, 21 }, + { 6, 21, 21, 21, 21 }, + { 21, 21, 21, 21, 21 }, + }, { /* w > h */ + { 0, 16, 6, 6, 21 }, + { 16, 16, 6, 21, 21 }, + { 16, 16, 21, 21, 21 }, + { 16, 16, 21, 21, 21 }, + { 16, 16, 21, 21, 21 }, + }, { /* w < h */ + { 0, 11, 11, 11, 11 }, + { 11, 11, 11, 11, 11 }, + { 6, 6, 21, 21, 21 }, + { 6, 21, 21, 21, 21 }, + { 21, 21, 21, 21, 21 }, + }, +}; + +const uint8_t dav1d_skip_ctx[5][5] = { + { 1, 2, 2, 2, 3 }, + { 2, 4, 4, 4, 5 }, + { 2, 4, 4, 4, 5 }, + { 2, 4, 4, 4, 5 }, + { 3, 5, 5, 5, 6 }, +}; + +const uint8_t /* enum TxClass */ dav1d_tx_type_class[N_TX_TYPES_PLUS_LL] = { + [DCT_DCT] = TX_CLASS_2D, + [ADST_DCT] = TX_CLASS_2D, + [DCT_ADST] = TX_CLASS_2D, + [ADST_ADST] = TX_CLASS_2D, + [FLIPADST_DCT] = TX_CLASS_2D, + [DCT_FLIPADST] = TX_CLASS_2D, + [FLIPADST_FLIPADST] = TX_CLASS_2D, + [ADST_FLIPADST] = TX_CLASS_2D, + 
[FLIPADST_ADST] = TX_CLASS_2D, + [IDTX] = TX_CLASS_2D, + [V_DCT] = TX_CLASS_V, + [H_DCT] = TX_CLASS_H, + [V_ADST] = TX_CLASS_V, + [H_ADST] = TX_CLASS_H, + [V_FLIPADST] = TX_CLASS_V, + [H_FLIPADST] = TX_CLASS_H, + [WHT_WHT] = TX_CLASS_2D, +}; + +const uint8_t /* enum Filter2d */ dav1d_filter_2d[DAV1D_N_FILTERS][DAV1D_N_FILTERS] = { + [DAV1D_FILTER_8TAP_REGULAR] = { + [DAV1D_FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_REGULAR, + [DAV1D_FILTER_8TAP_SHARP] = FILTER_2D_8TAP_REGULAR_SHARP, + [DAV1D_FILTER_8TAP_SMOOTH] = FILTER_2D_8TAP_REGULAR_SMOOTH, + }, [DAV1D_FILTER_8TAP_SHARP] = { + [DAV1D_FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_SHARP_REGULAR, + [DAV1D_FILTER_8TAP_SHARP] = FILTER_2D_8TAP_SHARP, + [DAV1D_FILTER_8TAP_SMOOTH] = FILTER_2D_8TAP_SHARP_SMOOTH, + }, [DAV1D_FILTER_8TAP_SMOOTH] = { + [DAV1D_FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_SMOOTH_REGULAR, + [DAV1D_FILTER_8TAP_SHARP] = FILTER_2D_8TAP_SMOOTH_SHARP, + [DAV1D_FILTER_8TAP_SMOOTH] = FILTER_2D_8TAP_SMOOTH, + }, [DAV1D_FILTER_BILINEAR] = { + [DAV1D_FILTER_BILINEAR] = FILTER_2D_BILINEAR, + } +}; + +const uint8_t /* enum Dav1dFilterMode */ dav1d_filter_dir[N_2D_FILTERS][2] = { + [FILTER_2D_8TAP_REGULAR] = { DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_REGULAR }, + [FILTER_2D_8TAP_REGULAR_SMOOTH] = { DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_REGULAR }, + [FILTER_2D_8TAP_REGULAR_SHARP] = { DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_REGULAR }, + [FILTER_2D_8TAP_SHARP_REGULAR] = { DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SHARP }, + [FILTER_2D_8TAP_SHARP_SMOOTH] = { DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SHARP }, + [FILTER_2D_8TAP_SHARP] = { DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SHARP }, + [FILTER_2D_8TAP_SMOOTH_REGULAR] = { DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SMOOTH }, + [FILTER_2D_8TAP_SMOOTH] = { DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SMOOTH }, + [FILTER_2D_8TAP_SMOOTH_SHARP] = { DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SMOOTH }, + [FILTER_2D_BILINEAR] = { DAV1D_FILTER_BILINEAR, 
DAV1D_FILTER_BILINEAR }, +}; + +const uint8_t dav1d_filter_mode_to_y_mode[5] = { + DC_PRED, VERT_PRED, HOR_PRED, HOR_DOWN_PRED, DC_PRED +}; + +const uint8_t dav1d_intra_mode_context[N_INTRA_PRED_MODES] = { + [DC_PRED] = 0, + [VERT_PRED] = 1, + [HOR_PRED] = 2, + [DIAG_DOWN_LEFT_PRED] = 3, + [DIAG_DOWN_RIGHT_PRED] = 4, + [VERT_RIGHT_PRED] = 4, + [HOR_DOWN_PRED] = 4, + [HOR_UP_PRED] = 4, + [VERT_LEFT_PRED] = 3, + [SMOOTH_PRED] = 0, + [SMOOTH_V_PRED] = 1, + [SMOOTH_H_PRED] = 2, + [PAETH_PRED] = 0, +}; + +const uint8_t dav1d_wedge_ctx_lut[N_BS_SIZES] = { + [BS_32x32] = 6, + [BS_32x16] = 5, + [BS_32x8] = 8, + [BS_16x32] = 4, + [BS_16x16] = 3, + [BS_16x8] = 2, + [BS_8x32] = 7, + [BS_8x16] = 1, + [BS_8x8] = 0, +}; + +const Dav1dWarpedMotionParams dav1d_default_wm_params = { + .type = DAV1D_WM_TYPE_IDENTITY, + .matrix = { + 0, 0, 1 << 16, + 0, 0, 1 << 16, + }, + .u.p.alpha = 0, + .u.p.beta = 0, + .u.p.gamma = 0, + .u.p.delta = 0, +}; + +const int8_t dav1d_cdef_directions[2 + 8 + 2 /* dir */][2 /* pass */] = { + { 1 * 12 + 0, 2 * 12 + 0 }, // 6 + { 1 * 12 + 0, 2 * 12 - 1 }, // 7 + { -1 * 12 + 1, -2 * 12 + 2 }, // 0 + { 0 * 12 + 1, -1 * 12 + 2 }, // 1 + { 0 * 12 + 1, 0 * 12 + 2 }, // 2 + { 0 * 12 + 1, 1 * 12 + 2 }, // 3 + { 1 * 12 + 1, 2 * 12 + 2 }, // 4 + { 1 * 12 + 0, 2 * 12 + 1 }, // 5 + { 1 * 12 + 0, 2 * 12 + 0 }, // 6 + { 1 * 12 + 0, 2 * 12 - 1 }, // 7 + { -1 * 12 + 1, -2 * 12 + 2 }, // 0 + { 0 * 12 + 1, -1 * 12 + 2 }, // 1 +}; + +const uint16_t ALIGN(dav1d_sgr_params[16][2], 4) = { + { 140, 3236 }, { 112, 2158 }, { 93, 1618 }, { 80, 1438 }, + { 70, 1295 }, { 58, 1177 }, { 47, 1079 }, { 37, 996 }, + { 30, 925 }, { 25, 863 }, { 0, 2589 }, { 0, 1618 }, + { 0, 1177 }, { 0, 925 }, { 56, 0 }, { 22, 0 }, +}; + +const uint8_t ALIGN(dav1d_sgr_x_by_x[256], 64) = { + 255, 128, 85, 64, 51, 43, 37, 32, 28, 26, 23, 21, 20, 18, 17, + 16, 15, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 9, 9, + 8, 8, 8, 8, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, + 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, + 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0 +}; + +const int8_t ALIGN(dav1d_mc_subpel_filters[6][15][8], 8) = { + [DAV1D_FILTER_8TAP_REGULAR] = { + { 0, 1, -3, 63, 4, -1, 0, 0 }, + { 0, 1, -5, 61, 9, -2, 0, 0 }, + { 0, 1, -6, 58, 14, -4, 1, 0 }, + { 0, 1, -7, 55, 19, -5, 1, 0 }, + { 0, 1, -7, 51, 24, -6, 1, 0 }, + { 0, 1, -8, 47, 29, -6, 1, 0 }, + { 0, 1, -7, 42, 33, -6, 1, 0 }, + { 0, 1, -7, 38, 38, -7, 1, 0 }, + { 0, 1, -6, 33, 42, -7, 1, 0 }, + { 0, 1, -6, 29, 47, -8, 1, 0 }, + { 0, 1, -6, 24, 51, -7, 1, 0 }, + { 0, 1, -5, 19, 55, -7, 1, 0 }, + { 0, 1, -4, 14, 58, -6, 1, 0 }, + { 0, 0, -2, 9, 61, -5, 1, 0 }, + { 0, 0, -1, 4, 63, -3, 1, 0 } + }, [DAV1D_FILTER_8TAP_SMOOTH] = { + { 0, 1, 14, 31, 17, 1, 0, 0 }, + { 0, 0, 13, 31, 18, 2, 0, 0 }, + { 0, 0, 11, 31, 20, 2, 0, 0 }, + { 0, 0, 10, 30, 21, 3, 0, 0 }, + { 0, 0, 9, 29, 22, 4, 0, 0 }, + { 0, 0, 8, 28, 23, 5, 0, 0 }, + { 0, -1, 8, 27, 24, 6, 0, 0 }, + { 0, -1, 7, 26, 26, 7, -1, 0 }, + { 0, 0, 6, 24, 27, 8, -1, 0 }, + { 0, 0, 5, 23, 28, 8, 0, 0 }, + { 0, 0, 4, 22, 29, 9, 0, 0 }, + { 0, 0, 3, 21, 30, 10, 0, 0 }, + { 0, 0, 2, 20, 31, 11, 0, 0 }, + { 0, 0, 2, 18, 31, 13, 0, 0 }, + { 0, 0, 1, 17, 31, 14, 1, 0 } + }, [DAV1D_FILTER_8TAP_SHARP] = { + { -1, 1, -3, 63, 4, -1, 1, 0 }, + { -1, 3, -6, 62, 8, -3, 2, -1 }, + { -1, 4, -9, 60, 13, -5, 3, -1 }, + { -2, 5, -11, 58, 19, -7, 3, -1 }, + { -2, 5, -11, 54, 24, -9, 4, -1 }, + { -2, 5, -12, 50, 30, -10, 4, -1 }, 
+ { -2, 5, -12, 45, 35, -11, 5, -1 }, + { -2, 6, -12, 40, 40, -12, 6, -2 }, + { -1, 5, -11, 35, 45, -12, 5, -2 }, + { -1, 4, -10, 30, 50, -12, 5, -2 }, + { -1, 4, -9, 24, 54, -11, 5, -2 }, + { -1, 3, -7, 19, 58, -11, 5, -2 }, + { -1, 3, -5, 13, 60, -9, 4, -1 }, + { -1, 2, -3, 8, 62, -6, 3, -1 }, + { 0, 1, -1, 4, 63, -3, 1, -1 } + /* width <= 4 */ + }, [3 + DAV1D_FILTER_8TAP_REGULAR] = { + { 0, 0, -2, 63, 4, -1, 0, 0 }, + { 0, 0, -4, 61, 9, -2, 0, 0 }, + { 0, 0, -5, 58, 14, -3, 0, 0 }, + { 0, 0, -6, 55, 19, -4, 0, 0 }, + { 0, 0, -6, 51, 24, -5, 0, 0 }, + { 0, 0, -7, 47, 29, -5, 0, 0 }, + { 0, 0, -6, 42, 33, -5, 0, 0 }, + { 0, 0, -6, 38, 38, -6, 0, 0 }, + { 0, 0, -5, 33, 42, -6, 0, 0 }, + { 0, 0, -5, 29, 47, -7, 0, 0 }, + { 0, 0, -5, 24, 51, -6, 0, 0 }, + { 0, 0, -4, 19, 55, -6, 0, 0 }, + { 0, 0, -3, 14, 58, -5, 0, 0 }, + { 0, 0, -2, 9, 61, -4, 0, 0 }, + { 0, 0, -1, 4, 63, -2, 0, 0 } + }, [3 + DAV1D_FILTER_8TAP_SMOOTH] = { + { 0, 0, 15, 31, 17, 1, 0, 0 }, + { 0, 0, 13, 31, 18, 2, 0, 0 }, + { 0, 0, 11, 31, 20, 2, 0, 0 }, + { 0, 0, 10, 30, 21, 3, 0, 0 }, + { 0, 0, 9, 29, 22, 4, 0, 0 }, + { 0, 0, 8, 28, 23, 5, 0, 0 }, + { 0, 0, 7, 27, 24, 6, 0, 0 }, + { 0, 0, 6, 26, 26, 6, 0, 0 }, + { 0, 0, 6, 24, 27, 7, 0, 0 }, + { 0, 0, 5, 23, 28, 8, 0, 0 }, + { 0, 0, 4, 22, 29, 9, 0, 0 }, + { 0, 0, 3, 21, 30, 10, 0, 0 }, + { 0, 0, 2, 20, 31, 11, 0, 0 }, + { 0, 0, 2, 18, 31, 13, 0, 0 }, + { 0, 0, 1, 17, 31, 15, 0, 0 } + /* Bilin scaled being very rarely used, add a new table entry + * and use the put/prep_8tap_scaled code, thus acting as a + * scaled bilinear filter. 
*/ + }, [5] = { + { 0, 0, 0, 60, 4, 0, 0, 0 }, + { 0, 0, 0, 56, 8, 0, 0, 0 }, + { 0, 0, 0, 52, 12, 0, 0, 0 }, + { 0, 0, 0, 48, 16, 0, 0, 0 }, + { 0, 0, 0, 44, 20, 0, 0, 0 }, + { 0, 0, 0, 40, 24, 0, 0, 0 }, + { 0, 0, 0, 36, 28, 0, 0, 0 }, + { 0, 0, 0, 32, 32, 0, 0, 0 }, + { 0, 0, 0, 28, 36, 0, 0, 0 }, + { 0, 0, 0, 24, 40, 0, 0, 0 }, + { 0, 0, 0, 20, 44, 0, 0, 0 }, + { 0, 0, 0, 16, 48, 0, 0, 0 }, + { 0, 0, 0, 12, 52, 0, 0, 0 }, + { 0, 0, 0, 8, 56, 0, 0, 0 }, + { 0, 0, 0, 4, 60, 0, 0, 0 } + } +}; + +const int8_t ALIGN(dav1d_mc_warp_filter[193][8], 8) = { + // [-1, 0) + { 0, 0, 127, 1, 0, 0, 0, 0 }, { 0, -1, 127, 2, 0, 0, 0, 0 }, + { 1, -3, 127, 4, - 1, 0, 0, 0 }, { 1, -4, 126, 6, -2, 1, 0, 0 }, + { 1, -5, 126, 8, - 3, 1, 0, 0 }, { 1, -6, 125, 11, -4, 1, 0, 0 }, + { 1, -7, 124, 13, - 4, 1, 0, 0 }, { 2, -8, 123, 15, -5, 1, 0, 0 }, + { 2, -9, 122, 18, - 6, 1, 0, 0 }, { 2, -10, 121, 20, -6, 1, 0, 0 }, + { 2, -11, 120, 22, - 7, 2, 0, 0 }, { 2, -12, 119, 25, -8, 2, 0, 0 }, + { 3, -13, 117, 27, - 8, 2, 0, 0 }, { 3, -13, 116, 29, -9, 2, 0, 0 }, + { 3, -14, 114, 32, -10, 3, 0, 0 }, { 3, -15, 113, 35, -10, 2, 0, 0 }, + { 3, -15, 111, 37, -11, 3, 0, 0 }, { 3, -16, 109, 40, -11, 3, 0, 0 }, + { 3, -16, 108, 42, -12, 3, 0, 0 }, { 4, -17, 106, 45, -13, 3, 0, 0 }, + { 4, -17, 104, 47, -13, 3, 0, 0 }, { 4, -17, 102, 50, -14, 3, 0, 0 }, + { 4, -17, 100, 52, -14, 3, 0, 0 }, { 4, -18, 98, 55, -15, 4, 0, 0 }, + { 4, -18, 96, 58, -15, 3, 0, 0 }, { 4, -18, 94, 60, -16, 4, 0, 0 }, + { 4, -18, 91, 63, -16, 4, 0, 0 }, { 4, -18, 89, 65, -16, 4, 0, 0 }, + { 4, -18, 87, 68, -17, 4, 0, 0 }, { 4, -18, 85, 70, -17, 4, 0, 0 }, + { 4, -18, 82, 73, -17, 4, 0, 0 }, { 4, -18, 80, 75, -17, 4, 0, 0 }, + { 4, -18, 78, 78, -18, 4, 0, 0 }, { 4, -17, 75, 80, -18, 4, 0, 0 }, + { 4, -17, 73, 82, -18, 4, 0, 0 }, { 4, -17, 70, 85, -18, 4, 0, 0 }, + { 4, -17, 68, 87, -18, 4, 0, 0 }, { 4, -16, 65, 89, -18, 4, 0, 0 }, + { 4, -16, 63, 91, -18, 4, 0, 0 }, { 4, -16, 60, 94, -18, 4, 0, 0 }, + { 3, -15, 58, 96, -18, 4, 0, 
0 }, { 4, -15, 55, 98, -18, 4, 0, 0 }, + { 3, -14, 52, 100, -17, 4, 0, 0 }, { 3, -14, 50, 102, -17, 4, 0, 0 }, + { 3, -13, 47, 104, -17, 4, 0, 0 }, { 3, -13, 45, 106, -17, 4, 0, 0 }, + { 3, -12, 42, 108, -16, 3, 0, 0 }, { 3, -11, 40, 109, -16, 3, 0, 0 }, + { 3, -11, 37, 111, -15, 3, 0, 0 }, { 2, -10, 35, 113, -15, 3, 0, 0 }, + { 3, -10, 32, 114, -14, 3, 0, 0 }, { 2, - 9, 29, 116, -13, 3, 0, 0 }, + { 2, -8, 27, 117, -13, 3, 0, 0 }, { 2, - 8, 25, 119, -12, 2, 0, 0 }, + { 2, -7, 22, 120, -11, 2, 0, 0 }, { 1, - 6, 20, 121, -10, 2, 0, 0 }, + { 1, -6, 18, 122, - 9, 2, 0, 0 }, { 1, - 5, 15, 123, - 8, 2, 0, 0 }, + { 1, -4, 13, 124, - 7, 1, 0, 0 }, { 1, - 4, 11, 125, - 6, 1, 0, 0 }, + { 1, -3, 8, 126, - 5, 1, 0, 0 }, { 1, - 2, 6, 126, - 4, 1, 0, 0 }, + { 0, -1, 4, 127, - 3, 1, 0, 0 }, { 0, 0, 2, 127, - 1, 0, 0, 0 }, + // [0, 1) + { 0, 0, 0, 127, 1, 0, 0, 0 }, { 0, 0, -1, 127, 2, 0, 0, 0 }, + { 0, 1, -3, 127, 4, -2, 1, 0 }, { 0, 1, -5, 127, 6, -2, 1, 0 }, + { 0, 2, -6, 126, 8, -3, 1, 0 }, { -1, 2, -7, 126, 11, -4, 2, -1 }, + { -1, 3, -8, 125, 13, -5, 2, -1 }, { -1, 3, -10, 124, 16, -6, 3, -1 }, + { -1, 4, -11, 123, 18, -7, 3, -1 }, { -1, 4, -12, 122, 20, -7, 3, -1 }, + { -1, 4, -13, 121, 23, -8, 3, -1 }, { -2, 5, -14, 120, 25, -9, 4, -1 }, + { -1, 5, -15, 119, 27, -10, 4, -1 }, { -1, 5, -16, 118, 30, -11, 4, -1 }, + { -2, 6, -17, 116, 33, -12, 5, -1 }, { -2, 6, -17, 114, 35, -12, 5, -1 }, + { -2, 6, -18, 113, 38, -13, 5, -1 }, { -2, 7, -19, 111, 41, -14, 6, -2 }, + { -2, 7, -19, 110, 43, -15, 6, -2 }, { -2, 7, -20, 108, 46, -15, 6, -2 }, + { -2, 7, -20, 106, 49, -16, 6, -2 }, { -2, 7, -21, 104, 51, -16, 7, -2 }, + { -2, 7, -21, 102, 54, -17, 7, -2 }, { -2, 8, -21, 100, 56, -18, 7, -2 }, + { -2, 8, -22, 98, 59, -18, 7, -2 }, { -2, 8, -22, 96, 62, -19, 7, -2 }, + { -2, 8, -22, 94, 64, -19, 7, -2 }, { -2, 8, -22, 91, 67, -20, 8, -2 }, + { -2, 8, -22, 89, 69, -20, 8, -2 }, { -2, 8, -22, 87, 72, -21, 8, -2 }, + { -2, 8, -21, 84, 74, -21, 8, -2 }, { -2, 8, -22, 82, 77, -21, 8, -2 
}, + { -2, 8, -21, 79, 79, -21, 8, -2 }, { -2, 8, -21, 77, 82, -22, 8, -2 }, + { -2, 8, -21, 74, 84, -21, 8, -2 }, { -2, 8, -21, 72, 87, -22, 8, -2 }, + { -2, 8, -20, 69, 89, -22, 8, -2 }, { -2, 8, -20, 67, 91, -22, 8, -2 }, + { -2, 7, -19, 64, 94, -22, 8, -2 }, { -2, 7, -19, 62, 96, -22, 8, -2 }, + { -2, 7, -18, 59, 98, -22, 8, -2 }, { -2, 7, -18, 56, 100, -21, 8, -2 }, + { -2, 7, -17, 54, 102, -21, 7, -2 }, { -2, 7, -16, 51, 104, -21, 7, -2 }, + { -2, 6, -16, 49, 106, -20, 7, -2 }, { -2, 6, -15, 46, 108, -20, 7, -2 }, + { -2, 6, -15, 43, 110, -19, 7, -2 }, { -2, 6, -14, 41, 111, -19, 7, -2 }, + { -1, 5, -13, 38, 113, -18, 6, -2 }, { -1, 5, -12, 35, 114, -17, 6, -2 }, + { -1, 5, -12, 33, 116, -17, 6, -2 }, { -1, 4, -11, 30, 118, -16, 5, -1 }, + { -1, 4, -10, 27, 119, -15, 5, -1 }, { -1, 4, -9, 25, 120, -14, 5, -2 }, + { -1, 3, -8, 23, 121, -13, 4, -1 }, { -1, 3, -7, 20, 122, -12, 4, -1 }, + { -1, 3, -7, 18, 123, -11, 4, -1 }, { -1, 3, -6, 16, 124, -10, 3, -1 }, + { -1, 2, -5, 13, 125, -8, 3, -1 }, { -1, 2, -4, 11, 126, -7, 2, -1 }, + { 0, 1, -3, 8, 126, -6, 2, 0 }, { 0, 1, -2, 6, 127, -5, 1, 0 }, + { 0, 1, -2, 4, 127, -3, 1, 0 }, { 0, 0, 0, 2, 127, -1, 0, 0 }, + // [1, 2) + { 0, 0, 0, 1, 127, 0, 0, 0 }, { 0, 0, 0, -1, 127, 2, 0, 0 }, + { 0, 0, 1, -3, 127, 4, -1, 0 }, { 0, 0, 1, -4, 126, 6, -2, 1 }, + { 0, 0, 1, -5, 126, 8, -3, 1 }, { 0, 0, 1, -6, 125, 11, -4, 1 }, + { 0, 0, 1, -7, 124, 13, -4, 1 }, { 0, 0, 2, -8, 123, 15, -5, 1 }, + { 0, 0, 2, -9, 122, 18, -6, 1 }, { 0, 0, 2, -10, 121, 20, -6, 1 }, + { 0, 0, 2, -11, 120, 22, -7, 2 }, { 0, 0, 2, -12, 119, 25, -8, 2 }, + { 0, 0, 3, -13, 117, 27, -8, 2 }, { 0, 0, 3, -13, 116, 29, -9, 2 }, + { 0, 0, 3, -14, 114, 32, -10, 3 }, { 0, 0, 3, -15, 113, 35, -10, 2 }, + { 0, 0, 3, -15, 111, 37, -11, 3 }, { 0, 0, 3, -16, 109, 40, -11, 3 }, + { 0, 0, 3, -16, 108, 42, -12, 3 }, { 0, 0, 4, -17, 106, 45, -13, 3 }, + { 0, 0, 4, -17, 104, 47, -13, 3 }, { 0, 0, 4, -17, 102, 50, -14, 3 }, + { 0, 0, 4, -17, 100, 52, -14, 3 }, { 0, 0, 
4, -18, 98, 55, -15, 4 }, + { 0, 0, 4, -18, 96, 58, -15, 3 }, { 0, 0, 4, -18, 94, 60, -16, 4 }, + { 0, 0, 4, -18, 91, 63, -16, 4 }, { 0, 0, 4, -18, 89, 65, -16, 4 }, + { 0, 0, 4, -18, 87, 68, -17, 4 }, { 0, 0, 4, -18, 85, 70, -17, 4 }, + { 0, 0, 4, -18, 82, 73, -17, 4 }, { 0, 0, 4, -18, 80, 75, -17, 4 }, + { 0, 0, 4, -18, 78, 78, -18, 4 }, { 0, 0, 4, -17, 75, 80, -18, 4 }, + { 0, 0, 4, -17, 73, 82, -18, 4 }, { 0, 0, 4, -17, 70, 85, -18, 4 }, + { 0, 0, 4, -17, 68, 87, -18, 4 }, { 0, 0, 4, -16, 65, 89, -18, 4 }, + { 0, 0, 4, -16, 63, 91, -18, 4 }, { 0, 0, 4, -16, 60, 94, -18, 4 }, + { 0, 0, 3, -15, 58, 96, -18, 4 }, { 0, 0, 4, -15, 55, 98, -18, 4 }, + { 0, 0, 3, -14, 52, 100, -17, 4 }, { 0, 0, 3, -14, 50, 102, -17, 4 }, + { 0, 0, 3, -13, 47, 104, -17, 4 }, { 0, 0, 3, -13, 45, 106, -17, 4 }, + { 0, 0, 3, -12, 42, 108, -16, 3 }, { 0, 0, 3, -11, 40, 109, -16, 3 }, + { 0, 0, 3, -11, 37, 111, -15, 3 }, { 0, 0, 2, -10, 35, 113, -15, 3 }, + { 0, 0, 3, -10, 32, 114, -14, 3 }, { 0, 0, 2, -9, 29, 116, -13, 3 }, + { 0, 0, 2, -8, 27, 117, -13, 3 }, { 0, 0, 2, -8, 25, 119, -12, 2 }, + { 0, 0, 2, -7, 22, 120, -11, 2 }, { 0, 0, 1, -6, 20, 121, -10, 2 }, + { 0, 0, 1, -6, 18, 122, -9, 2 }, { 0, 0, 1, -5, 15, 123, -8, 2 }, + { 0, 0, 1, -4, 13, 124, -7, 1 }, { 0, 0, 1, -4, 11, 125, -6, 1 }, + { 0, 0, 1, -3, 8, 126, -5, 1 }, { 0, 0, 1, -2, 6, 126, -4, 1 }, + { 0, 0, 0, -1, 4, 127, -3, 1 }, { 0, 0, 0, 0, 2, 127, -1, 0 }, + // dummy (replicate row index 191) + { 0, 0, 0, 0, 2, 127, -1, 0 }, +}; + +const int8_t ALIGN(dav1d_resize_filter[64][8], 8) = { + { 0, 0, 0, -128, 0, 0, 0, 0 }, { 0, 0, 1, -128, -2, 1, 0, 0 }, + { 0, -1, 3, -127, -4, 2, -1, 0 }, { 0, -1, 4, -127, -6, 3, -1, 0 }, + { 0, -2, 6, -126, -8, 3, -1, 0 }, { 0, -2, 7, -125, -11, 4, -1, 0 }, + { 1, -2, 8, -125, -13, 5, -2, 0 }, { 1, -3, 9, -124, -15, 6, -2, 0 }, + { 1, -3, 10, -123, -18, 6, -2, 1 }, { 1, -3, 11, -122, -20, 7, -3, 1 }, + { 1, -4, 12, -121, -22, 8, -3, 1 }, { 1, -4, 13, -120, -25, 9, -3, 1 }, + { 1, -4, 14, -118, 
-28, 9, -3, 1 }, { 1, -4, 15, -117, -30, 10, -4, 1 }, + { 1, -5, 16, -116, -32, 11, -4, 1 }, { 1, -5, 16, -114, -35, 12, -4, 1 }, + { 1, -5, 17, -112, -38, 12, -4, 1 }, { 1, -5, 18, -111, -40, 13, -5, 1 }, + { 1, -5, 18, -109, -43, 14, -5, 1 }, { 1, -6, 19, -107, -45, 14, -5, 1 }, + { 1, -6, 19, -105, -48, 15, -5, 1 }, { 1, -6, 19, -103, -51, 16, -5, 1 }, + { 1, -6, 20, -101, -53, 16, -6, 1 }, { 1, -6, 20, -99, -56, 17, -6, 1 }, + { 1, -6, 20, -97, -58, 17, -6, 1 }, { 1, -6, 20, -95, -61, 18, -6, 1 }, + { 2, -7, 20, -93, -64, 18, -6, 2 }, { 2, -7, 20, -91, -66, 19, -6, 1 }, + { 2, -7, 20, -88, -69, 19, -6, 1 }, { 2, -7, 20, -86, -71, 19, -6, 1 }, + { 2, -7, 20, -84, -74, 20, -7, 2 }, { 2, -7, 20, -81, -76, 20, -7, 1 }, + { 2, -7, 20, -79, -79, 20, -7, 2 }, { 1, -7, 20, -76, -81, 20, -7, 2 }, + { 2, -7, 20, -74, -84, 20, -7, 2 }, { 1, -6, 19, -71, -86, 20, -7, 2 }, + { 1, -6, 19, -69, -88, 20, -7, 2 }, { 1, -6, 19, -66, -91, 20, -7, 2 }, + { 2, -6, 18, -64, -93, 20, -7, 2 }, { 1, -6, 18, -61, -95, 20, -6, 1 }, + { 1, -6, 17, -58, -97, 20, -6, 1 }, { 1, -6, 17, -56, -99, 20, -6, 1 }, + { 1, -6, 16, -53, -101, 20, -6, 1 }, { 1, -5, 16, -51, -103, 19, -6, 1 }, + { 1, -5, 15, -48, -105, 19, -6, 1 }, { 1, -5, 14, -45, -107, 19, -6, 1 }, + { 1, -5, 14, -43, -109, 18, -5, 1 }, { 1, -5, 13, -40, -111, 18, -5, 1 }, + { 1, -4, 12, -38, -112, 17, -5, 1 }, { 1, -4, 12, -35, -114, 16, -5, 1 }, + { 1, -4, 11, -32, -116, 16, -5, 1 }, { 1, -4, 10, -30, -117, 15, -4, 1 }, + { 1, -3, 9, -28, -118, 14, -4, 1 }, { 1, -3, 9, -25, -120, 13, -4, 1 }, + { 1, -3, 8, -22, -121, 12, -4, 1 }, { 1, -3, 7, -20, -122, 11, -3, 1 }, + { 1, -2, 6, -18, -123, 10, -3, 1 }, { 0, -2, 6, -15, -124, 9, -3, 1 }, + { 0, -2, 5, -13, -125, 8, -2, 1 }, { 0, -1, 4, -11, -125, 7, -2, 0 }, + { 0, -1, 3, -8, -126, 6, -2, 0 }, { 0, -1, 3, -6, -127, 4, -1, 0 }, + { 0, -1, 2, -4, -127, 3, -1, 0 }, { 0, 0, 1, -2, -128, 1, 0, 0 }, +}; + +const uint8_t ALIGN(dav1d_sm_weights[128], 16) = { + // Unused, because we always 
offset by bs, which is at least 2. + 0, 0, + // bs = 2 + 255, 128, + // bs = 4 + 255, 149, 85, 64, + // bs = 8 + 255, 197, 146, 105, 73, 50, 37, 32, + // bs = 16 + 255, 225, 196, 170, 145, 123, 102, 84, + 68, 54, 43, 33, 26, 20, 17, 16, + // bs = 32 + 255, 240, 225, 210, 196, 182, 169, 157, + 145, 133, 122, 111, 101, 92, 83, 74, + 66, 59, 52, 45, 39, 34, 29, 25, + 21, 17, 14, 12, 10, 9, 8, 8, + // bs = 64 + 255, 248, 240, 233, 225, 218, 210, 203, + 196, 189, 182, 176, 169, 163, 156, 150, + 144, 138, 133, 127, 121, 116, 111, 106, + 101, 96, 91, 86, 82, 77, 73, 69, + 65, 61, 57, 54, 50, 47, 44, 41, + 38, 35, 32, 29, 27, 25, 22, 20, + 18, 16, 15, 13, 12, 10, 9, 8, + 7, 6, 6, 5, 5, 4, 4, 4 +}; + +const uint16_t dav1d_dr_intra_derivative[44] = { + // Values that are 0 will never be used + 0, // Angles: + 1023, 0, // 3, 93, 183 + 547, // 6, 96, 186 + 372, 0, 0, // 9, 99, 189 + 273, // 14, 104, 194 + 215, 0, // 17, 107, 197 + 178, // 20, 110, 200 + 151, 0, // 23, 113, 203 (113 & 203 are base angles) + 132, // 26, 116, 206 + 116, 0, // 29, 119, 209 + 102, 0, // 32, 122, 212 + 90, // 36, 126, 216 + 80, 0, // 39, 129, 219 + 71, // 42, 132, 222 + 64, 0, // 45, 135, 225 (45 & 135 are base angles) + 57, // 48, 138, 228 + 51, 0, // 51, 141, 231 + 45, 0, // 54, 144, 234 + 40, // 58, 148, 238 + 35, 0, // 61, 151, 241 + 31, // 64, 154, 244 + 27, 0, // 67, 157, 247 (67 & 157 are base angles) + 23, // 70, 160, 250 + 19, 0, // 73, 163, 253 + 15, 0, // 76, 166, 256 + 11, 0, // 81, 171, 261 + 7, // 84, 174, 264 + 3 // 87, 177, 267 +}; + +#if ARCH_X86 +#define F(idx, f0, f1, f2, f3, f4, f5, f6) \ + [2*idx+0] = f0, [2*idx+1] = f1, \ + [2*idx+16] = f2, [2*idx+17] = f3, \ + [2*idx+32] = f4, [2*idx+33] = f5, \ + [2*idx+48] = f6 +#else +#define F(idx, f0, f1, f2, f3, f4, f5, f6) \ + [1*idx+0] = f0, [1*idx+8] = f1, \ + [1*idx+16] = f2, [1*idx+24] = f3, \ + [1*idx+32] = f4, [1*idx+40] = f5, \ + [1*idx+48] = f6 +#endif +const int8_t ALIGN(dav1d_filter_intra_taps[5][64], 64) = { + { + F( 0, -6, 
10, 0, 0, 0, 12, 0 ), + F( 1, -5, 2, 10, 0, 0, 9, 0 ), + F( 2, -3, 1, 1, 10, 0, 7, 0 ), + F( 3, -3, 1, 1, 2, 10, 5, 0 ), + F( 4, -4, 6, 0, 0, 0, 2, 12 ), + F( 5, -3, 2, 6, 0, 0, 2, 9 ), + F( 6, -3, 2, 2, 6, 0, 2, 7 ), + F( 7, -3, 1, 2, 2, 6, 3, 5 ), + }, { + F( 0, -10, 16, 0, 0, 0, 10, 0 ), + F( 1, -6, 0, 16, 0, 0, 6, 0 ), + F( 2, -4, 0, 0, 16, 0, 4, 0 ), + F( 3, -2, 0, 0, 0, 16, 2, 0 ), + F( 4, -10, 16, 0, 0, 0, 0, 10 ), + F( 5, -6, 0, 16, 0, 0, 0, 6 ), + F( 6, -4, 0, 0, 16, 0, 0, 4 ), + F( 7, -2, 0, 0, 0, 16, 0, 2 ), + }, { + F( 0, -8, 8, 0, 0, 0, 16, 0 ), + F( 1, -8, 0, 8, 0, 0, 16, 0 ), + F( 2, -8, 0, 0, 8, 0, 16, 0 ), + F( 3, -8, 0, 0, 0, 8, 16, 0 ), + F( 4, -4, 4, 0, 0, 0, 0, 16 ), + F( 5, -4, 0, 4, 0, 0, 0, 16 ), + F( 6, -4, 0, 0, 4, 0, 0, 16 ), + F( 7, -4, 0, 0, 0, 4, 0, 16 ), + }, { + F( 0, -2, 8, 0, 0, 0, 10, 0 ), + F( 1, -1, 3, 8, 0, 0, 6, 0 ), + F( 2, -1, 2, 3, 8, 0, 4, 0 ), + F( 3, 0, 1, 2, 3, 8, 2, 0 ), + F( 4, -1, 4, 0, 0, 0, 3, 10 ), + F( 5, -1, 3, 4, 0, 0, 4, 6 ), + F( 6, -1, 2, 3, 4, 0, 4, 4 ), + F( 7, -1, 2, 2, 3, 4, 3, 3 ), + }, { + F( 0, -12, 14, 0, 0, 0, 14, 0 ), + F( 1, -10, 0, 14, 0, 0, 12, 0 ), + F( 2, -9, 0, 0, 14, 0, 11, 0 ), + F( 3, -8, 0, 0, 0, 14, 10, 0 ), + F( 4, -10, 12, 0, 0, 0, 0, 14 ), + F( 5, -9, 1, 12, 0, 0, 0, 12 ), + F( 6, -8, 0, 0, 12, 0, 1, 11 ), + F( 7, -7, 0, 0, 1, 12, 1, 9 ), + } +}; + +const uint8_t ALIGN(dav1d_obmc_masks[64], 16) = { + /* Unused */ + 0, 0, + /* 2 */ + 19, 0, + /* 4 */ + 25, 14, 5, 0, + /* 8 */ + 28, 22, 16, 11, 7, 3, 0, 0, + /* 16 */ + 30, 27, 24, 21, 18, 15, 12, 10, 8, 6, 4, 3, 0, 0, 0, 0, + /* 32 */ + 31, 29, 28, 26, 24, 23, 21, 20, 19, 17, 16, 14, 13, 12, 11, 9, + 8, 7, 6, 5, 4, 4, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// Taken from the spec. 
Range is [-2048, 2047], mean is 0 and stddev is 512 +const int16_t dav1d_gaussian_sequence[2048] = { + 56, 568, -180, 172, 124, -84, 172, -64, -900, 24, 820, + 224, 1248, 996, 272, -8, -916, -388, -732, -104, -188, 800, + 112, -652, -320, -376, 140, -252, 492, -168, 44, -788, 588, + -584, 500, -228, 12, 680, 272, -476, 972, -100, 652, 368, + 432, -196, -720, -192, 1000, -332, 652, -136, -552, -604, -4, + 192, -220, -136, 1000, -52, 372, -96, -624, 124, -24, 396, + 540, -12, -104, 640, 464, 244, -208, -84, 368, -528, -740, + 248, -968, -848, 608, 376, -60, -292, -40, -156, 252, -292, + 248, 224, -280, 400, -244, 244, -60, 76, -80, 212, 532, + 340, 128, -36, 824, -352, -60, -264, -96, -612, 416, -704, + 220, -204, 640, -160, 1220, -408, 900, 336, 20, -336, -96, + -792, 304, 48, -28, -1232, -1172, -448, 104, -292, -520, 244, + 60, -948, 0, -708, 268, 108, 356, -548, 488, -344, -136, + 488, -196, -224, 656, -236, -1128, 60, 4, 140, 276, -676, + -376, 168, -108, 464, 8, 564, 64, 240, 308, -300, -400, + -456, -136, 56, 120, -408, -116, 436, 504, -232, 328, 844, + -164, -84, 784, -168, 232, -224, 348, -376, 128, 568, 96, + -1244, -288, 276, 848, 832, -360, 656, 464, -384, -332, -356, + 728, -388, 160, -192, 468, 296, 224, 140, -776, -100, 280, + 4, 196, 44, -36, -648, 932, 16, 1428, 28, 528, 808, + 772, 20, 268, 88, -332, -284, 124, -384, -448, 208, -228, + -1044, -328, 660, 380, -148, -300, 588, 240, 540, 28, 136, + -88, -436, 256, 296, -1000, 1400, 0, -48, 1056, -136, 264, + -528, -1108, 632, -484, -592, -344, 796, 124, -668, -768, 388, + 1296, -232, -188, -200, -288, -4, 308, 100, -168, 256, -500, + 204, -508, 648, -136, 372, -272, -120, -1004, -552, -548, -384, + 548, -296, 428, -108, -8, -912, -324, -224, -88, -112, -220, + -100, 996, -796, 548, 360, -216, 180, 428, -200, -212, 148, + 96, 148, 284, 216, -412, -320, 120, -300, -384, -604, -572, + -332, -8, -180, -176, 696, 116, -88, 628, 76, 44, -516, + 240, -208, -40, 100, -592, 344, -308, -452, -228, 20, 916, + 
-1752, -136, -340, -804, 140, 40, 512, 340, 248, 184, -492, + 896, -156, 932, -628, 328, -688, -448, -616, -752, -100, 560, + -1020, 180, -800, -64, 76, 576, 1068, 396, 660, 552, -108, + -28, 320, -628, 312, -92, -92, -472, 268, 16, 560, 516, + -672, -52, 492, -100, 260, 384, 284, 292, 304, -148, 88, + -152, 1012, 1064, -228, 164, -376, -684, 592, -392, 156, 196, + -524, -64, -884, 160, -176, 636, 648, 404, -396, -436, 864, + 424, -728, 988, -604, 904, -592, 296, -224, 536, -176, -920, + 436, -48, 1176, -884, 416, -776, -824, -884, 524, -548, -564, + -68, -164, -96, 692, 364, -692, -1012, -68, 260, -480, 876, + -1116, 452, -332, -352, 892, -1088, 1220, -676, 12, -292, 244, + 496, 372, -32, 280, 200, 112, -440, -96, 24, -644, -184, + 56, -432, 224, -980, 272, -260, 144, -436, 420, 356, 364, + -528, 76, 172, -744, -368, 404, -752, -416, 684, -688, 72, + 540, 416, 92, 444, 480, -72, -1416, 164, -1172, -68, 24, + 424, 264, 1040, 128, -912, -524, -356, 64, 876, -12, 4, + -88, 532, 272, -524, 320, 276, -508, 940, 24, -400, -120, + 756, 60, 236, -412, 100, 376, -484, 400, -100, -740, -108, + -260, 328, -268, 224, -200, -416, 184, -604, -564, -20, 296, + 60, 892, -888, 60, 164, 68, -760, 216, -296, 904, -336, + -28, 404, -356, -568, -208, -1480, -512, 296, 328, -360, -164, + -1560, -776, 1156, -428, 164, -504, -112, 120, -216, -148, -264, + 308, 32, 64, -72, 72, 116, 176, -64, -272, 460, -536, + -784, -280, 348, 108, -752, -132, 524, -540, -776, 116, -296, + -1196, -288, -560, 1040, -472, 116, -848, -1116, 116, 636, 696, + 284, -176, 1016, 204, -864, -648, -248, 356, 972, -584, -204, + 264, 880, 528, -24, -184, 116, 448, -144, 828, 524, 212, + -212, 52, 12, 200, 268, -488, -404, -880, 824, -672, -40, + 908, -248, 500, 716, -576, 492, -576, 16, 720, -108, 384, + 124, 344, 280, 576, -500, 252, 104, -308, 196, -188, -8, + 1268, 296, 1032, -1196, 436, 316, 372, -432, -200, -660, 704, + -224, 596, -132, 268, 32, -452, 884, 104, -1008, 424, -1348, + -280, 4, -1168, 368, 476, 
696, 300, -8, 24, 180, -592, + -196, 388, 304, 500, 724, -160, 244, -84, 272, -256, -420, + 320, 208, -144, -156, 156, 364, 452, 28, 540, 316, 220, + -644, -248, 464, 72, 360, 32, -388, 496, -680, -48, 208, + -116, -408, 60, -604, -392, 548, -840, 784, -460, 656, -544, + -388, -264, 908, -800, -628, -612, -568, 572, -220, 164, 288, + -16, -308, 308, -112, -636, -760, 280, -668, 432, 364, 240, + -196, 604, 340, 384, 196, 592, -44, -500, 432, -580, -132, + 636, -76, 392, 4, -412, 540, 508, 328, -356, -36, 16, + -220, -64, -248, -60, 24, -192, 368, 1040, 92, -24, -1044, + -32, 40, 104, 148, 192, -136, -520, 56, -816, -224, 732, + 392, 356, 212, -80, -424, -1008, -324, 588, -1496, 576, 460, + -816, -848, 56, -580, -92, -1372, -112, -496, 200, 364, 52, + -140, 48, -48, -60, 84, 72, 40, 132, -356, -268, -104, + -284, -404, 732, -520, 164, -304, -540, 120, 328, -76, -460, + 756, 388, 588, 236, -436, -72, -176, -404, -316, -148, 716, + -604, 404, -72, -88, -888, -68, 944, 88, -220, -344, 960, + 472, 460, -232, 704, 120, 832, -228, 692, -508, 132, -476, + 844, -748, -364, -44, 1116, -1104, -1056, 76, 428, 552, -692, + 60, 356, 96, -384, -188, -612, -576, 736, 508, 892, 352, + -1132, 504, -24, -352, 324, 332, -600, -312, 292, 508, -144, + -8, 484, 48, 284, -260, -240, 256, -100, -292, -204, -44, + 472, -204, 908, -188, -1000, -256, 92, 1164, -392, 564, 356, + 652, -28, -884, 256, 484, -192, 760, -176, 376, -524, -452, + -436, 860, -736, 212, 124, 504, -476, 468, 76, -472, 552, + -692, -944, -620, 740, -240, 400, 132, 20, 192, -196, 264, + -668, -1012, -60, 296, -316, -828, 76, -156, 284, -768, -448, + -832, 148, 248, 652, 616, 1236, 288, -328, -400, -124, 588, + 220, 520, -696, 1032, 768, -740, -92, -272, 296, 448, -464, + 412, -200, 392, 440, -200, 264, -152, -260, 320, 1032, 216, + 320, -8, -64, 156, -1016, 1084, 1172, 536, 484, -432, 132, + 372, -52, -256, 84, 116, -352, 48, 116, 304, -384, 412, + 924, -300, 528, 628, 180, 648, 44, -980, -220, 1320, 48, + 332, 748, 524, 
-268, -720, 540, -276, 564, -344, -208, -196, + 436, 896, 88, -392, 132, 80, -964, -288, 568, 56, -48, + -456, 888, 8, 552, -156, -292, 948, 288, 128, -716, -292, + 1192, -152, 876, 352, -600, -260, -812, -468, -28, -120, -32, + -44, 1284, 496, 192, 464, 312, -76, -516, -380, -456, -1012, + -48, 308, -156, 36, 492, -156, -808, 188, 1652, 68, -120, + -116, 316, 160, -140, 352, 808, -416, 592, 316, -480, 56, + 528, -204, -568, 372, -232, 752, -344, 744, -4, 324, -416, + -600, 768, 268, -248, -88, -132, -420, -432, 80, -288, 404, + -316, -1216, -588, 520, -108, 92, -320, 368, -480, -216, -92, + 1688, -300, 180, 1020, -176, 820, -68, -228, -260, 436, -904, + 20, 40, -508, 440, -736, 312, 332, 204, 760, -372, 728, + 96, -20, -632, -520, -560, 336, 1076, -64, -532, 776, 584, + 192, 396, -728, -520, 276, -188, 80, -52, -612, -252, -48, + 648, 212, -688, 228, -52, -260, 428, -412, -272, -404, 180, + 816, -796, 48, 152, 484, -88, -216, 988, 696, 188, -528, + 648, -116, -180, 316, 476, 12, -564, 96, 476, -252, -364, + -376, -392, 556, -256, -576, 260, -352, 120, -16, -136, -260, + -492, 72, 556, 660, 580, 616, 772, 436, 424, -32, -324, + -1268, 416, -324, -80, 920, 160, 228, 724, 32, -516, 64, + 384, 68, -128, 136, 240, 248, -204, -68, 252, -932, -120, + -480, -628, -84, 192, 852, -404, -288, -132, 204, 100, 168, + -68, -196, -868, 460, 1080, 380, -80, 244, 0, 484, -888, + 64, 184, 352, 600, 460, 164, 604, -196, 320, -64, 588, + -184, 228, 12, 372, 48, -848, -344, 224, 208, -200, 484, + 128, -20, 272, -468, -840, 384, 256, -720, -520, -464, -580, + 112, -120, 644, -356, -208, -608, -528, 704, 560, -424, 392, + 828, 40, 84, 200, -152, 0, -144, 584, 280, -120, 80, + -556, -972, -196, -472, 724, 80, 168, -32, 88, 160, -688, + 0, 160, 356, 372, -776, 740, -128, 676, -248, -480, 4, + -364, 96, 544, 232, -1032, 956, 236, 356, 20, -40, 300, + 24, -676, -596, 132, 1120, -104, 532, -1096, 568, 648, 444, + 508, 380, 188, -376, -604, 1488, 424, 24, 756, -220, -192, + 716, 120, 920, 
688, 168, 44, -460, 568, 284, 1144, 1160, + 600, 424, 888, 656, -356, -320, 220, 316, -176, -724, -188, + -816, -628, -348, -228, -380, 1012, -452, -660, 736, 928, 404, + -696, -72, -268, -892, 128, 184, -344, -780, 360, 336, 400, + 344, 428, 548, -112, 136, -228, -216, -820, -516, 340, 92, + -136, 116, -300, 376, -244, 100, -316, -520, -284, -12, 824, + 164, -548, -180, -128, 116, -924, -828, 268, -368, -580, 620, + 192, 160, 0, -1676, 1068, 424, -56, -360, 468, -156, 720, + 288, -528, 556, -364, 548, -148, 504, 316, 152, -648, -620, + -684, -24, -376, -384, -108, -920, -1032, 768, 180, -264, -508, + -1268, -260, -60, 300, -240, 988, 724, -376, -576, -212, -736, + 556, 192, 1092, -620, -880, 376, -56, -4, -216, -32, 836, + 268, 396, 1332, 864, -600, 100, 56, -412, -92, 356, 180, + 884, -468, -436, 292, -388, -804, -704, -840, 368, -348, 140, + -724, 1536, 940, 372, 112, -372, 436, -480, 1136, 296, -32, + -228, 132, -48, -220, 868, -1016, -60, -1044, -464, 328, 916, + 244, 12, -736, -296, 360, 468, -376, -108, -92, 788, 368, + -56, 544, 400, -672, -420, 728, 16, 320, 44, -284, -380, + -796, 488, 132, 204, -596, -372, 88, -152, -908, -636, -572, + -624, -116, -692, -200, -56, 276, -88, 484, -324, 948, 864, + 1000, -456, -184, -276, 292, -296, 156, 676, 320, 160, 908, + -84, -1236, -288, -116, 260, -372, -644, 732, -756, -96, 84, + 344, -520, 348, -688, 240, -84, 216, -1044, -136, -676, -396, + -1500, 960, -40, 176, 168, 1516, 420, -504, -344, -364, -360, + 1216, -940, -380, -212, 252, -660, -708, 484, -444, -152, 928, + -120, 1112, 476, -260, 560, -148, -344, 108, -196, 228, -288, + 504, 560, -328, -88, 288, -1008, 460, -228, 468, -836, -196, + 76, 388, 232, 412, -1168, -716, -644, 756, -172, -356, -504, + 116, 432, 528, 48, 476, -168, -608, 448, 160, -532, -272, + 28, -676, -12, 828, 980, 456, 520, 104, -104, 256, -344, + -4, -28, -368, -52, -524, -572, -556, -200, 768, 1124, -208, + -512, 176, 232, 248, -148, -888, 604, -600, -304, 804, -156, + -212, 488, -192, 
-804, -256, 368, -360, -916, -328, 228, -240, + -448, -472, 856, -556, -364, 572, -12, -156, -368, -340, 432, + 252, -752, -152, 288, 268, -580, -848, -592, 108, -76, 244, + 312, -716, 592, -80, 436, 360, 4, -248, 160, 516, 584, + 732, 44, -468, -280, -292, -156, -588, 28, 308, 912, 24, + 124, 156, 180, -252, 944, -924, -772, -520, -428, -624, 300, + -212, -1144, 32, -724, 800, -1128, -212, -1288, -848, 180, -416, + 440, 192, -576, -792, -76, -1080, 80, -532, -352, -132, 380, + -820, 148, 1112, 128, 164, 456, 700, -924, 144, -668, -384, + 648, -832, 508, 552, -52, -100, -656, 208, -568, 748, -88, + 680, 232, 300, 192, -408, -1012, -152, -252, -268, 272, -876, + -664, -648, -332, -136, 16, 12, 1152, -28, 332, -536, 320, + -672, -460, -316, 532, -260, 228, -40, 1052, -816, 180, 88, + -496, -556, -672, -368, 428, 92, 356, 404, -408, 252, 196, + -176, -556, 792, 268, 32, 372, 40, 96, -332, 328, 120, + 372, -900, -40, 472, -264, -592, 952, 128, 656, 112, 664, + -232, 420, 4, -344, -464, 556, 244, -416, -32, 252, 0, + -412, 188, -696, 508, -476, 324, -1096, 656, -312, 560, 264, + -136, 304, 160, -64, -580, 248, 336, -720, 560, -348, -288, + -276, -196, -500, 852, -544, -236, -1128, -992, -776, 116, 56, + 52, 860, 884, 212, -12, 168, 1020, 512, -552, 924, -148, + 716, 188, 164, -340, -520, -184, 880, -152, -680, -208, -1156, + -300, -528, -472, 364, 100, -744, -1056, -32, 540, 280, 144, + -676, -32, -232, -280, -224, 96, 568, -76, 172, 148, 148, + 104, 32, -296, -32, 788, -80, 32, -16, 280, 288, 944, + 428, -484 +}; diff --git a/media/video/av1/src/tables.h b/media/video/av1/src/tables.h new file mode 100644 index 00000000..f3c00cfb --- /dev/null +++ b/media/video/av1/src/tables.h @@ -0,0 +1,125 @@ +/* + * Copyright © 2018-2021, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef DAV1D_SRC_TABLES_H +#define DAV1D_SRC_TABLES_H + +#include <stdint.h> + +#include "common/intops.h" + +#include "src/levels.h" + +EXTERN const uint8_t dav1d_al_part_ctx[2][N_BL_LEVELS][N_PARTITIONS]; +EXTERN const uint8_t /* enum BlockSize */ + dav1d_block_sizes[N_BL_LEVELS][N_PARTITIONS][2]; +// width, height (in 4px blocks), log2 versions of these two +EXTERN const uint8_t dav1d_block_dimensions[N_BS_SIZES][4]; +typedef struct TxfmInfo { + // width, height (in 4px blocks), log2 of them, min/max of log2, sub, ctx + uint8_t w, h, lw, lh, min, max, sub, ctx; +} TxfmInfo; +EXTERN const TxfmInfo dav1d_txfm_dimensions[N_RECT_TX_SIZES]; +EXTERN const uint8_t /* enum (Rect)TxfmSize */ + dav1d_max_txfm_size_for_bs[N_BS_SIZES][4 /* y, 420, 422, 444 */]; +EXTERN const uint8_t /* enum TxfmType */ + dav1d_txtp_from_uvmode[N_UV_INTRA_PRED_MODES]; + +EXTERN const uint8_t /* enum InterPredMode */ + dav1d_comp_inter_pred_modes[N_COMP_INTER_PRED_MODES][2]; + +EXTERN const uint8_t dav1d_partition_type_count[N_BL_LEVELS]; +EXTERN const uint8_t /* enum TxfmType */ dav1d_tx_types_per_set[40]; + +EXTERN const uint8_t dav1d_filter_mode_to_y_mode[5]; +EXTERN const uint8_t dav1d_ymode_size_context[N_BS_SIZES]; +EXTERN const uint8_t dav1d_lo_ctx_offsets[3][5][5]; +EXTERN const uint8_t dav1d_skip_ctx[5][5]; +EXTERN const uint8_t /* enum TxClass */ + dav1d_tx_type_class[N_TX_TYPES_PLUS_LL]; +EXTERN const uint8_t /* enum Filter2d */ + dav1d_filter_2d[DAV1D_N_FILTERS /* h */][DAV1D_N_FILTERS /* v */]; +EXTERN const uint8_t /* enum Dav1dFilterMode */ dav1d_filter_dir[N_2D_FILTERS][2]; +EXTERN const uint8_t dav1d_intra_mode_context[N_INTRA_PRED_MODES]; +EXTERN const uint8_t dav1d_wedge_ctx_lut[N_BS_SIZES]; + +static const unsigned cfl_allowed_mask = + (1 << BS_32x32) | + (1 << BS_32x16) | + (1 << BS_32x8) | + (1 << BS_16x32) | + (1 << BS_16x16) | + (1 << BS_16x8) | + (1 << BS_16x4) | + (1 << BS_8x32) | + (1 << BS_8x16) | + (1 << BS_8x8) | + (1 << BS_8x4) | + (1 << BS_4x16) | + (1 << BS_4x8) 
| + (1 << BS_4x4); + +static const unsigned wedge_allowed_mask = + (1 << BS_32x32) | + (1 << BS_32x16) | + (1 << BS_32x8) | + (1 << BS_16x32) | + (1 << BS_16x16) | + (1 << BS_16x8) | + (1 << BS_8x32) | + (1 << BS_8x16) | + (1 << BS_8x8); + +static const unsigned interintra_allowed_mask = + (1 << BS_32x32) | + (1 << BS_32x16) | + (1 << BS_16x32) | + (1 << BS_16x16) | + (1 << BS_16x8) | + (1 << BS_8x16) | + (1 << BS_8x8); + +EXTERN const Dav1dWarpedMotionParams dav1d_default_wm_params; + +EXTERN const int8_t dav1d_cdef_directions[12][2]; + +EXTERN const uint16_t dav1d_sgr_params[16][2]; +EXTERN const uint8_t dav1d_sgr_x_by_x[256]; + +EXTERN const int8_t dav1d_mc_subpel_filters[6][15][8]; +EXTERN const int8_t dav1d_mc_warp_filter[193][8]; +EXTERN const int8_t dav1d_resize_filter[64][8]; + +EXTERN const uint8_t dav1d_sm_weights[128]; +EXTERN const uint16_t dav1d_dr_intra_derivative[44]; +EXTERN const int8_t dav1d_filter_intra_taps[5][64]; + +EXTERN const uint8_t dav1d_obmc_masks[64]; + +EXTERN const int16_t dav1d_gaussian_sequence[2048]; // for fgs + +#endif /* DAV1D_SRC_TABLES_H */ diff --git a/media/video/av1/src/thread.h b/media/video/av1/src/thread.h new file mode 100644 index 00000000..b091e4f2 --- /dev/null +++ b/media/video/av1/src/thread.h @@ -0,0 +1,188 @@ +/* + * Copyright © 2018-2021, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_SRC_THREAD_H +#define DAV1D_SRC_THREAD_H + +#if defined(_WIN32) + +#include <limits.h> +#include <windows.h> + +#define PTHREAD_ONCE_INIT INIT_ONCE_STATIC_INIT + +typedef struct { + HANDLE h; + void *(*func)(void*); + void *arg; +} pthread_t; + +typedef struct { + unsigned stack_size; +} pthread_attr_t; + +typedef SRWLOCK pthread_mutex_t; +typedef CONDITION_VARIABLE pthread_cond_t; +typedef INIT_ONCE pthread_once_t; + +void dav1d_init_thread(void); +void dav1d_set_thread_name(const wchar_t *name); +#define dav1d_set_thread_name(name) dav1d_set_thread_name(L##name) + +int dav1d_pthread_create(pthread_t *thread, const pthread_attr_t *attr, + void *(*func)(void*), void *arg); +int dav1d_pthread_join(pthread_t *thread, void **res); +int dav1d_pthread_once(pthread_once_t *once_control, + void (*init_routine)(void)); + +#define pthread_create dav1d_pthread_create +#define pthread_join(thread, res) dav1d_pthread_join(&(thread), res) +#define pthread_once dav1d_pthread_once + +static inline int pthread_attr_init(pthread_attr_t *const attr) { + attr->stack_size = 0; + return 0; +} + +static inline int pthread_attr_destroy(pthread_attr_t *const attr) { + return 0; +} + +static inline int 
pthread_attr_setstacksize(pthread_attr_t *const attr, + const size_t stack_size) +{ + if (stack_size > UINT_MAX) return 1; + attr->stack_size = (unsigned) stack_size; + return 0; +} + +static inline int pthread_mutex_init(pthread_mutex_t *const mutex, + const void *const attr) +{ + InitializeSRWLock(mutex); + return 0; +} + +static inline int pthread_mutex_destroy(pthread_mutex_t *const mutex) { + return 0; +} + +static inline int pthread_mutex_lock(pthread_mutex_t *const mutex) { + AcquireSRWLockExclusive(mutex); + return 0; +} + +static inline int pthread_mutex_unlock(pthread_mutex_t *const mutex) { + ReleaseSRWLockExclusive(mutex); + return 0; +} + +static inline int pthread_cond_init(pthread_cond_t *const cond, + const void *const attr) +{ + InitializeConditionVariable(cond); + return 0; +} + +static inline int pthread_cond_destroy(pthread_cond_t *const cond) { + return 0; +} + +static inline int pthread_cond_wait(pthread_cond_t *const cond, + pthread_mutex_t *const mutex) +{ + return !SleepConditionVariableSRW(cond, mutex, INFINITE, 0); +} + +static inline int pthread_cond_signal(pthread_cond_t *const cond) { + WakeConditionVariable(cond); + return 0; +} + +static inline int pthread_cond_broadcast(pthread_cond_t *const cond) { + WakeAllConditionVariable(cond); + return 0; +} + +#else + +#include <pthread.h> + +#define dav1d_init_thread() do {} while (0) + +/* Thread naming support */ + +#ifdef __linux__ + +#include <sys/prctl.h> + +static inline void dav1d_set_thread_name(const char *const name) { + prctl(PR_SET_NAME, name); +} + +#elif defined(__APPLE__) + +static inline void dav1d_set_thread_name(const char *const name) { + pthread_setname_np(name); +} + +#elif defined(__DragonFly__) || defined(__FreeBSD__) || defined(__OpenBSD__) + +#if defined(__FreeBSD__) + /* ALIGN from <sys/param.h> conflicts with ALIGN from "common/attributes.h" */ +#define _SYS_PARAM_H_ +#include <sys/types.h> +#endif +#include <pthread_np.h> + +static inline void dav1d_set_thread_name(const char *const name) { + pthread_set_name_np(pthread_self(), 
name); +} + +#elif defined(__NetBSD__) + +static inline void dav1d_set_thread_name(const char *const name) { + pthread_setname_np(pthread_self(), "%s", (void*)name); +} + +#elif defined(__HAIKU__) + +#include <os/kernel/OS.h> + +static inline void dav1d_set_thread_name(const char *const name) { + rename_thread(find_thread(NULL), name); +} + +#else + +#define dav1d_set_thread_name(name) do {} while (0) + +#endif + +#endif + +#endif /* DAV1D_SRC_THREAD_H */ diff --git a/media/video/av1/src/thread_data.h b/media/video/av1/src/thread_data.h new file mode 100644 index 00000000..62814e63 --- /dev/null +++ b/media/video/av1/src/thread_data.h @@ -0,0 +1,40 @@ +/* + * Copyright © 2018, VideoLAN and dav1d authors + * Copyright © 2018, Two Orioles, LLC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DAV1D_SRC_THREAD_DATA_H +#define DAV1D_SRC_THREAD_DATA_H + +#include "src/thread.h" + +struct thread_data { + pthread_t thread; + pthread_cond_t cond; + pthread_mutex_t lock; + int inited; +}; + +#endif /* DAV1D_SRC_THREAD_DATA_H */