pysvp64db: fix traversal
[openpower-isa.git] / media / video / av1 / src / cdef_tmpl.c
1 /*
2 * Copyright © 2018, VideoLAN and dav1d authors
3 * Copyright © 2018, Two Orioles, LLC
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "config.h"
29
30 #include <stdlib.h>
31 #include <stdio.h>
32
33 #include "common/intops.h"
34
35 #include "src/cdef.h"
36 #include "src/tables.h"
37
38 static int cdef_find_dir_c(const pixel *img, const ptrdiff_t stride,
39 unsigned *const var HIGHBD_DECL_SUFFIX)
40 {
41 const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
42 printf("bitdepth_max = %d, bitdepth_min_8 = %d\n", bitdepth_max, bitdepth_min_8);
43 int partial_sum_hv[2][8] = { { 0 } };
44 int partial_sum_diag[2][15] = { { 0 } };
45 int partial_sum_alt[4][11] = { { 0 } };
46
47 for (int y = 0; y < 8; y++) {
48 printf("img: y: %d : %p -> \t", y, img);
49 for (int x = 0; x < 8; x++) {
50 const int px = (img[x] >> bitdepth_min_8) - 128;
51 printf("%04x/%04x ", img[x], px);
52
53 partial_sum_diag[0][ y + x ] += px;
54 partial_sum_alt [0][ y + (x >> 1)] += px;
55 partial_sum_hv [0][ y ] += px;
56 partial_sum_alt [1][3 + y - (x >> 1)] += px;
57 partial_sum_diag[1][7 + y - x ] += px;
58 partial_sum_alt [2][3 - (y >> 1) + x ] += px;
59 partial_sum_hv [1][ x ] += px;
60 partial_sum_alt [3][ (y >> 1) + x ] += px;
61 }
62 printf("\n");
63 img += PXSTRIDE(stride);
64 }
65
66 printf("partial_sum_alt : \n");
67 for (int y = 0; y < 4; y++) {
68 for (int x = 0; x < 11; x++) {
69 printf("%08x ", partial_sum_alt[y][x]);
70 }
71 printf("\n");
72 }
73 printf("\n");
74
75 unsigned cost[8] = { 0 };
76 for (int n = 0; n < 8; n++) {
77 cost[2] += partial_sum_hv[0][n] * partial_sum_hv[0][n];
78 cost[6] += partial_sum_hv[1][n] * partial_sum_hv[1][n];
79 }
80 cost[2] *= 105;
81 cost[6] *= 105;
82
83 static const uint16_t div_table[7] = { 840, 420, 280, 210, 168, 140, 120 };
84 for (int n = 0; n < 7; n++) {
85 const int d = div_table[n];
86 printf("n: %d\n", n);
87 int t = partial_sum_diag[0][14 - n];
88 printf("partial_sum_diag[0][14 - %d] = %d/%08x, partial_sum_diag[0][14 - %d]^2 = %d/%08x\n", n, t, t , n, t * t, t * t);
89 t = (partial_sum_diag[0][n] * partial_sum_diag[0][n] +
90 partial_sum_diag[0][14 - n] * partial_sum_diag[0][14 - n]) * d;
91 printf("t = %d/%08x, d = %d/%08x, t * d = %d/%08x\n", t, t, d, d, t * d, t * d);
92 cost[0] += t * d;
93 cost[4] += (partial_sum_diag[1][n] * partial_sum_diag[1][n] +
94 partial_sum_diag[1][14 - n] * partial_sum_diag[1][14 - n]) * d;
95 }
96 cost[0] += partial_sum_diag[0][7] * partial_sum_diag[0][7] * 105;
97 cost[4] += partial_sum_diag[1][7] * partial_sum_diag[1][7] * 105;
98
99 printf("cost: \n");
100 for (int y = 0; y < 8; y++) {
101 printf("%08x ", cost[y]);
102 }
103 printf("\n");
104
105 for (int n = 0; n < 4; n++) {
106 unsigned *const cost_ptr = &cost[n * 2 + 1];
107 for (int m = 0; m < 5; m++)
108 *cost_ptr += partial_sum_alt[n][3 + m] * partial_sum_alt[n][3 + m];
109 *cost_ptr *= 105;
110 for (int m = 0; m < 3; m++) {
111 const int d = div_table[2 * m + 1];
112 *cost_ptr += (partial_sum_alt[n][m] * partial_sum_alt[n][m] +
113 partial_sum_alt[n][10 - m] * partial_sum_alt[n][10 - m]) * d;
114 }
115 }
116 printf("cost: \n");
117 for (int y = 0; y < 8; y++) {
118 printf("%08x ", cost[y]);
119 }
120 printf("\n");
121
122 int best_dir = 0;
123 unsigned best_cost = cost[0];
124 for (int n = 1; n < 8; n++) {
125 if (cost[n] > best_cost) {
126 best_cost = cost[n];
127 best_dir = n;
128 }
129 }
130
131 *var = (best_cost - (cost[best_dir ^ 4])) >> 10;
132 return best_dir;
133 }
134
135 #if HAVE_ASM
136 #if ARCH_AARCH64 || ARCH_ARM
137 #include "src/arm/cdef.h"
138 #elif ARCH_PPC64LE
139 #include "src/ppc/cdef.h"
140 #elif ARCH_X86
141 #include "src/x86/cdef.h"
142 #endif
143 #endif
144
145 COLD void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
146 c->dir = cdef_find_dir_c;
147
148 #if HAVE_ASM
149 #if ARCH_AARCH64 || ARCH_ARM
150 cdef_dsp_init_arm(c);
151 #elif ARCH_PPC64LE
152 cdef_dsp_init_ppc(c);
153 #elif ARCH_X86
154 cdef_dsp_init_x86(c);
155 #endif
156 #endif
157 }