2 * Copyright © 2018, VideoLAN and dav1d authors
3 * Copyright © 2018, Two Orioles, LLC
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #include "common/intops.h"
36 #include "src/tables.h"
/*
 * Accumulates directional partial sums over the pixels read from img
 * (x and y each run 0..7), converts them into eight cost values and stores
 * (best_cost - cost[best_dir ^ 4]) >> 10 through var.
 *
 * img    - first pixel to read; stepped by PXSTRIDE(stride) during the scan.
 * stride - only used as the argument of PXSTRIDE() when stepping img.
 * var    - output location, written once near the end of the function.
 *
 * NOTE(review): several lines of this function are not visible in this view
 * (block braces, the declaration and update of best_dir, the return
 * statement). The return value is presumably best_dir - TODO confirm.
 * NOTE(review): the printf() calls look like temporary debug tracing. No
 * <stdio.h> include is visible in this view, and some format specifiers do
 * not match their arguments (flagged at each call below).
 */
38 static int cdef_find_dir_c(const pixel
*img
, const ptrdiff_t stride
,
39 unsigned *const var HIGHBD_DECL_SUFFIX
)
/* bitdepth_max is not declared in the visible parameter list; presumably it
 * is supplied by HIGHBD_DECL_SUFFIX - TODO confirm. */
41 const int bitdepth_min_8
= bitdepth_from_max(bitdepth_max
) - 8;
42 printf("bitdepth_max = %d, bitdepth_min_8 = %d\n", bitdepth_max
, bitdepth_min_8
);
/* Zero-initialised accumulators: 2 x 8, 2 x 15 and 4 x 11 partial sums. */
43 int partial_sum_hv
[2][8] = { { 0 } };
44 int partial_sum_diag
[2][15] = { { 0 } };
45 int partial_sum_alt
[4][11] = { { 0 } };
/* Scan: every px is added to one slot in each of the eight partial-sum rows. */
47 for (int y
= 0; y
< 8; y
++) {
/* NOTE(review): %p expects a void *; img is passed without a cast. */
48 printf("img: y: %d : %p -> \t", y
, img
);
49 for (int x
= 0; x
< 8; x
++) {
/* Shift the pixel right by bitdepth_min_8, then subtract 128. */
50 const int px
= (img
[x
] >> bitdepth_min_8
) - 128;
/* NOTE(review): %04x expects unsigned int; px is a signed int. */
51 printf("%04x/%04x ", img
[x
], px
);
/* Each row uses its own index formula of (x, y). The constant offsets 3 and 7
 * keep the formulas that subtract from going below 0 for x, y in 0..7; the
 * largest indices reached are 14 (diag), 10 (alt) and 7 (hv). */
53 partial_sum_diag
[0][ y
+ x
] += px
;
54 partial_sum_alt
[0][ y
+ (x
>> 1)] += px
;
55 partial_sum_hv
[0][ y
] += px
;
56 partial_sum_alt
[1][3 + y
- (x
>> 1)] += px
;
57 partial_sum_diag
[1][7 + y
- x
] += px
;
58 partial_sum_alt
[2][3 - (y
>> 1) + x
] += px
;
59 partial_sum_hv
[1][ x
] += px
;
60 partial_sum_alt
[3][ (y
>> 1) + x
] += px
;
/* Step img by PXSTRIDE(stride). */
63 img
+= PXSTRIDE(stride
);
/* Debug dump of partial_sum_alt (4 rows of 11 values).
 * NOTE(review): %08x expects unsigned int; the array elements are int. */
66 printf("partial_sum_alt : \n");
67 for (int y
= 0; y
< 4; y
++) {
68 for (int x
= 0; x
< 11; x
++) {
69 printf("%08x ", partial_sum_alt
[y
][x
]);
/* cost[2] and cost[6]: sums of squares of the two partial_sum_hv rows. */
75 unsigned cost
[8] = { 0 };
76 for (int n
= 0; n
< 8; n
++) {
77 cost
[2] += partial_sum_hv
[0][n
] * partial_sum_hv
[0][n
];
78 cost
[6] += partial_sum_hv
[1][n
] * partial_sum_hv
[1][n
];
/* Weights for the paired outer entries: div_table[n] == 840 / (n + 1).
 * With the index formulas above, partial_sum_diag[.][n] and [.][14 - n] each
 * collect n + 1 pixels for n < 7 - presumably the weight normalises for that
 * count; TODO confirm intent. */
83 static const uint16_t div_table
[7] = { 840, 420, 280, 210, 168, 140, 120 };
84 for (int n
= 0; n
< 7; n
++) {
85 const int d
= div_table
[n
];
/* t is used for the debug traces: first it holds partial_sum_diag[0][14 - n]. */
87 int t
= partial_sum_diag
[0][14 - n
];
88 printf("partial_sum_diag[0][14 - %d] = %d/%08x, partial_sum_diag[0][14 - %d]^2 = %d/%08x\n", n
, t
, t
, n
, t
* t
, t
* t
);
/* t is reassigned to the weighted sum of the squared pair; it already
 * includes the factor d after this statement. */
89 t
= (partial_sum_diag
[0][n
] * partial_sum_diag
[0][n
] +
90 partial_sum_diag
[0][14 - n
] * partial_sum_diag
[0][14 - n
]) * d
;
/* NOTE(review): t was already multiplied by d, so the "t * d" arguments
 * multiply by d a second time and may overflow int for large sums (signed
 * overflow is undefined behaviour).
 * NOTE(review): the statement that adds to cost[0] inside this loop is not
 * visible in this view. */
91 printf("t = %d/%08x, d = %d/%08x, t * d = %d/%08x\n", t
, t
, d
, d
, t
* d
, t
* d
);
/* cost[4]: weighted sum of the squared pairs from partial_sum_diag[1]. */
93 cost
[4] += (partial_sum_diag
[1][n
] * partial_sum_diag
[1][n
] +
94 partial_sum_diag
[1][14 - n
] * partial_sum_diag
[1][14 - n
]) * d
;
/* Middle entry (index 7) of each diag row, weighted by 105 (== 840 / 8). */
96 cost
[0] += partial_sum_diag
[0][7] * partial_sum_diag
[0][7] * 105;
97 cost
[4] += partial_sum_diag
[1][7] * partial_sum_diag
[1][7] * 105;
/* Debug dump of the eight cost values computed so far. */
100 for (int y
= 0; y
< 8; y
++) {
101 printf("%08x ", cost
[y
]);
/* Odd-numbered costs: cost[1], cost[3], cost[5] and cost[7] are built from
 * partial_sum_alt rows 0..3 respectively. */
105 for (int n
= 0; n
< 4; n
++) {
106 unsigned *const cost_ptr
= &cost
[n
* 2 + 1];
/* Entries 3..7 of the row: plain sum of squares. */
107 for (int m
= 0; m
< 5; m
++)
108 *cost_ptr
+= partial_sum_alt
[n
][3 + m
] * partial_sum_alt
[n
][3 + m
];
/* Outer entries m and 10 - m (m = 0..2), weighted by div_table[1], [3], [5]
 * (420, 210, 140). */
110 for (int m
= 0; m
< 3; m
++) {
111 const int d
= div_table
[2 * m
+ 1];
112 *cost_ptr
+= (partial_sum_alt
[n
][m
] * partial_sum_alt
[n
][m
] +
113 partial_sum_alt
[n
][10 - m
] * partial_sum_alt
[n
][10 - m
]) * d
;
/* Debug dump of all eight cost values. */
117 for (int y
= 0; y
< 8; y
++) {
118 printf("%08x ", cost
[y
]);
/* Search for the largest cost, starting from cost[0]. The body of the if
 * (presumably updating best_cost and best_dir) is not visible in this view. */
123 unsigned best_cost
= cost
[0];
124 for (int n
= 1; n
< 8; n
++) {
125 if (cost
[n
] > best_cost
) {
/* best_dir ^ 4 selects the cost four entries away (0<->4, 1<->5, ...); the
 * difference is shifted right by 10 and stored through var. */
131 *var
= (best_cost
- (cost
[best_dir
^ 4])) >> 10;
136 #if ARCH_AARCH64 || ARCH_ARM
137 #include "src/arm/cdef.h"
139 #include "src/ppc/cdef.h"
141 #include "src/x86/cdef.h"
/*
 * Initialises the CDEF DSP context c: stores cdef_find_dir_c in c->dir, then
 * calls an architecture-specific init function with the same context.
 *
 * NOTE(review): only the ARM/AArch64 #if is visible in this view; the
 * conditions guarding the ppc and x86 calls, the matching #endif and the
 * closing brace are not visible. The arch init functions presumably replace
 * c->dir with optimised versions - TODO confirm against src/<arch>/cdef.h.
 */
145 COLD
void bitfn(dav1d_cdef_dsp_init
)(Dav1dCdefDSPContext
*const c
) {
/* Install the C implementation first. */
146 c
->dir
= cdef_find_dir_c
;
/* Per-architecture hooks, each given the context that was just filled in. */
149 #if ARCH_AARCH64 || ARCH_ARM
150 cdef_dsp_init_arm(c
);
152 cdef_dsp_init_ppc(c
);
154 cdef_dsp_init_x86(c
);