targets/vdpau-nouveau: convert to static/shared pipe-drivers
[mesa.git] / src / gallium / auxiliary / vl / vl_deint_filter.c
1 /**************************************************************************
2 *
3 * Copyright 2013 Grigori Goronzy <greg@chown.ath.cx>.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * References:
30 *
31 * Lin, S. F., Chang, Y. L., & Chen, L. G. (2003).
32 * Motion adaptive interpolation with horizontal motion detection for deinterlacing.
33 * Consumer Electronics, IEEE Transactions on, 49(4), 1256-1265.
34 *
35 * Pei-Yin, C. H. E. N., & Yao-Hsien, L. A. I. (2007).
36 * A low-complexity interpolation method for deinterlacing.
37 * IEICE transactions on information and systems, 90(2), 606-608.
38 *
39 */
40
41 #include <stdio.h>
42
43 #include "pipe/p_context.h"
44
45 #include "tgsi/tgsi_ureg.h"
46
47 #include "util/u_draw.h"
48 #include "util/u_memory.h"
49 #include "util/u_math.h"
50
51 #include "vl_types.h"
52 #include "vl_video_buffer.h"
53 #include "vl_vertex_buffers.h"
54 #include "vl_deint_filter.h"
55
/*
 * Semantic indices of the vertex shader outputs.
 *
 * Both indices are zero on purpose: in this file VS_O_VPOS is only used
 * together with TGSI_SEMANTIC_POSITION and VS_O_VTEX only together with
 * TGSI_SEMANTIC_GENERIC, so the two outputs never share a semantic name.
 */
enum VS_OUTPUT
{
   VS_O_VPOS = 0,   /* index of the position output */
   VS_O_VTEX = 0    /* index of the generic (texture coordinate) output */
};
61
62 static void *
63 create_vert_shader(struct vl_deint_filter *filter)
64 {
65 struct ureg_program *shader;
66 struct ureg_src i_vpos;
67 struct ureg_dst o_vpos, o_vtex;
68
69 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
70 if (!shader)
71 return NULL;
72
73 i_vpos = ureg_DECL_vs_input(shader, 0);
74 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
75 o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX);
76
77 ureg_MOV(shader, o_vpos, i_vpos);
78 ureg_MOV(shader, o_vtex, i_vpos);
79
80 ureg_END(shader);
81
82 return ureg_create_shader_and_destroy(shader, filter->pipe);
83 }
84
85 static void *
86 create_copy_frag_shader(struct vl_deint_filter *filter, unsigned field)
87 {
88 struct ureg_program *shader;
89 struct ureg_src i_vtex;
90 struct ureg_src sampler;
91 struct ureg_dst o_fragment;
92 struct ureg_dst t_tex;
93
94 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
95 if (!shader) {
96 return NULL;
97 }
98 t_tex = ureg_DECL_temporary(shader);
99
100 i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
101 sampler = ureg_DECL_sampler(shader, 2);
102 o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
103
104 ureg_MOV(shader, t_tex, i_vtex);
105 if (field) {
106 ureg_MOV(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_ZW),
107 ureg_imm4f(shader, 0, 0, 1.0f, 0));
108 } else {
109 ureg_MOV(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_ZW),
110 ureg_imm1f(shader, 0));
111 }
112
113 ureg_TEX(shader, o_fragment, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_tex), sampler);
114
115 ureg_release_temporary(shader, t_tex);
116 ureg_END(shader);
117
118 return ureg_create_shader_and_destroy(shader, filter->pipe);
119 }
120
121 static void *
122 create_deint_frag_shader(struct vl_deint_filter *filter, unsigned field,
123 struct vertex2f *sizes, bool spatial_filter)
124 {
125 struct ureg_program *shader;
126 struct ureg_src i_vtex;
127 struct ureg_src sampler_cur;
128 struct ureg_src sampler_prevprev;
129 struct ureg_src sampler_prev;
130 struct ureg_src sampler_next;
131 struct ureg_dst o_fragment;
132 struct ureg_dst t_tex;
133 struct ureg_dst t_comp_top, t_comp_bot;
134 struct ureg_dst t_diff;
135 struct ureg_dst t_a, t_b;
136 struct ureg_dst t_weave, t_linear;
137
138 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
139 if (!shader) {
140 return NULL;
141 }
142
143 t_tex = ureg_DECL_temporary(shader);
144 t_comp_top = ureg_DECL_temporary(shader);
145 t_comp_bot = ureg_DECL_temporary(shader);
146 t_diff = ureg_DECL_temporary(shader);
147 t_a = ureg_DECL_temporary(shader);
148 t_b = ureg_DECL_temporary(shader);
149 t_weave = ureg_DECL_temporary(shader);
150 t_linear = ureg_DECL_temporary(shader);
151
152 i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
153 sampler_prevprev = ureg_DECL_sampler(shader, 0);
154 sampler_prev = ureg_DECL_sampler(shader, 1);
155 sampler_cur = ureg_DECL_sampler(shader, 2);
156 sampler_next = ureg_DECL_sampler(shader, 3);
157 o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
158
159 // we don't care about ZW interpolation (allows better optimization)
160 ureg_MOV(shader, t_tex, i_vtex);
161 ureg_MOV(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_ZW),
162 ureg_imm1f(shader, 0));
163
164 // sample between texels for cheap lowpass
165 ureg_ADD(shader, t_comp_top, ureg_src(t_tex),
166 ureg_imm4f(shader, sizes->x * 0.5f, sizes->y * -0.5f, 0, 0));
167 ureg_ADD(shader, t_comp_bot, ureg_src(t_tex),
168 ureg_imm4f(shader, sizes->x * -0.5f, sizes->y * 0.5f, 1.0f, 0));
169
170 if (field == 0) {
171 /* interpolating top field -> current field is a bottom field */
172 // cur vs prev2
173 ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_cur);
174 ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prevprev);
175 ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_src(t_b));
176 // prev vs next
177 ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prev);
178 ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_next);
179 ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_src(t_b));
180 } else {
181 /* interpolating bottom field -> current field is a top field */
182 // cur vs prev2
183 ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_cur);
184 ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prevprev);
185 ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_src(t_b));
186 // prev vs next
187 ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prev);
188 ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_next);
189 ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_src(t_b));
190 }
191
192 // absolute maximum of differences
193 ureg_MAX(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_abs(ureg_src(t_diff)),
194 ureg_scalar(ureg_abs(ureg_src(t_diff)), TGSI_SWIZZLE_Y));
195
196 if (field == 0) {
197 /* weave with prev top field */
198 ureg_TEX(shader, t_weave, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_tex), sampler_prev);
199 /* get linear interpolation from current bottom field */
200 ureg_ADD(shader, t_comp_top, ureg_src(t_tex), ureg_imm4f(shader, 0, sizes->y * -1.0f, 1.0f, 0));
201 ureg_TEX(shader, t_linear, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_cur);
202 } else {
203 /* weave with prev bottom field */
204 ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, 0, 0, 1.0f, 0));
205 ureg_TEX(shader, t_weave, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prev);
206 /* get linear interpolation from current top field */
207 ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, 0, sizes->y * 1.0f, 0, 0));
208 ureg_TEX(shader, t_linear, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_cur);
209 }
210
211 // mix between weave and linear
212 // fully weave if diff < 6 (0.02353), fully interpolate if diff > 14 (0.05490)
213 ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_diff),
214 ureg_imm4f(shader, -0.02353f, 0, 0, 0));
215 ureg_MUL(shader, ureg_saturate(ureg_writemask(t_diff, TGSI_WRITEMASK_X)),
216 ureg_src(t_diff), ureg_imm4f(shader, 31.8750f, 0, 0, 0));
217 ureg_LRP(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_X), ureg_src(t_diff),
218 ureg_src(t_linear), ureg_src(t_weave));
219
220 ureg_release_temporary(shader, t_tex);
221 ureg_release_temporary(shader, t_comp_top);
222 ureg_release_temporary(shader, t_comp_bot);
223 ureg_release_temporary(shader, t_diff);
224 ureg_release_temporary(shader, t_a);
225 ureg_release_temporary(shader, t_b);
226 ureg_release_temporary(shader, t_weave);
227 ureg_release_temporary(shader, t_linear);
228 ureg_END(shader);
229
230 return ureg_create_shader_and_destroy(shader, filter->pipe);
231 }
232
233 bool
234 vl_deint_filter_init(struct vl_deint_filter *filter, struct pipe_context *pipe,
235 unsigned video_width, unsigned video_height,
236 bool skip_chroma, bool spatial_filter)
237 {
238 struct pipe_rasterizer_state rs_state;
239 struct pipe_blend_state blend;
240 struct pipe_sampler_state sampler;
241 struct pipe_vertex_element ve;
242 struct vertex2f sizes;
243 struct pipe_video_buffer templ;
244
245 assert(filter && pipe);
246 assert(video_width && video_height);
247
248 memset(filter, 0, sizeof(*filter));
249 filter->pipe = pipe;
250 filter->skip_chroma = skip_chroma;
251 filter->video_width = video_width;
252 filter->video_height = video_height;
253
254 /* TODO: handle other than 4:2:0 subsampling */
255 memset(&templ, 0, sizeof(templ));
256 templ.buffer_format = PIPE_FORMAT_YV12;
257 templ.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
258 templ.width = video_width;
259 templ.height = video_height;
260 templ.interlaced = true;
261 filter->video_buffer = vl_video_buffer_create(pipe, &templ);
262 if (!filter->video_buffer)
263 goto error_video_buffer;
264
265 memset(&rs_state, 0, sizeof(rs_state));
266 rs_state.half_pixel_center = true;
267 rs_state.bottom_edge_rule = true;
268 rs_state.depth_clip = 1;
269 filter->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
270 if (!filter->rs_state)
271 goto error_rs_state;
272
273 memset(&blend, 0, sizeof blend);
274 blend.rt[0].colormask = PIPE_MASK_RGBA;
275 filter->blend = pipe->create_blend_state(pipe, &blend);
276 if (!filter->blend)
277 goto error_blend;
278
279 memset(&sampler, 0, sizeof(sampler));
280 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
281 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
282 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
283 sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
284 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
285 sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
286 sampler.normalized_coords = 1;
287 filter->sampler[0] = pipe->create_sampler_state(pipe, &sampler);
288 filter->sampler[1] = filter->sampler[2] = filter->sampler[3] = filter->sampler[0];
289 if (!filter->sampler[0])
290 goto error_sampler;
291
292 filter->quad = vl_vb_upload_quads(pipe);
293 if(!filter->quad.buffer)
294 goto error_quad;
295
296 memset(&ve, 0, sizeof(ve));
297 ve.src_offset = 0;
298 ve.instance_divisor = 0;
299 ve.vertex_buffer_index = 0;
300 ve.src_format = PIPE_FORMAT_R32G32_FLOAT;
301 filter->ves = pipe->create_vertex_elements_state(pipe, 1, &ve);
302 if (!filter->ves)
303 goto error_ves;
304
305 sizes.x = 1.0f / video_width;
306 sizes.y = 1.0f / video_height;
307
308 filter->vs = create_vert_shader(filter);
309 if (!filter->vs)
310 goto error_vs;
311
312 filter->fs_copy_top = create_copy_frag_shader(filter, 0);
313 if (!filter->fs_copy_top)
314 goto error_fs_copy_top;
315
316 filter->fs_copy_bottom = create_copy_frag_shader(filter, 1);
317 if (!filter->fs_copy_bottom)
318 goto error_fs_copy_bottom;
319
320 filter->fs_deint_top = create_deint_frag_shader(filter, 0, &sizes, spatial_filter);
321 if (!filter->fs_deint_top)
322 goto error_fs_deint_top;
323
324 filter->fs_deint_bottom = create_deint_frag_shader(filter, 1, &sizes, spatial_filter);
325 if (!filter->fs_deint_bottom)
326 goto error_fs_deint_bottom;
327
328 return true;
329
330 error_fs_deint_bottom:
331 pipe->delete_fs_state(pipe, filter->fs_deint_top);
332
333 error_fs_deint_top:
334 pipe->delete_fs_state(pipe, filter->fs_copy_bottom);
335
336 error_fs_copy_bottom:
337 pipe->delete_fs_state(pipe, filter->fs_copy_top);
338
339 error_fs_copy_top:
340 pipe->delete_vs_state(pipe, filter->vs);
341
342 error_vs:
343 pipe->delete_vertex_elements_state(pipe, filter->ves);
344
345 error_ves:
346 pipe_resource_reference(&filter->quad.buffer, NULL);
347
348 error_quad:
349 pipe->delete_sampler_state(pipe, filter->sampler);
350
351 error_sampler:
352 pipe->delete_blend_state(pipe, filter->blend);
353
354 error_blend:
355 pipe->delete_rasterizer_state(pipe, filter->rs_state);
356
357 error_rs_state:
358 filter->video_buffer->destroy(filter->video_buffer);
359
360 error_video_buffer:
361 return false;
362 }
363
364 void
365 vl_deint_filter_cleanup(struct vl_deint_filter *filter)
366 {
367 assert(filter);
368
369 filter->pipe->delete_sampler_state(filter->pipe, filter->sampler[0]);
370 filter->pipe->delete_blend_state(filter->pipe, filter->blend);
371 filter->pipe->delete_rasterizer_state(filter->pipe, filter->rs_state);
372 filter->pipe->delete_vertex_elements_state(filter->pipe, filter->ves);
373 pipe_resource_reference(&filter->quad.buffer, NULL);
374
375 filter->pipe->delete_vs_state(filter->pipe, filter->vs);
376 filter->pipe->delete_fs_state(filter->pipe, filter->fs_copy_top);
377 filter->pipe->delete_fs_state(filter->pipe, filter->fs_copy_bottom);
378 filter->pipe->delete_fs_state(filter->pipe, filter->fs_deint_top);
379 filter->pipe->delete_fs_state(filter->pipe, filter->fs_deint_bottom);
380
381 filter->video_buffer->destroy(filter->video_buffer);
382 }
383
384 bool
385 vl_deint_filter_check_buffers(struct vl_deint_filter *filter,
386 struct pipe_video_buffer *prevprev,
387 struct pipe_video_buffer *prev,
388 struct pipe_video_buffer *cur,
389 struct pipe_video_buffer *next)
390 {
391 int i;
392 struct pipe_video_buffer *bufs[] = { prevprev, prev, cur, next };
393
394 for (i = 0; i < 4; i++) {
395 if (bufs[i]->chroma_format != PIPE_VIDEO_CHROMA_FORMAT_420)
396 return false;
397 if (bufs[i]->width < filter->video_width ||
398 bufs[i]->height < filter->video_height)
399 return false;
400 if (!bufs[i]->interlaced)
401 return false;
402 }
403
404 return true;
405 }
406
407 void
408 vl_deint_filter_render(struct vl_deint_filter *filter,
409 struct pipe_video_buffer *prevprev,
410 struct pipe_video_buffer *prev,
411 struct pipe_video_buffer *cur,
412 struct pipe_video_buffer *next,
413 unsigned field)
414 {
415 struct pipe_viewport_state viewport;
416 struct pipe_framebuffer_state fb_state;
417 struct pipe_sampler_view **cur_sv;
418 struct pipe_sampler_view **prevprev_sv;
419 struct pipe_sampler_view **prev_sv;
420 struct pipe_sampler_view **next_sv;
421 struct pipe_sampler_view *sampler_views[4];
422 struct pipe_surface **dst_surfaces;
423 int j;
424
425 assert(filter && prevprev && prev && cur && next && field <= 1);
426
427 /* set up destination and source */
428 dst_surfaces = filter->video_buffer->get_surfaces(filter->video_buffer);
429 cur_sv = cur->get_sampler_view_components(cur);
430 prevprev_sv = prevprev->get_sampler_view_components(prevprev);
431 prev_sv = prev->get_sampler_view_components(prev);
432 next_sv = next->get_sampler_view_components(next);
433
434 /* set up pipe state */
435 filter->pipe->bind_rasterizer_state(filter->pipe, filter->rs_state);
436 filter->pipe->bind_blend_state(filter->pipe, filter->blend);
437 filter->pipe->set_vertex_buffers(filter->pipe, 0, 1, &filter->quad);
438 filter->pipe->bind_vertex_elements_state(filter->pipe, filter->ves);
439 filter->pipe->bind_vs_state(filter->pipe, filter->vs);
440 filter->pipe->bind_sampler_states(filter->pipe, PIPE_SHADER_FRAGMENT,
441 0, 4, filter->sampler);
442
443 /* prepare viewport */
444 memset(&viewport, 0, sizeof(viewport));
445 viewport.scale[2] = 1;
446 viewport.scale[3] = 1;
447
448 /* prepare framebuffer */
449 memset(&fb_state, 0, sizeof(fb_state));
450 fb_state.nr_cbufs = 1;
451
452 /* process each plane separately */
453 for (j = 0; j < 3; j++) {
454 /* select correct YV12 surfaces */
455 int k = j == 1 ? 2 :
456 j == 2 ? 1 : 0;
457 struct pipe_surface *blit_surf = dst_surfaces[2 * k + field];
458 struct pipe_surface *dst_surf = dst_surfaces[2 * k + 1 - field];
459
460 /* update render target state */
461 viewport.scale[0] = blit_surf->texture->width0;
462 viewport.scale[1] = blit_surf->texture->height0;
463 fb_state.width = blit_surf->texture->width0;
464 fb_state.height = blit_surf->texture->height0;
465
466 /* update sampler view sources */
467 sampler_views[0] = prevprev_sv[j];
468 sampler_views[1] = prev_sv[j];
469 sampler_views[2] = cur_sv[j];
470 sampler_views[3] = next_sv[j];
471 filter->pipe->set_sampler_views(filter->pipe, PIPE_SHADER_FRAGMENT, 0, 4, sampler_views);
472
473 /* blit current field */
474 fb_state.cbufs[0] = blit_surf;
475 filter->pipe->bind_fs_state(filter->pipe, field ? filter->fs_copy_bottom : filter->fs_copy_top);
476 filter->pipe->set_framebuffer_state(filter->pipe, &fb_state);
477 filter->pipe->set_viewport_states(filter->pipe, 0, 1, &viewport);
478 util_draw_arrays(filter->pipe, PIPE_PRIM_QUADS, 0, 4);
479
480 /* blit or interpolate other field */
481 fb_state.cbufs[0] = dst_surf;
482 filter->pipe->set_framebuffer_state(filter->pipe, &fb_state);
483 if (j > 0 && filter->skip_chroma) {
484 util_draw_arrays(filter->pipe, PIPE_PRIM_QUADS, 0, 4);
485 } else {
486 filter->pipe->bind_fs_state(filter->pipe, field ? filter->fs_deint_top : filter->fs_deint_bottom);
487 util_draw_arrays(filter->pipe, PIPE_PRIM_QUADS, 0, 4);
488 }
489 }
490 }