f3f8cbee778ebc166854d1d396b1bb5ec84149c6
[mesa.git] / src / gallium / auxiliary / vl / vl_bicubic_filter.c
1 /**************************************************************************
2 *
3 * Copyright 2016 Nayan Deshmukh.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <stdio.h>
29
30 #include "pipe/p_context.h"
31
32 #include "tgsi/tgsi_ureg.h"
33
34 #include "util/u_draw.h"
35 #include "util/u_memory.h"
36 #include "util/u_math.h"
37 #include "util/u_rect.h"
38 #include "util/u_upload_mgr.h"
39
40 #include "vl_types.h"
41 #include "vl_vertex_buffers.h"
42 #include "vl_bicubic_filter.h"
43
/**
 * Semantic indices of the vertex shader outputs.
 *
 * Both entries are 0: within this file VS_O_VPOS is only used together with
 * TGSI_SEMANTIC_POSITION and VS_O_VTEX only together with
 * TGSI_SEMANTIC_GENERIC, so the two outputs are declared under different
 * semantic names.
 */
enum VS_OUTPUT
{
   /* index of the position output */
   VS_O_VPOS = 0,
   /* index of the generic output carrying the texture coordinate */
   VS_O_VTEX = 0,
};
49
50 static void *
51 create_vert_shader(struct vl_bicubic_filter *filter)
52 {
53 struct ureg_program *shader;
54 struct ureg_src i_vpos;
55 struct ureg_dst o_vpos, o_vtex;
56
57 shader = ureg_create(PIPE_SHADER_VERTEX);
58 if (!shader)
59 return NULL;
60
61 i_vpos = ureg_DECL_vs_input(shader, 0);
62 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
63 o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX);
64
65 ureg_MOV(shader, o_vpos, i_vpos);
66 ureg_MOV(shader, o_vtex, i_vpos);
67
68 ureg_END(shader);
69
70 return ureg_create_shader_and_destroy(shader, filter->pipe);
71 }
72
73 static void
74 create_frag_shader_cubic_interpolater(struct ureg_program *shader, struct ureg_src tex_a,
75 struct ureg_src tex_b, struct ureg_src tex_c,
76 struct ureg_src tex_d, struct ureg_src t,
77 struct ureg_dst o_fragment)
78 {
79 struct ureg_dst temp[11];
80 struct ureg_dst t_2;
81 unsigned i;
82
83 for(i = 0; i < 11; ++i)
84 temp[i] = ureg_DECL_temporary(shader);
85 t_2 = ureg_DECL_temporary(shader);
86
87 /*
88 * |temp[0]| | 0 2 0 0 | |tex_a|
89 * |temp[1]| = | -1 0 1 0 |* |tex_b|
90 * |temp[2]| | 2 -5 4 -1 | |tex_c|
91 * |temp[3]| | -1 3 -3 1 | |tex_d|
92 */
93 ureg_MUL(shader, temp[0], tex_b, ureg_imm1f(shader, 2.0f));
94
95 ureg_MUL(shader, temp[1], tex_a, ureg_imm1f(shader, -1.0f));
96 ureg_MAD(shader, temp[1], tex_c, ureg_imm1f(shader, 1.0f),
97 ureg_src(temp[1]));
98
99 ureg_MUL(shader, temp[2], tex_a, ureg_imm1f(shader, 2.0f));
100 ureg_MAD(shader, temp[2], tex_b, ureg_imm1f(shader, -5.0f),
101 ureg_src(temp[2]));
102 ureg_MAD(shader, temp[2], tex_c, ureg_imm1f(shader, 4.0f),
103 ureg_src(temp[2]));
104 ureg_MAD(shader, temp[2], tex_d, ureg_imm1f(shader, -1.0f),
105 ureg_src(temp[2]));
106
107 ureg_MUL(shader, temp[3], tex_a, ureg_imm1f(shader, -1.0f));
108 ureg_MAD(shader, temp[3], tex_b, ureg_imm1f(shader, 3.0f),
109 ureg_src(temp[3]));
110 ureg_MAD(shader, temp[3], tex_c, ureg_imm1f(shader, -3.0f),
111 ureg_src(temp[3]));
112 ureg_MAD(shader, temp[3], tex_d, ureg_imm1f(shader, 1.0f),
113 ureg_src(temp[3]));
114
115 /*
116 * t_2 = t*t
117 * o_fragment = 0.5*|1 t t^2 t^3|*|temp[0]|
118 * |temp[1]|
119 * |temp[2]|
120 * |temp[3]|
121 */
122
123 ureg_MUL(shader, t_2, t, t);
124 ureg_MUL(shader, temp[4], ureg_src(t_2), t);
125
126 ureg_MUL(shader, temp[4], ureg_src(temp[4]),
127 ureg_src(temp[3]));
128 ureg_MUL(shader, temp[5], ureg_src(t_2),
129 ureg_src(temp[2]));
130 ureg_MUL(shader, temp[6], t,
131 ureg_src(temp[1]));
132 ureg_MUL(shader, temp[7], ureg_imm1f(shader, 1.0f),
133 ureg_src(temp[0]));
134 ureg_ADD(shader, temp[8], ureg_src(temp[4]),
135 ureg_src(temp[5]));
136 ureg_ADD(shader, temp[9], ureg_src(temp[6]),
137 ureg_src(temp[7]));
138
139 ureg_ADD(shader, temp[10], ureg_src(temp[8]),
140 ureg_src(temp[9]));
141 ureg_MUL(shader, o_fragment, ureg_src(temp[10]),
142 ureg_imm1f(shader, 0.5f));
143
144
145 for(i = 0; i < 11; ++i)
146 ureg_release_temporary(shader, temp[i]);
147 ureg_release_temporary(shader, t_2);
148 }
149
150 static void *
151 create_frag_shader(struct vl_bicubic_filter *filter, unsigned video_width,
152 unsigned video_height, struct vertex2f *offsets)
153 {
154 struct pipe_screen *screen = filter->pipe->screen;
155 struct ureg_program *shader;
156 struct ureg_src i_vtex, vtex;
157 struct ureg_src sampler;
158 struct ureg_src half_pixel;
159 struct ureg_dst t_array[23];
160 struct ureg_dst o_fragment;
161 struct ureg_dst t;
162 unsigned i;
163
164 if (screen->get_shader_param(
165 screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_TEMPS) < 23) {
166
167 return NULL;
168 }
169
170 shader = ureg_create(PIPE_SHADER_FRAGMENT);
171 if (!shader) {
172 return NULL;
173 }
174
175 i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR);
176 sampler = ureg_DECL_sampler(shader, 0);
177 ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D,
178 TGSI_RETURN_TYPE_FLOAT,
179 TGSI_RETURN_TYPE_FLOAT,
180 TGSI_RETURN_TYPE_FLOAT,
181 TGSI_RETURN_TYPE_FLOAT);
182
183 for (i = 0; i < 23; ++i)
184 t_array[i] = ureg_DECL_temporary(shader);
185 t = ureg_DECL_temporary(shader);
186
187 half_pixel = ureg_DECL_constant(shader, 0);
188 o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
189
190 /*
191 * temp = (i_vtex - (0.5/dst_size)) * i_size)
192 * t = frac(temp)
193 * vtex = floor(i_vtex)/i_size
194 */
195 ureg_ADD(shader, ureg_writemask(t_array[21], TGSI_WRITEMASK_XY),
196 i_vtex, ureg_negate(half_pixel));
197 ureg_MUL(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
198 ureg_src(t_array[21]), ureg_imm2f(shader, video_width, video_height));
199 ureg_FRC(shader, ureg_writemask(t, TGSI_WRITEMASK_XY),
200 ureg_src(t_array[22]));
201
202 ureg_FLR(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
203 ureg_src(t_array[22]));
204 ureg_DIV(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
205 ureg_src(t_array[22]), ureg_imm2f(shader, video_width, video_height));
206 ureg_ADD(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
207 ureg_src(t_array[22]), half_pixel);
208
209 /*
210 * t_array[0..*] = vtex + offset[0..*]
211 * t_array[0..*] = tex(t_array[0..*], sampler)
212 * t_array[16+i] = cubic_interpolate(t_array[4*i..4*i+3], t_x)
213 * o_fragment = cubic_interpolate(t_array[16..19], t_y)
214 */
215 vtex = ureg_src(t_array[22]);
216 for (i = 0; i < 16; ++i) {
217 ureg_ADD(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_XY),
218 vtex, ureg_imm2f(shader, offsets[i].x, offsets[i].y));
219 ureg_MOV(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_ZW),
220 ureg_imm1f(shader, 0.0f));
221 }
222
223 for (i = 0; i < 16; ++i) {
224 ureg_TEX(shader, t_array[i], TGSI_TEXTURE_2D, ureg_src(t_array[i]), sampler);
225 }
226
227 for(i = 0; i < 4; ++i)
228 create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[4*i]),
229 ureg_src(t_array[4*i+1]), ureg_src(t_array[4*i+2]), ureg_src(t_array[4*i+3]),
230 ureg_scalar(ureg_src(t), TGSI_SWIZZLE_X), t_array[16+i]);
231
232 create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[16]),
233 ureg_src(t_array[17]), ureg_src(t_array[18]), ureg_src(t_array[19]),
234 ureg_scalar(ureg_src(t), TGSI_SWIZZLE_Y), o_fragment);
235
236 for(i = 0; i < 23; ++i)
237 ureg_release_temporary(shader, t_array[i]);
238 ureg_release_temporary(shader, t);
239
240 ureg_END(shader);
241
242 return ureg_create_shader_and_destroy(shader, filter->pipe);
243 }
244
245 bool
246 vl_bicubic_filter_init(struct vl_bicubic_filter *filter, struct pipe_context *pipe,
247 unsigned width, unsigned height)
248 {
249 struct pipe_rasterizer_state rs_state;
250 struct pipe_blend_state blend;
251 struct vertex2f offsets[16];
252 struct pipe_sampler_state sampler;
253 struct pipe_vertex_element ve;
254 unsigned i;
255
256 assert(filter && pipe);
257 assert(width && height);
258
259 memset(filter, 0, sizeof(*filter));
260 filter->pipe = pipe;
261
262 memset(&rs_state, 0, sizeof(rs_state));
263 rs_state.half_pixel_center = true;
264 rs_state.bottom_edge_rule = true;
265 rs_state.depth_clip_near = 1;
266 rs_state.depth_clip_far = 1;
267
268 filter->rs_state = pipe->create_rasterizer_state(pipe, &rs_state);
269 if (!filter->rs_state)
270 goto error_rs_state;
271
272 memset(&blend, 0, sizeof blend);
273 blend.rt[0].rgb_func = PIPE_BLEND_ADD;
274 blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
275 blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
276 blend.rt[0].alpha_func = PIPE_BLEND_ADD;
277 blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
278 blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
279 blend.logicop_func = PIPE_LOGICOP_CLEAR;
280 blend.rt[0].colormask = PIPE_MASK_RGBA;
281 filter->blend = pipe->create_blend_state(pipe, &blend);
282 if (!filter->blend)
283 goto error_blend;
284
285 memset(&sampler, 0, sizeof(sampler));
286 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
287 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
288 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
289 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
290 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
291 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
292 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
293 sampler.compare_func = PIPE_FUNC_ALWAYS;
294 sampler.normalized_coords = 1;
295 filter->sampler = pipe->create_sampler_state(pipe, &sampler);
296 if (!filter->sampler)
297 goto error_sampler;
298
299 filter->quad = vl_vb_upload_quads(pipe);
300 if(!filter->quad.buffer.resource)
301 goto error_quad;
302
303 memset(&ve, 0, sizeof(ve));
304 ve.src_offset = 0;
305 ve.instance_divisor = 0;
306 ve.vertex_buffer_index = 0;
307 ve.src_format = PIPE_FORMAT_R32G32_FLOAT;
308 filter->ves = pipe->create_vertex_elements_state(pipe, 1, &ve);
309 if (!filter->ves)
310 goto error_ves;
311
312 offsets[0].x = -1.0f; offsets[0].y = -1.0f;
313 offsets[1].x = 0.0f; offsets[1].y = -1.0f;
314 offsets[2].x = 1.0f; offsets[2].y = -1.0f;
315 offsets[3].x = 2.0f; offsets[3].y = -1.0f;
316
317 offsets[4].x = -1.0f; offsets[4].y = 0.0f;
318 offsets[5].x = 0.0f; offsets[5].y = 0.0f;
319 offsets[6].x = 1.0f; offsets[6].y = 0.0f;
320 offsets[7].x = 2.0f; offsets[7].y = 0.0f;
321
322 offsets[8].x = -1.0f; offsets[8].y = 1.0f;
323 offsets[9].x = 0.0f; offsets[9].y = 1.0f;
324 offsets[10].x = 1.0f; offsets[10].y = 1.0f;
325 offsets[11].x = 2.0f; offsets[11].y = 1.0f;
326
327 offsets[12].x = -1.0f; offsets[12].y = 2.0f;
328 offsets[13].x = 0.0f; offsets[13].y = 2.0f;
329 offsets[14].x = 1.0f; offsets[14].y = 2.0f;
330 offsets[15].x = 2.0f; offsets[15].y = 2.0f;
331
332 for (i = 0; i < 16; ++i) {
333 offsets[i].x /= width;
334 offsets[i].y /= height;
335 }
336
337 filter->vs = create_vert_shader(filter);
338 if (!filter->vs)
339 goto error_vs;
340
341 filter->fs = create_frag_shader(filter, width, height, offsets);
342 if (!filter->fs)
343 goto error_fs;
344
345 return true;
346
347 error_fs:
348 pipe->delete_vs_state(pipe, filter->vs);
349
350 error_vs:
351 pipe->delete_vertex_elements_state(pipe, filter->ves);
352
353 error_ves:
354 pipe_resource_reference(&filter->quad.buffer.resource, NULL);
355
356 error_quad:
357 pipe->delete_sampler_state(pipe, filter->sampler);
358
359 error_sampler:
360 pipe->delete_blend_state(pipe, filter->blend);
361
362 error_blend:
363 pipe->delete_rasterizer_state(pipe, filter->rs_state);
364
365 error_rs_state:
366 return false;
367 }
368
369 void
370 vl_bicubic_filter_cleanup(struct vl_bicubic_filter *filter)
371 {
372 assert(filter);
373
374 filter->pipe->delete_sampler_state(filter->pipe, filter->sampler);
375 filter->pipe->delete_blend_state(filter->pipe, filter->blend);
376 filter->pipe->delete_rasterizer_state(filter->pipe, filter->rs_state);
377 filter->pipe->delete_vertex_elements_state(filter->pipe, filter->ves);
378 pipe_resource_reference(&filter->quad.buffer.resource, NULL);
379
380 filter->pipe->delete_vs_state(filter->pipe, filter->vs);
381 filter->pipe->delete_fs_state(filter->pipe, filter->fs);
382 }
383
384 void
385 vl_bicubic_filter_render(struct vl_bicubic_filter *filter,
386 struct pipe_sampler_view *src,
387 struct pipe_surface *dst,
388 struct u_rect *dst_area,
389 struct u_rect *dst_clip)
390 {
391 struct pipe_viewport_state viewport;
392 struct pipe_framebuffer_state fb_state;
393 struct pipe_scissor_state scissor;
394 union pipe_color_union clear_color;
395
396 assert(filter && src && dst);
397
398 if (dst_clip) {
399 scissor.minx = dst_clip->x0;
400 scissor.miny = dst_clip->y0;
401 scissor.maxx = dst_clip->x1;
402 scissor.maxy = dst_clip->y1;
403 } else {
404 scissor.minx = 0;
405 scissor.miny = 0;
406 scissor.maxx = dst->width;
407 scissor.maxy = dst->height;
408 }
409
410 clear_color.f[0] = clear_color.f[1] = 0.0f;
411 clear_color.f[2] = clear_color.f[3] = 0.0f;
412
413 memset(&viewport, 0, sizeof(viewport));
414 if(dst_area){
415 viewport.scale[0] = dst_area->x1 - dst_area->x0;
416 viewport.scale[1] = dst_area->y1 - dst_area->y0;
417 viewport.translate[0] = dst_area->x0;
418 viewport.translate[1] = dst_area->y0;
419 } else {
420 viewport.scale[0] = dst->width;
421 viewport.scale[1] = dst->height;
422 }
423 viewport.scale[2] = 1;
424
425 struct pipe_constant_buffer cb = {};
426 float *ptr = NULL;
427
428 u_upload_alloc(filter->pipe->const_uploader, 0, 2 * sizeof(float), 256,
429 &cb.buffer_offset, &cb.buffer, (void**)&ptr);
430 cb.buffer_size = 2 * sizeof(float);
431
432 if (ptr) {
433 ptr[0] = 0.5f/viewport.scale[0];
434 ptr[1] = 0.5f/viewport.scale[1];
435 }
436 u_upload_unmap(filter->pipe->const_uploader);
437
438 memset(&fb_state, 0, sizeof(fb_state));
439 fb_state.width = dst->width;
440 fb_state.height = dst->height;
441 fb_state.nr_cbufs = 1;
442 fb_state.cbufs[0] = dst;
443
444 filter->pipe->set_scissor_states(filter->pipe, 0, 1, &scissor);
445 filter->pipe->clear_render_target(filter->pipe, dst, &clear_color,
446 0, 0, dst->width, dst->height, false);
447 filter->pipe->set_constant_buffer(filter->pipe, PIPE_SHADER_FRAGMENT,
448 0, &cb);
449 filter->pipe->bind_rasterizer_state(filter->pipe, filter->rs_state);
450 filter->pipe->bind_blend_state(filter->pipe, filter->blend);
451 filter->pipe->bind_sampler_states(filter->pipe, PIPE_SHADER_FRAGMENT,
452 0, 1, &filter->sampler);
453 filter->pipe->set_sampler_views(filter->pipe, PIPE_SHADER_FRAGMENT,
454 0, 1, &src);
455 filter->pipe->bind_vs_state(filter->pipe, filter->vs);
456 filter->pipe->bind_fs_state(filter->pipe, filter->fs);
457 filter->pipe->set_framebuffer_state(filter->pipe, &fb_state);
458 filter->pipe->set_viewport_states(filter->pipe, 0, 1, &viewport);
459 filter->pipe->set_vertex_buffers(filter->pipe, 0, 1, &filter->quad);
460 filter->pipe->bind_vertex_elements_state(filter->pipe, filter->ves);
461
462 util_draw_arrays(filter->pipe, PIPE_PRIM_QUADS, 0, 4);
463 }