/* mesa.git: src/gallium/auxiliary/vl/vl_idct.c (commit 6bebac1e88a38e52fb558954d4d2337f4f97e642) */
/**************************************************************************
 *
 * Copyright 2010 Christian König
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "vl_idct.h"
#include "vl_vertex_buffers.h"
#include "vl_ycbcr_buffer.h"
#include "vl_defines.h"
#include "util/u_draw.h"
#include <assert.h>
#include <math.h>   /* sqrtf() in vl_idct_upload_matrix() */
#include <string.h> /* memset()/memcpy() */
#include <pipe/p_context.h>
#include <pipe/p_screen.h>
#include <util/u_inlines.h>
#include <util/u_sampler.h>
#include <util/u_format.h>
#include <tgsi/tgsi_ureg.h>
#include "vl_types.h"

#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)

#define NR_RENDER_TARGETS 4
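
/*
 * Note on the constants above (explanatory comment added here as a sketch,
 * not present in the original file): the IDCT runs as two render passes that
 * each sample a matrix texture pre-scaled by sqrtf(SCALE_FACTOR_16_TO_9), so
 * the passes together scale results by 32768.0f / 256.0f = 128.0f. This
 * appears to map coefficients stored as 16-bit SNORM (x / 32768) onto the
 * 9-bit signed output range (x / 256) expected from the IDCT.
 * NR_RENDER_TARGETS is the number of intermediate 3D-texture layers written
 * by the first pass.
 */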

enum VS_OUTPUT
{
   VS_O_VPOS,
   VS_O_L_ADDR0,
   VS_O_L_ADDR1,
   VS_O_R_ADDR0,
   VS_O_R_ADDR1
};

static const float const_matrix[8][8] = {
   { 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.353553f, 0.3535530f },
   { 0.4903930f, 0.4157350f, 0.2777850f, 0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f },
   { 0.4619400f, 0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f, 0.191342f, 0.4619400f },
   { 0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f, 0.2777850f, 0.4903930f, 0.097545f, -0.4157350f },
   { 0.3535530f, -0.3535530f, -0.3535530f, 0.3535540f, 0.3535530f, -0.3535540f, -0.353553f, 0.3535530f },
   { 0.2777850f, -0.4903930f, 0.0975452f, 0.4157350f, -0.4157350f, -0.0975451f, 0.490393f, -0.2777850f },
   { 0.1913420f, -0.4619400f, 0.4619400f, -0.1913420f, -0.1913410f, 0.4619400f, -0.461940f, 0.1913420f },
   { 0.0975451f, -0.2777850f, 0.4157350f, -0.4903930f, 0.4903930f, -0.4157350f, 0.277786f, -0.0975458f }
};
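
/*
 * For reference: the table above matches (up to rounding of the literals) the
 * orthonormal 8-point DCT-II basis. The generator below is only an
 * illustrative sketch added alongside this file -- it is not built or used
 * by the implementation, and the function name is made up for the example.
 */
#if 0
static void generate_dct_basis(float m[8][8])
{
   int i, j;
   for (i = 0; i < 8; ++i)
      for (j = 0; j < 8; ++j)
         m[i][j] = (i == 0 ? sqrtf(1.0f / 8.0f) : 0.5f) *
                   (float)cos((2 * j + 1) * i * M_PI / 16.0);
}
#endif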

static void
calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
          struct ureg_src tc, struct ureg_src start, bool right_side,
          bool transposed, float size)
{
   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
   unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;

   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
   unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;

   /*
    * addr[0..1].(start) = right_side ? start.y : start.x
    * addr[0..1].(tc)    = right_side ? tc.x : tc.y
    * addr[0..1].z       = tc.z
    * addr[1].(start)   += 1.0f / size
    *
    * where (start) and (tc) denote the x or y component selected by
    * wm_start and wm_tc above.
    */
   ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
   ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));
   ureg_MOV(shader, ureg_writemask(addr[0], TGSI_WRITEMASK_Z), tc);

   ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
   ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
   ureg_MOV(shader, ureg_writemask(addr[1], TGSI_WRITEMASK_Z), tc);
}
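
/*
 * Worked example (added for illustration, not in the original file): with
 * right_side = false and transposed = false, wm_start/sw_start select .x and
 * wm_tc/sw_tc select .y, so the code above produces
 *
 *    addr[0] = (start.x,               tc.y, tc.z)
 *    addr[1] = (start.x + 1.0f / size, tc.y, tc.z)
 *
 * i.e. two fetch addresses one texel apart along x in a texture that is
 * "size" texels wide; with right_side = true the roles of the x and y
 * components are swapped.
 */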

static void *
create_vert_shader(struct vl_idct *idct, bool matrix_stage)
{
   struct ureg_program *shader;
   struct ureg_src vrect, vpos, vblock, eb;
   struct ureg_src scale, blocks_xy;
   struct ureg_dst t_tex, t_start;
   struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
   unsigned label;

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return NULL;

   t_tex = ureg_DECL_temporary(shader);
   t_start = ureg_DECL_temporary(shader);

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
   vblock = ureg_swizzle(vrect, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   eb = ureg_DECL_vs_input(shader, VS_I_EB);

   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);

   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);

   /*
    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
    * blocks_xy = (blocks_x, blocks_y)
    *
    * if eb.(vblock.y, vblock.x)
    *    o_vpos.xy = -1
    * else
    *    t_tex = vpos * blocks_xy + vblock
    *    t_start = t_tex * scale
    *    t_tex = t_tex + vrect
    *    o_vpos.xy = t_tex * scale
    *
    *    o_l_addr = calc_addr(...)
    *    o_r_addr = calc_addr(...)
    * endif
    * o_vpos.zw = vpos
    */

   scale = ureg_imm2f(shader,
      (float)BLOCK_WIDTH / idct->buffer_width,
      (float)BLOCK_HEIGHT / idct->buffer_height);

   blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y);

   if (idct->blocks_x > 1 || idct->blocks_y > 1) {
      ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY),
               ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_Y)),
               ureg_swizzle(eb, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W),
               ureg_swizzle(eb, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y));

      ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X),
               ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_X)),
               ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_Y),
               ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_X));

      eb = ureg_src(t_tex);
   }

   ureg_IF(shader, ureg_scalar(eb, TGSI_SWIZZLE_X), &label);

      ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f));

   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
   ureg_ELSE(shader, &label);

      ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, blocks_xy, vblock);
      ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);

      ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), vrect);

      ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
      ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
               ureg_scalar(vrect, TGSI_SWIZZLE_X),
               ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));

      ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));

      if (matrix_stage) {
         calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
         calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
      } else {
         calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
         calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
      }

   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
   ureg_ENDIF(shader);

   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);

   ureg_release_temporary(shader, t_tex);
   ureg_release_temporary(shader, t_start);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}

static void
increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
               struct ureg_src saddr[2], bool right_side, bool transposed,
               int pos, float size)
{
   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;

   /*
    * daddr[0..1].(start) = saddr[0..1].(start)
    * daddr[0..1].(tc)    = saddr[0..1].(tc) + pos / size
    */

   ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
   ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
   ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
   ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
}
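
/*
 * Illustration (added comment, not in the original): each call shifts the
 * "tc" component of both addresses by pos / size. The matrix fragment shader
 * below uses pos = 1..3 with size = idct->buffer_height, i.e. it steps the
 * sampling point down by one texel row per call to reach the remaining rows
 * of the block being transformed.
 */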

static void
fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler)
{
   ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler);
   ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler);
}
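
/*
 * Note (added): each address fetches one RGBA texel, i.e. four packed values,
 * so the pair of fetches yields the eight values that matrix_mul() below
 * reduces with two DP4 instructions.
 */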

static void
matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
{
   struct ureg_dst tmp;

   tmp = ureg_DECL_temporary(shader);

   /*
    * tmp.x = dot4(l[0], r[0])
    * tmp.y = dot4(l[1], r[1])
    * dst = tmp.x + tmp.y
    */
   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
   ureg_ADD(shader, dst,
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));

   ureg_release_temporary(shader, tmp);
}
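
/*
 * In other words (added note): dst = sum over k = 0..7 of l[k] * r[k], an
 * eight-element dot product evaluated as two four-element dot products plus
 * an add -- one row of one operand times one column of the other in the
 * 8x8 matrix multiplications performed by the two passes below.
 */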

static void *
create_matrix_frag_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;

   struct ureg_src l_addr[2], r_addr[2];

   struct ureg_dst l[4][2], r[2];
   struct ureg_dst fragment[NR_RENDER_TARGETS];

   unsigned i, j;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);

   for (i = 0; i < NR_RENDER_TARGETS; ++i)
      fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);

   for (i = 0; i < 4; ++i) {
      l[i][0] = ureg_DECL_temporary(shader);
      l[i][1] = ureg_DECL_temporary(shader);
   }

   r[0] = ureg_DECL_temporary(shader);
   r[1] = ureg_DECL_temporary(shader);

   for (i = 1; i < 4; ++i) {
      increment_addr(shader, l[i], l_addr, false, false, i, idct->buffer_height);
   }

   for (i = 0; i < 4; ++i) {
      struct ureg_src s_addr[2];
      s_addr[0] = i == 0 ? l_addr[0] : ureg_src(l[i][0]);
      s_addr[1] = i == 0 ? l_addr[1] : ureg_src(l[i][1]);
      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1));
   }

   for (i = 0; i < NR_RENDER_TARGETS; ++i) {
      struct ureg_src s_addr[2];

      if (i > 0)
         increment_addr(shader, r, r_addr, true, true, i, BLOCK_HEIGHT);

      s_addr[0] = i == 0 ? r_addr[0] : ureg_src(r[0]);
      s_addr[1] = i == 0 ? r_addr[1] : ureg_src(r[1]);
      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0));

      for (j = 0; j < 4; ++j) {
         matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
      }
   }

   for (i = 0; i < 4; ++i) {
      ureg_release_temporary(shader, l[i][0]);
      ureg_release_temporary(shader, l[i][1]);
   }
   ureg_release_temporary(shader, r[0]);
   ureg_release_temporary(shader, r[1]);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
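
/*
 * Layout note (added, hedged): each fragment of this first pass appears to
 * write four dot products (one per colour-channel writemask bit) to each of
 * the NR_RENDER_TARGETS colour buffers, which init_intermediate() below
 * binds to the layers of a single 3D intermediate texture. The second pass
 * (create_transpose_frag_shader) then samples that intermediate and
 * multiplies by the transposed matrix to complete the 2-D IDCT.
 */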

static void *
create_transpose_frag_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;

   struct ureg_src l_addr[2], r_addr[2];

   struct ureg_dst l[2], r[2];
   struct ureg_dst fragment;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);

   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);

   l[0] = ureg_DECL_temporary(shader);
   l[1] = ureg_DECL_temporary(shader);
   r[0] = ureg_DECL_temporary(shader);
   r[1] = ureg_DECL_temporary(shader);

   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0));
   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1));

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   matrix_mul(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), l, r);

   ureg_release_temporary(shader, l[0]);
   ureg_release_temporary(shader, l[1]);
   ureg_release_temporary(shader, r[0]);
   ureg_release_temporary(shader, r[1]);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}

static bool
init_shaders(struct vl_idct *idct)
{
   idct->matrix_vs = create_vert_shader(idct, true);
   if (!idct->matrix_vs)
      goto error_matrix_vs;

   idct->matrix_fs = create_matrix_frag_shader(idct);
   if (!idct->matrix_fs)
      goto error_matrix_fs;

   idct->transpose_vs = create_vert_shader(idct, false);
   if (!idct->transpose_vs)
      goto error_transpose_vs;

   idct->transpose_fs = create_transpose_frag_shader(idct);
   if (!idct->transpose_fs)
      goto error_transpose_fs;

   return true;

error_transpose_fs:
   idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);

error_transpose_vs:
   idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);

error_matrix_fs:
   idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);

error_matrix_vs:
   return false;
}

static void
cleanup_shaders(struct vl_idct *idct)
{
   idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
   idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
   idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
   idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
}

static bool
init_state(struct vl_idct *idct)
{
   struct pipe_sampler_state sampler;
   struct pipe_rasterizer_state rs_state;
   unsigned i;

   assert(idct);

   memset(&rs_state, 0, sizeof(rs_state));
   rs_state.gl_rasterization_rules = false;
   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
   if (!idct->rs_state)
      goto error_rs_state;

   for (i = 0; i < 2; ++i) {
      memset(&sampler, 0, sizeof(sampler));
      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
      sampler.compare_func = PIPE_FUNC_ALWAYS;
      sampler.normalized_coords = 1;
      idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
      if (!idct->samplers[i])
         goto error_samplers;
   }

   return true;

error_samplers:
   /* only delete the samplers that were actually created */
   while (i-- > 0)
      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);

   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);

error_rs_state:
   return false;
}

static void
cleanup_state(struct vl_idct *idct)
{
   unsigned i;

   for (i = 0; i < 2; ++i)
      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);

   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
}

static bool
init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   struct pipe_resource tex_templ, *tex;
   struct pipe_sampler_view sv_templ;
   struct pipe_surface surf_templ;
   unsigned i;

   assert(idct && buffer);

   memset(&tex_templ, 0, sizeof(tex_templ));
   tex_templ.target = PIPE_TEXTURE_3D;
   tex_templ.format = PIPE_FORMAT_R16G16B16A16_SNORM;
   tex_templ.width0 = idct->buffer_width / NR_RENDER_TARGETS;
   tex_templ.height0 = idct->buffer_height / 4;
   tex_templ.depth0 = NR_RENDER_TARGETS;
   tex_templ.array_size = 1;
   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
   tex_templ.usage = PIPE_USAGE_STATIC;

   tex = idct->pipe->screen->resource_create(idct->pipe->screen, &tex_templ);
   if (!tex)
      goto error_tex;

   memset(&sv_templ, 0, sizeof(sv_templ));
   u_sampler_view_default_template(&sv_templ, tex, tex->format);
   buffer->sampler_views.individual.intermediate =
      idct->pipe->create_sampler_view(idct->pipe, tex, &sv_templ);
   if (!buffer->sampler_views.individual.intermediate)
      goto error_sampler_view;

   buffer->fb_state[0].width = tex->width0;
   buffer->fb_state[0].height = tex->height0;
   buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
   for (i = 0; i < NR_RENDER_TARGETS; ++i) {
      memset(&surf_templ, 0, sizeof(surf_templ));
      surf_templ.format = tex->format;
      surf_templ.u.tex.first_layer = i;
      surf_templ.u.tex.last_layer = i;
      surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
      buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface(
         idct->pipe, tex, &surf_templ);

      if (!buffer->fb_state[0].cbufs[i])
         goto error_surfaces;
   }

   buffer->viewport[0].scale[0] = tex->width0;
   buffer->viewport[0].scale[1] = tex->height0;

   pipe_resource_reference(&tex, NULL);
   return true;

error_surfaces:
   for (i = 0; i < NR_RENDER_TARGETS; ++i)
      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);

   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);

error_sampler_view:
   pipe_resource_reference(&tex, NULL);

error_tex:
   return false;
}
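
/*
 * Size check (added note): the intermediate texture holds
 * (buffer_width / NR_RENDER_TARGETS) x (buffer_height / 4) x NR_RENDER_TARGETS
 * RGBA16 texels, i.e. buffer_width * buffer_height / 4 texels of four
 * components each -- one component per source coefficient, repacked four to
 * a texel and split across the 3D texture's layers.
 */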

static void
cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   unsigned i;

   assert(idct && buffer);

   for (i = 0; i < NR_RENDER_TARGETS; ++i)
      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);

   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
}

struct pipe_sampler_view *
vl_idct_upload_matrix(struct pipe_context *pipe)
{
   const float scale = sqrtf(SCALE_FACTOR_16_TO_9);

   struct pipe_resource tex_templ, *matrix;
   struct pipe_sampler_view sv_templ, *sv;
   struct pipe_transfer *buf_transfer;
   unsigned i, j, pitch;
   float *f;

   struct pipe_box rect =
   {
      0, 0, 0,
      BLOCK_WIDTH / 4,
      BLOCK_HEIGHT,
      1
   };

   assert(pipe);

   memset(&tex_templ, 0, sizeof(tex_templ));
   tex_templ.target = PIPE_TEXTURE_2D;
   tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
   tex_templ.last_level = 0;
   tex_templ.width0 = 2;
   tex_templ.height0 = 8;
   tex_templ.depth0 = 1;
   tex_templ.array_size = 1;
   tex_templ.usage = PIPE_USAGE_IMMUTABLE;
   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
   tex_templ.flags = 0;

   matrix = pipe->screen->resource_create(pipe->screen, &tex_templ);
   if (!matrix)
      goto error_matrix;

   buf_transfer = pipe->get_transfer
   (
      pipe, matrix,
      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
      &rect
   );
   if (!buf_transfer)
      goto error_transfer;

   pitch = buf_transfer->stride / sizeof(float);

   f = pipe->transfer_map(pipe, buf_transfer);
   if (!f)
      goto error_map;

   for (i = 0; i < BLOCK_HEIGHT; ++i)
      for (j = 0; j < BLOCK_WIDTH; ++j)
         // transpose and scale
         f[i * pitch + j] = const_matrix[j][i] * scale;

   pipe->transfer_unmap(pipe, buf_transfer);
   pipe->transfer_destroy(pipe, buf_transfer);

   memset(&sv_templ, 0, sizeof(sv_templ));
   u_sampler_view_default_template(&sv_templ, matrix, matrix->format);
   sv = pipe->create_sampler_view(pipe, matrix, &sv_templ);
   pipe_resource_reference(&matrix, NULL);
   if (!sv)
      goto error_matrix; /* the transfer is already destroyed at this point */

   return sv;

error_map:
   pipe->transfer_destroy(pipe, buf_transfer);

error_transfer:
   pipe_resource_reference(&matrix, NULL);

error_matrix:
   return NULL;
}
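
/*
 * Usage sketch (added for illustration only, under the assumption that the
 * caller owns a pipe_context and knows its picture dimensions; the function
 * and variable names here are made up and this block is not built):
 */
#if 0
static bool example_setup(struct pipe_context *pipe, struct vl_idct *idct,
                          unsigned width, unsigned height)
{
   struct pipe_sampler_view *matrix = vl_idct_upload_matrix(pipe);
   if (!matrix)
      return false;

   /* e.g. 2x2 blocks per instance, as a 4:2:0 luma plane might use */
   if (!vl_idct_init(idct, pipe, width, height, 2, 2, matrix)) {
      pipe_sampler_view_reference(&matrix, NULL);
      return false;
   }

   /* vl_idct_init() keeps its own reference to the matrix */
   pipe_sampler_view_reference(&matrix, NULL);
   return true;
}
#endif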

bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                  unsigned buffer_width, unsigned buffer_height,
                  unsigned blocks_x, unsigned blocks_y,
                  struct pipe_sampler_view *matrix)
{
   assert(idct && pipe && matrix);

   idct->pipe = pipe;
   idct->buffer_width = buffer_width;
   idct->buffer_height = buffer_height;
   idct->blocks_x = blocks_x;
   idct->blocks_y = blocks_y;
   pipe_sampler_view_reference(&idct->matrix, matrix);

   if (!init_shaders(idct))
      return false;

   if (!init_state(idct)) {
      cleanup_shaders(idct);
      return false;
   }

   return true;
}

void
vl_idct_cleanup(struct vl_idct *idct)
{
   cleanup_shaders(idct);
   cleanup_state(idct);

   pipe_sampler_view_reference(&idct->matrix, NULL);
}

bool
vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
                    struct pipe_sampler_view *source, struct pipe_surface *destination)
{
   unsigned i;

   assert(buffer);
   assert(idct);
   assert(source);
   assert(destination);

   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->matrix);

   if (!init_intermediate(idct, buffer))
      return false;

   /* init state */
   buffer->fb_state[1].width = destination->texture->width0;
   buffer->fb_state[1].height = destination->texture->height0;
   buffer->fb_state[1].nr_cbufs = 1;
   pipe_surface_reference(&buffer->fb_state[1].cbufs[0], destination);

   buffer->viewport[1].scale[0] = destination->texture->width0;
   buffer->viewport[1].scale[1] = destination->texture->height0;

   for (i = 0; i < 2; ++i) {
      buffer->viewport[i].scale[2] = 1;
      buffer->viewport[i].scale[3] = 1;
      buffer->viewport[i].translate[0] = 0;
      buffer->viewport[i].translate[1] = 0;
      buffer->viewport[i].translate[2] = 0;
      buffer->viewport[i].translate[3] = 0;

      buffer->fb_state[i].zsbuf = NULL;
   }

   return true;
}

void
vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   unsigned i;

   assert(idct && buffer);

   for (i = 0; i < NR_RENDER_TARGETS; ++i)
      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);

   pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL);

   cleanup_intermediate(idct, buffer);
}

void
vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   struct pipe_resource *tex;

   assert(idct && buffer);

   tex = buffer->sampler_views.individual.source->texture;

   struct pipe_box rect =
   {
      0, 0, 0,
      tex->width0,
      tex->height0,
      1
   };

   buffer->tex_transfer = idct->pipe->get_transfer
   (
      idct->pipe, tex,
      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
      &rect
   );

   buffer->texels = idct->pipe->transfer_map(idct->pipe, buffer->tex_transfer);
}

void
vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block)
{
   unsigned tex_pitch;
   short *texels;

   unsigned i;

   assert(buffer);
   assert(block);

   tex_pitch = buffer->tex_transfer->stride / sizeof(short);
   texels = buffer->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;

   for (i = 0; i < BLOCK_HEIGHT; ++i)
      memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
}

void
vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
   assert(idct && buffer);

   idct->pipe->transfer_unmap(idct->pipe, buffer->tex_transfer);
   idct->pipe->transfer_destroy(idct->pipe, buffer->tex_transfer);
}

void
vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
{
   unsigned num_verts;

   assert(idct);
   assert(buffer);

   if (num_instances > 0) {
      num_verts = idct->blocks_x * idct->blocks_y * 4;

      idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);

      /* first stage */
      idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]);
      idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]);
      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
      idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
      idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances);

      /* second stage */
      idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]);
      idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]);
      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
      idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
      idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances);
   }
}
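
/*
 * Per-frame flow sketch (added for illustration only; the struct, function
 * and variable names below are hypothetical caller-side names and this block
 * is not built):
 */
#if 0
struct example_block {
   unsigned x, y;    /* block position, in 8x8 block units */
   short coeffs[64]; /* dequantized coefficients, row major */
};

static void
example_decode_plane(struct vl_idct *idct, struct vl_idct_buffer *buf,
                     struct example_block *blocks, unsigned num_blocks,
                     unsigned num_instances)
{
   unsigned i;

   vl_idct_map_buffers(idct, buf);

   /* upload the 8x8 coefficient blocks into the source texture */
   for (i = 0; i < num_blocks; ++i)
      vl_idct_add_block(buf, blocks[i].x, blocks[i].y, blocks[i].coeffs);

   vl_idct_unmap_buffers(idct, buf);

   /* first pass into the intermediate texture, second pass into the destination */
   vl_idct_flush(idct, buf, num_instances);
}
#endif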