[g3dvl] move mapping/unmapping and uploading of blocks out of idct code
[mesa.git] / src / gallium / auxiliary / vl / vl_idct.c
1 /**************************************************************************
2 *
3 * Copyright 2010 Christian König
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_idct.h"
29 #include "vl_vertex_buffers.h"
30 #include "vl_defines.h"
31 #include "util/u_draw.h"
32 #include <assert.h>
33 #include <pipe/p_context.h>
34 #include <pipe/p_screen.h>
35 #include <util/u_inlines.h>
36 #include <util/u_sampler.h>
37 #include <util/u_format.h>
38 #include <tgsi/tgsi_ureg.h>
39 #include "vl_types.h"
40
/*
 * 32768 / 256 = 128.  vl_idct_upload_matrix() multiplies every matrix
 * coefficient by the square root of this value.
 */
#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)

/*
 * Number of colour buffers written by the first (matrix) pass; also the
 * depth of the intermediate 3D texture created in init_intermediate().
 */
#define NR_RENDER_TARGETS 4
44
/*
 * Semantic indices shared by the vertex shader outputs and the fragment
 * shader inputs declared in this file.
 */
enum VS_OUTPUT
{
   VS_O_VPOS    = 0,   /* position output */
   VS_O_L_ADDR0 = 1,   /* first fetch address of the left operand */
   VS_O_L_ADDR1 = 2,   /* second fetch address of the left operand */
   VS_O_R_ADDR0 = 3,   /* first fetch address of the right operand */
   VS_O_R_ADDR1 = 4    /* second fetch address of the right operand */
};
53
/*
 * 8x8 coefficient table used by the IDCT passes.
 * vl_idct_upload_matrix() stores the transpose of this table, scaled by
 * sqrt(SCALE_FACTOR_16_TO_9), into a float texture.
 *
 * NOTE(review): the values look like the 8-point DCT basis (row 0 is the
 * constant 1 / (2 * sqrt(2))); a few entries differ in the last digit and
 * are reproduced here exactly as they were.
 */
static const float const_matrix[8][8] = {
   {  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,
      0.3535530f,  0.3535530f,  0.353553f,   0.3535530f },
   {  0.4903930f,  0.4157350f,  0.2777850f,  0.0975451f,
     -0.0975452f, -0.2777850f, -0.415735f,  -0.4903930f },
   {  0.4619400f,  0.1913420f, -0.1913420f, -0.4619400f,
     -0.4619400f, -0.1913420f,  0.191342f,   0.4619400f },
   {  0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f,
      0.2777850f,  0.4903930f,  0.097545f,  -0.4157350f },
   {  0.3535530f, -0.3535530f, -0.3535530f,  0.3535540f,
      0.3535530f, -0.3535540f, -0.353553f,   0.3535530f },
   {  0.2777850f, -0.4903930f,  0.0975452f,  0.4157350f,
     -0.4157350f, -0.0975451f,  0.490393f,  -0.2777850f },
   {  0.1913420f, -0.4619400f,  0.4619400f, -0.1913420f,
     -0.1913410f,  0.4619400f, -0.461940f,   0.1913420f },
   {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,
      0.4903930f, -0.4157350f,  0.277786f,  -0.0975458f }
};
64
65 static void
66 calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
67 struct ureg_src tc, struct ureg_src start, bool right_side,
68 bool transposed, float size)
69 {
70 unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
71 unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;
72
73 unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
74 unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;
75
76 /*
77 * addr[0..1].(start) = right_side ? start.x : tc.x
78 * addr[0..1].(tc) = right_side ? tc.y : start.y
79 * addr[0..1].z = tc.z
80 * addr[1].(start) += 1.0f / scale
81 */
82 ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
83 ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));
84 ureg_MOV(shader, ureg_writemask(addr[0], TGSI_WRITEMASK_Z), tc);
85
86 ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
87 ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
88 ureg_MOV(shader, ureg_writemask(addr[1], TGSI_WRITEMASK_Z), tc);
89 }
90
91 static void *
92 create_vert_shader(struct vl_idct *idct, bool matrix_stage)
93 {
94 struct ureg_program *shader;
95 struct ureg_src vrect, vpos, vblock, eb;
96 struct ureg_src scale, blocks_xy;
97 struct ureg_dst t_tex, t_start;
98 struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
99 unsigned label;
100
101 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
102 if (!shader)
103 return NULL;
104
105 t_tex = ureg_DECL_temporary(shader);
106 t_start = ureg_DECL_temporary(shader);
107
108 vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
109 vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
110 vblock = ureg_swizzle(vrect, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
111
112 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
113
114 eb = ureg_DECL_vs_input(shader, VS_I_EB);
115
116 o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
117 o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
118
119 o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
120 o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);
121
122 /*
123 * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
124 * blocks_xy = (blocks_x, blocks_y)
125 *
126 * if eb.(vblock.y, vblock.x)
127 * o_vpos.xy = -1
128 * else
129 * t_tex = vpos * blocks_xy + vblock
130 * t_start = t_tex * scale
131 * t_tex = t_tex + vrect
132 * o_vpos.xy = t_tex * scale
133 *
134 * o_l_addr = calc_addr(...)
135 * o_r_addr = calc_addr(...)
136 * endif
137 * o_vpos.zw = vpos
138 *
139 */
140
141 scale = ureg_imm2f(shader,
142 (float)BLOCK_WIDTH / idct->buffer_width,
143 (float)BLOCK_HEIGHT / idct->buffer_height);
144
145 blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y);
146
147 if (idct->blocks_x > 1 || idct->blocks_y > 1) {
148 ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY),
149 ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_Y)),
150 ureg_swizzle(eb, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W),
151 ureg_swizzle(eb, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y));
152
153 ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X),
154 ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_X)),
155 ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_Y),
156 ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_X));
157
158 eb = ureg_src(t_tex);
159 }
160
161 ureg_IF(shader, ureg_scalar(eb, TGSI_SWIZZLE_X), &label);
162
163 ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f));
164
165 ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
166 ureg_ELSE(shader, &label);
167
168 ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, blocks_xy, vblock);
169 ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
170
171 ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), vrect);
172
173 ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
174 ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
175 ureg_scalar(vrect, TGSI_SWIZZLE_X),
176 ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));
177
178 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
179
180 if(matrix_stage) {
181 calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
182 calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
183 } else {
184 calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
185 calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
186 }
187
188 ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
189 ureg_ENDIF(shader);
190
191 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
192
193 ureg_release_temporary(shader, t_tex);
194 ureg_release_temporary(shader, t_start);
195
196 ureg_END(shader);
197
198 return ureg_create_shader_and_destroy(shader, idct->pipe);
199 }
200
201 static void
202 increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
203 struct ureg_src saddr[2], bool right_side, bool transposed,
204 int pos, float size)
205 {
206 unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
207 unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
208
209 /*
210 * daddr[0..1].(start) = saddr[0..1].(start)
211 * daddr[0..1].(tc) = saddr[0..1].(tc)
212 */
213
214 ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
215 ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
216 ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
217 ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
218 }
219
220 static void
221 fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler)
222 {
223 ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler);
224 ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler);
225 }
226
227 static void
228 matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
229 {
230 struct ureg_dst tmp;
231
232 tmp = ureg_DECL_temporary(shader);
233
234 /*
235 * tmp.xy = dot4(m[0][0..1], m[1][0..1])
236 * dst = tmp.x + tmp.y
237 */
238 ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
239 ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
240 ureg_ADD(shader, dst,
241 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
242 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
243
244 ureg_release_temporary(shader, tmp);
245 }
246
247 static void *
248 create_matrix_frag_shader(struct vl_idct *idct)
249 {
250 struct ureg_program *shader;
251
252 struct ureg_src l_addr[2], r_addr[2];
253
254 struct ureg_dst l[4][2], r[2];
255 struct ureg_dst fragment[NR_RENDER_TARGETS];
256
257 unsigned i, j;
258
259 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
260 if (!shader)
261 return NULL;
262
263 l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
264 l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
265
266 r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
267 r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
268
269 for (i = 0; i < NR_RENDER_TARGETS; ++i)
270 fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
271
272 for (i = 0; i < 4; ++i) {
273 l[i][0] = ureg_DECL_temporary(shader);
274 l[i][1] = ureg_DECL_temporary(shader);
275 }
276
277 r[0] = ureg_DECL_temporary(shader);
278 r[1] = ureg_DECL_temporary(shader);
279
280 for (i = 1; i < 4; ++i) {
281 increment_addr(shader, l[i], l_addr, false, false, i, idct->buffer_height);
282 }
283
284 for (i = 0; i < 4; ++i) {
285 struct ureg_src s_addr[2];
286 s_addr[0] = i == 0 ? l_addr[0] : ureg_src(l[i][0]);
287 s_addr[1] = i == 0 ? l_addr[1] : ureg_src(l[i][1]);
288 fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1));
289 }
290
291 for (i = 0; i < NR_RENDER_TARGETS; ++i) {
292 if(i > 0)
293 increment_addr(shader, r, r_addr, true, true, i, BLOCK_HEIGHT);
294
295 struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) };
296 s_addr[0] = i == 0 ? r_addr[0] : ureg_src(r[0]);
297 s_addr[1] = i == 0 ? r_addr[1] : ureg_src(r[1]);
298 fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0));
299
300 for (j = 0; j < 4; ++j) {
301 matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
302 }
303 }
304
305 for (i = 0; i < 4; ++i) {
306 ureg_release_temporary(shader, l[i][0]);
307 ureg_release_temporary(shader, l[i][1]);
308 }
309 ureg_release_temporary(shader, r[0]);
310 ureg_release_temporary(shader, r[1]);
311
312 ureg_END(shader);
313
314 return ureg_create_shader_and_destroy(shader, idct->pipe);
315 }
316
317 static void *
318 create_transpose_frag_shader(struct vl_idct *idct)
319 {
320 struct ureg_program *shader;
321
322 struct ureg_src l_addr[2], r_addr[2];
323
324 struct ureg_dst l[2], r[2];
325 struct ureg_dst fragment;
326
327 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
328 if (!shader)
329 return NULL;
330
331 l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
332 l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
333
334 r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
335 r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
336
337 l[0] = ureg_DECL_temporary(shader);
338 l[1] = ureg_DECL_temporary(shader);
339 r[0] = ureg_DECL_temporary(shader);
340 r[1] = ureg_DECL_temporary(shader);
341
342 fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0));
343 fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1));
344
345 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
346
347 matrix_mul(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), l, r);
348
349 ureg_release_temporary(shader, l[0]);
350 ureg_release_temporary(shader, l[1]);
351 ureg_release_temporary(shader, r[0]);
352 ureg_release_temporary(shader, r[1]);
353
354 ureg_END(shader);
355
356 return ureg_create_shader_and_destroy(shader, idct->pipe);
357 }
358
359 static bool
360 init_shaders(struct vl_idct *idct)
361 {
362 idct->matrix_vs = create_vert_shader(idct, true);
363 if (!idct->matrix_vs)
364 goto error_matrix_vs;
365
366 idct->matrix_fs = create_matrix_frag_shader(idct);
367 if (!idct->matrix_fs)
368 goto error_matrix_fs;
369
370 idct->transpose_vs = create_vert_shader(idct, false);
371 if (!idct->transpose_vs)
372 goto error_transpose_vs;
373
374 idct->transpose_fs = create_transpose_frag_shader(idct);
375 if (!idct->transpose_fs)
376 goto error_transpose_fs;
377
378 return true;
379
380 error_transpose_fs:
381 idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
382
383 error_transpose_vs:
384 idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
385
386 error_matrix_fs:
387 idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
388
389 error_matrix_vs:
390 return false;
391 }
392
393 static void
394 cleanup_shaders(struct vl_idct *idct)
395 {
396 idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
397 idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
398 idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
399 idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
400 }
401
402 static bool
403 init_state(struct vl_idct *idct)
404 {
405 struct pipe_sampler_state sampler;
406 struct pipe_rasterizer_state rs_state;
407 unsigned i;
408
409 assert(idct);
410
411 memset(&rs_state, 0, sizeof(rs_state));
412 rs_state.gl_rasterization_rules = false;
413 idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
414 if (!idct->rs_state)
415 goto error_rs_state;
416
417 for (i = 0; i < 2; ++i) {
418 memset(&sampler, 0, sizeof(sampler));
419 sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
420 sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
421 sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
422 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
423 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
424 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
425 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
426 sampler.compare_func = PIPE_FUNC_ALWAYS;
427 sampler.normalized_coords = 1;
428 idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
429 if (!idct->samplers[i])
430 goto error_samplers;
431 }
432
433 return true;
434
435 error_samplers:
436 for (i = 0; i < 2; ++i)
437 if (idct->samplers[i])
438 idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
439
440 idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
441
442 error_rs_state:
443 return false;
444 }
445
446 static void
447 cleanup_state(struct vl_idct *idct)
448 {
449 unsigned i;
450
451 for (i = 0; i < 2; ++i)
452 idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
453
454 idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
455 }
456
457 static bool
458 init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
459 {
460 struct pipe_resource tex_templ, *tex;
461 struct pipe_sampler_view sv_templ;
462 struct pipe_surface surf_templ;
463 unsigned i;
464
465 assert(idct && buffer);
466
467 memset(&tex_templ, 0, sizeof(tex_templ));
468 tex_templ.target = PIPE_TEXTURE_3D;
469 tex_templ.format = PIPE_FORMAT_R16G16B16A16_SNORM;
470 tex_templ.width0 = idct->buffer_width / NR_RENDER_TARGETS;
471 tex_templ.height0 = idct->buffer_height / 4;
472 tex_templ.depth0 = NR_RENDER_TARGETS;
473 tex_templ.array_size = 1;
474 tex_templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
475 tex_templ.usage = PIPE_USAGE_STATIC;
476
477 tex = idct->pipe->screen->resource_create(idct->pipe->screen, &tex_templ);
478 if (!tex)
479 goto error_tex;
480
481 memset(&sv_templ, 0, sizeof(sv_templ));
482 u_sampler_view_default_template(&sv_templ, tex, tex->format);
483 buffer->sampler_views.individual.intermediate =
484 idct->pipe->create_sampler_view(idct->pipe, tex, &sv_templ);
485 if (!buffer->sampler_views.individual.intermediate)
486 goto error_sampler_view;
487
488 buffer->fb_state[0].width = tex->width0;
489 buffer->fb_state[0].height = tex->height0;
490 buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
491 for(i = 0; i < NR_RENDER_TARGETS; ++i) {
492 memset(&surf_templ, 0, sizeof(surf_templ));
493 surf_templ.format = tex->format;
494 surf_templ.u.tex.first_layer = i;
495 surf_templ.u.tex.last_layer = i;
496 surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
497 buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface(
498 idct->pipe, tex, &surf_templ);
499
500 if (!buffer->fb_state[0].cbufs[i])
501 goto error_surfaces;
502 }
503
504 buffer->viewport[0].scale[0] = tex->width0;
505 buffer->viewport[0].scale[1] = tex->height0;
506
507 pipe_resource_reference(&tex, NULL);
508 return true;
509
510 error_surfaces:
511 for(i = 0; i < NR_RENDER_TARGETS; ++i)
512 pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
513
514 pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
515
516 error_sampler_view:
517 pipe_resource_reference(&tex, NULL);
518
519 error_tex:
520 return false;
521 }
522
523 static void
524 cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
525 {
526 unsigned i;
527
528 assert(idct && buffer);
529
530 for(i = 0; i < NR_RENDER_TARGETS; ++i)
531 pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
532
533 pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
534 }
535
536 struct pipe_sampler_view *
537 vl_idct_upload_matrix(struct pipe_context *pipe)
538 {
539 const float scale = sqrtf(SCALE_FACTOR_16_TO_9);
540
541 struct pipe_resource tex_templ, *matrix;
542 struct pipe_sampler_view sv_templ, *sv;
543 struct pipe_transfer *buf_transfer;
544 unsigned i, j, pitch;
545 float *f;
546
547 struct pipe_box rect =
548 {
549 0, 0, 0,
550 BLOCK_WIDTH / 4,
551 BLOCK_HEIGHT,
552 1
553 };
554
555 assert(pipe);
556
557 memset(&tex_templ, 0, sizeof(tex_templ));
558 tex_templ.target = PIPE_TEXTURE_2D;
559 tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
560 tex_templ.last_level = 0;
561 tex_templ.width0 = 2;
562 tex_templ.height0 = 8;
563 tex_templ.depth0 = 1;
564 tex_templ.array_size = 1;
565 tex_templ.usage = PIPE_USAGE_IMMUTABLE;
566 tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
567 tex_templ.flags = 0;
568
569 matrix = pipe->screen->resource_create(pipe->screen, &tex_templ);
570 if (!matrix)
571 goto error_matrix;
572
573 buf_transfer = pipe->get_transfer
574 (
575 pipe, matrix,
576 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
577 &rect
578 );
579 if (!buf_transfer)
580 goto error_transfer;
581
582 pitch = buf_transfer->stride / sizeof(float);
583
584 f = pipe->transfer_map(pipe, buf_transfer);
585 if (!f)
586 goto error_map;
587
588 for(i = 0; i < BLOCK_HEIGHT; ++i)
589 for(j = 0; j < BLOCK_WIDTH; ++j)
590 // transpose and scale
591 f[i * pitch + j] = const_matrix[j][i] * scale;
592
593 pipe->transfer_unmap(pipe, buf_transfer);
594 pipe->transfer_destroy(pipe, buf_transfer);
595
596 memset(&sv_templ, 0, sizeof(sv_templ));
597 u_sampler_view_default_template(&sv_templ, matrix, matrix->format);
598 sv = pipe->create_sampler_view(pipe, matrix, &sv_templ);
599 pipe_resource_reference(&matrix, NULL);
600 if (!sv)
601 goto error_map;
602
603 return sv;
604
605 error_map:
606 pipe->transfer_destroy(pipe, buf_transfer);
607
608 error_transfer:
609 pipe_resource_reference(&matrix, NULL);
610
611 error_matrix:
612 return NULL;
613 }
614
615 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
616 unsigned buffer_width, unsigned buffer_height,
617 unsigned blocks_x, unsigned blocks_y,
618 struct pipe_sampler_view *matrix)
619 {
620 assert(idct && pipe && matrix);
621
622 idct->pipe = pipe;
623 idct->buffer_width = buffer_width;
624 idct->buffer_height = buffer_height;
625 idct->blocks_x = blocks_x;
626 idct->blocks_y = blocks_y;
627 pipe_sampler_view_reference(&idct->matrix, matrix);
628
629 if(!init_shaders(idct))
630 return false;
631
632 if(!init_state(idct)) {
633 cleanup_shaders(idct);
634 return false;
635 }
636
637 return true;
638 }
639
640 void
641 vl_idct_cleanup(struct vl_idct *idct)
642 {
643 cleanup_shaders(idct);
644 cleanup_state(idct);
645
646 pipe_sampler_view_reference(&idct->matrix, NULL);
647 }
648
649 bool
650 vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
651 struct pipe_sampler_view *source, struct pipe_surface *destination)
652 {
653 unsigned i;
654
655 assert(buffer);
656 assert(idct);
657 assert(source);
658 assert(destination);
659
660 pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
661 pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
662 pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->matrix);
663
664 if (!init_intermediate(idct, buffer))
665 return false;
666
667 /* init state */
668 buffer->fb_state[1].width = destination->texture->width0;
669 buffer->fb_state[1].height = destination->texture->height0;
670 buffer->fb_state[1].nr_cbufs = 1;
671 pipe_surface_reference(&buffer->fb_state[1].cbufs[0], destination);
672
673 buffer->viewport[1].scale[0] = destination->texture->width0;
674 buffer->viewport[1].scale[1] = destination->texture->height0;
675
676 for(i = 0; i < 2; ++i) {
677 buffer->viewport[i].scale[2] = 1;
678 buffer->viewport[i].scale[3] = 1;
679 buffer->viewport[i].translate[0] = 0;
680 buffer->viewport[i].translate[1] = 0;
681 buffer->viewport[i].translate[2] = 0;
682 buffer->viewport[i].translate[3] = 0;
683
684 buffer->fb_state[i].zsbuf = NULL;
685 }
686
687 return true;
688 }
689
690 void
691 vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
692 {
693 unsigned i;
694
695 assert(idct && buffer);
696
697 for(i = 0; i < NR_RENDER_TARGETS; ++i)
698 pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
699
700 pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL);
701
702 cleanup_intermediate(idct, buffer);
703 }
704
705 void
706 vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
707 {
708 unsigned num_verts;
709
710 assert(idct);
711 assert(buffer);
712
713 if(num_instances > 0) {
714 num_verts = idct->blocks_x * idct->blocks_y * 4;
715
716 idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
717 idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
718
719 /* first stage */
720 idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]);
721 idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]);
722 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
723 idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
724 idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
725 util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances);
726
727 /* second stage */
728 idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]);
729 idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]);
730 idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
731 idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
732 idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
733 util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances);
734 }
735 }