use a shadow buffer for vertex data to optimize memory access
[mesa.git] src/gallium/auxiliary/vl/vl_idct.c
/**************************************************************************
 *
 * Copyright 2010 Christian König
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "vl_idct.h"
#include "vl_vertex_buffers.h"
#include "util/u_draw.h"
#include <assert.h>
#include <pipe/p_context.h>
#include <pipe/p_screen.h>
#include <util/u_inlines.h>
#include <util/u_sampler.h>
#include <util/u_format.h>
#include <tgsi/tgsi_ureg.h>
#include "vl_types.h"

#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8

#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)

#define STAGE1_SCALE 4.0f
#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE)

#define NR_RENDER_TARGETS 1
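
/*
 * Added annotation (an interpretation, not from the original source):
 * the coefficient texture is R16G16B16A16_SNORM, so a value c is sampled
 * back as c / 32768.0; SCALE_FACTOR_16_TO_9 (32768 / 256 = 128) restores
 * the 9 bit signed range the IDCT output is expected to cover. The factor
 * is split across the two passes, STAGE1_SCALE * STAGE2_SCALE ==
 * SCALE_FACTOR_16_TO_9, presumably so the intermediate texture keeps
 * precision without leaving its SNORM range. NR_RENDER_TARGETS selects
 * how many slices of the intermediate 3D texture the first pass writes
 * per fragment; only 1 (and the ifdef'ed 8) are handled below.
 */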

struct vertex_shader_consts
{
   struct vertex4f norm;
};

enum VS_INPUT
{
   VS_I_RECT,
   VS_I_VPOS,

   NUM_VS_INPUTS
};

enum VS_OUTPUT
{
   VS_O_VPOS,
   VS_O_BLOCK,
   VS_O_TEX,
   VS_O_START
};

static const float const_matrix[8][8] = {
   { 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.353553f, 0.3535530f },
   { 0.4903930f, 0.4157350f, 0.2777850f, 0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f },
   { 0.4619400f, 0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f, 0.191342f, 0.4619400f },
   { 0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f, 0.2777850f, 0.4903930f, 0.097545f, -0.4157350f },
   { 0.3535530f, -0.3535530f, -0.3535530f, 0.3535540f, 0.3535530f, -0.3535540f, -0.353553f, 0.3535530f },
   { 0.2777850f, -0.4903930f, 0.0975452f, 0.4157350f, -0.4157350f, -0.0975451f, 0.490393f, -0.2777850f },
   { 0.1913420f, -0.4619400f, 0.4619400f, -0.1913420f, -0.1913410f, 0.4619400f, -0.461940f, 0.1913420f },
   { 0.0975451f, -0.2777850f, 0.4157350f, -0.4903930f, 0.4903930f, -0.4157350f, 0.277786f, -0.0975458f }
};
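
/*
 * Added annotation: the table above appears to be the orthonormal 8-point
 * DCT-II basis, i.e. roughly
 *
 *    const_matrix[k][n] = c(k) * cos((2 * n + 1) * k * PI / 16),
 *    with c(0) = sqrt(1.0 / 8.0) and c(k > 0) = 0.5,
 *
 * so row k is the k-th cosine basis vector (the small irregularities in
 * the last column look like rounding in the original constants).
 * vl_idct_upload_matrix() stores this table transposed in a texture that
 * both render passes appear to sample: matrix and transpose reference the
 * same resource in vl_idct_init().
 */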

static void *
create_vert_shader(struct vl_idct *idct, bool calc_src_coords)
{
   struct ureg_program *shader;
   struct ureg_src scale;
   struct ureg_src vrect, vpos;
   struct ureg_dst t_vpos;
   struct ureg_dst o_vpos, o_block, o_tex, o_start;

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return NULL;

   t_vpos = ureg_DECL_temporary(shader);

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   /*
    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
    *
    * t_vpos = vpos + vrect
    * o_vpos.xy = t_vpos * scale
    * o_vpos.zw = vpos
    *
    * o_block = vrect
    * o_tex = t_vpos
    * o_start = vpos * scale
    */
   scale = ureg_imm2f(shader,
      (float)BLOCK_WIDTH / idct->destination->width0,
      (float)BLOCK_HEIGHT / idct->destination->height0);

   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);

   if(calc_src_coords) {
      o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
      o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
      o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);

      ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
      ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
      ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
   }

   ureg_release_temporary(shader, t_vpos);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
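
/*
 * Added note on create_vert_shader(): each 8x8 block is drawn as a quad
 * built from two vertex streams, the per-vertex corner of a unit rect
 * (VS_I_RECT, uploaded once by vl_vb_upload_quads()) and the per-block
 * position in block units (VS_I_VPOS, filled from the vl_vb shadow
 * buffers at flush time). The shader scales their sum by
 * BLOCK_SIZE / dst_size to place the quad and, when calc_src_coords is
 * set, also emits the block, tex and start coordinates the IDCT fragment
 * shaders fetch with.
 */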

static void
fetch_one(struct ureg_program *shader, struct ureg_dst m[2],
          struct ureg_src tc, struct ureg_src sampler,
          struct ureg_src start, struct ureg_src block, float height)
{
   struct ureg_dst t_tc, tmp;
   unsigned i, j;

   t_tc = ureg_DECL_temporary(shader);
   tmp = ureg_DECL_temporary(shader);

   m[0] = ureg_DECL_temporary(shader);
   m[1] = ureg_DECL_temporary(shader);

   /*
    * t_tc.x = tc.x
    * t_tc.y = start.y
    * m[0..1].xyzw = tex(t_tc++, sampler)
    */
   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(tc, TGSI_SWIZZLE_X));
   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(start, TGSI_SWIZZLE_Y));

#if NR_RENDER_TARGETS == 8
   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(block, TGSI_SWIZZLE_X));
#else
   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
#endif

   for(i = 0; i < 2; ++i) {
      for(j = 0; j < 4; ++j) {
         /* Nouveau and r600g can't writemask tex dst regs (yet?), do it in two steps */
         ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
         ureg_MOV(shader, ureg_writemask(m[i], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));

         if(i != 1 || j != 3) /* skip the last add */
            ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y),
                     ureg_src(t_tc), ureg_imm1f(shader, 1.0f / height));
      }
   }

   ureg_release_temporary(shader, t_tc);
   ureg_release_temporary(shader, tmp);
}
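
/*
 * Added note: fetch_one() walks eight texels down one column of the 3D
 * intermediate texture (stepping 1.0 / height in Y) and packs the eight
 * scalars into the vec4 pair m[0]/m[1], ready for the two-DP4 dot product
 * in matrix_mul().
 */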

static void
fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
           struct ureg_src tc, struct ureg_src sampler,
           struct ureg_src start, bool right_side, float width)
{
   struct ureg_dst t_tc;

   t_tc = ureg_DECL_temporary(shader);
   m[0] = ureg_DECL_temporary(shader);
   m[1] = ureg_DECL_temporary(shader);

   /*
    * t_tc.x = right_side ? start.y : start.x
    * t_tc.y = right_side ? tc.x : tc.y
    * m[0..1] = tex(t_tc++, sampler)
    */
   if(right_side) {
      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_Y));
      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_X));
   } else {
      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_X));
      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_Y));
   }

   ureg_TEX(shader, m[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
   ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / width));
   ureg_TEX(shader, m[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);

   ureg_release_temporary(shader, t_tc);
}
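
/*
 * Added note: fetch_four() reads two horizontally adjacent RGBA texels,
 * i.e. eight consecutive values of a row, into m[0]/m[1]. The right_side
 * flag swaps which of 'start' and 'tc' feeds X and Y, which appears to be
 * how the same helper addresses both the coefficient rows and the
 * transposed DCT matrix.
 */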

static void
matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
{
   struct ureg_dst tmp[2];
   unsigned i;

   for(i = 0; i < 2; ++i) {
      tmp[i] = ureg_DECL_temporary(shader);
   }

   /*
    * tmp[0..1] = dot4(l[0..1], r[0..1])
    * dst = tmp[0] + tmp[1]
    */
   ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
   ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(l[1]), ureg_src(r[1]));
   ureg_ADD(shader, dst, ureg_src(tmp[0]), ureg_src(tmp[1]));

   for(i = 0; i < 2; ++i) {
      ureg_release_temporary(shader, tmp[i]);
   }
}
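
/*
 * Added note: this is an 8-element dot product evaluated as two DP4s plus
 * an ADD; in scalar terms (illustrative only):
 *
 *    dst = dot4(l[0], r[0]) + dot4(l[1], r[1])
 *        = l0*r0 + l1*r1 + ... + l7*r7
 */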

static void *
create_transpose_frag_shader(struct vl_idct *idct)
{
   struct pipe_resource *transpose = idct->textures.individual.transpose;
   struct pipe_resource *intermediate = idct->textures.individual.intermediate;

   struct ureg_program *shader;

   struct ureg_src block, tex, sampler[2];
   struct ureg_src start[2];

   struct ureg_dst m[2][2];
   struct ureg_dst tmp, fragment;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   block = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
   tex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_CONSTANT);

   sampler[0] = ureg_DECL_sampler(shader, 0);
   sampler[1] = ureg_DECL_sampler(shader, 1);

   start[0] = ureg_imm1f(shader, 0.0f);
   start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);

   fetch_four(shader, m[0], block, sampler[0], start[0], false, transpose->width0);
   fetch_one(shader, m[1], tex, sampler[1], start[1], block, intermediate->height0);

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   tmp = ureg_DECL_temporary(shader);
   matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), m[0], m[1]);
   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));

   ureg_release_temporary(shader, tmp);
   ureg_release_temporary(shader, m[0][0]);
   ureg_release_temporary(shader, m[0][1]);
   ureg_release_temporary(shader, m[1][0]);
   ureg_release_temporary(shader, m[1][1]);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
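
/*
 * Added note: in the second pass each destination pixel is one 8-element
 * dot product between a row fetched from the transposed DCT matrix and a
 * column fetched from the intermediate texture, scaled by STAGE2_SCALE
 * before it is written out.
 */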

static void *
create_matrix_frag_shader(struct vl_idct *idct)
{
   struct pipe_resource *matrix = idct->textures.individual.matrix;
   struct pipe_resource *source = idct->textures.individual.source;

   struct ureg_program *shader;

   struct ureg_src tc[2], sampler[2];
   struct ureg_src start[2];

   struct ureg_dst l[2], r[2];
   struct ureg_dst t_tc, tmp, fragment[NR_RENDER_TARGETS];

   unsigned i;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   t_tc = ureg_DECL_temporary(shader);
   tmp = ureg_DECL_temporary(shader);

   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);

   sampler[0] = ureg_DECL_sampler(shader, 1);
   sampler[1] = ureg_DECL_sampler(shader, 0);

   start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
   start[1] = ureg_imm1f(shader, 0.0f);

   for (i = 0; i < NR_RENDER_TARGETS; ++i)
      fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);

   fetch_four(shader, l, tc[0], sampler[0], start[0], false, source->width0);
   ureg_MUL(shader, l[0], ureg_src(l[0]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
   ureg_MUL(shader, l[1], ureg_src(l[1]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));

   for (i = 0; i < NR_RENDER_TARGETS; ++i) {

#if NR_RENDER_TARGETS == 8
      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i));
      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], true, matrix->width0);
#elif NR_RENDER_TARGETS == 1
      fetch_four(shader, r, tc[1], sampler[1], start[1], true, matrix->width0);
#else
#error invalid number of render targets
#endif

      matrix_mul(shader, fragment[i], l, r);
      ureg_release_temporary(shader, r[0]);
      ureg_release_temporary(shader, r[1]);
   }

   ureg_release_temporary(shader, t_tc);
   ureg_release_temporary(shader, tmp);
   ureg_release_temporary(shader, l[0]);
   ureg_release_temporary(shader, l[1]);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
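
/*
 * Added note: the first pass fetches eight source coefficients of a row
 * (scaled up by STAGE1_SCALE) and dots them with eight entries of the
 * uploaded DCT matrix, writing one value per render target into the
 * intermediate texture.
 */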

static void *
create_empty_block_frag_shader(struct vl_idct *idct)
{
   struct ureg_program *shader;
   struct ureg_dst fragment;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}

static bool
init_shaders(struct vl_idct *idct)
{
   idct->matrix_vs = create_vert_shader(idct, true);
   idct->matrix_fs = create_matrix_frag_shader(idct);

   idct->transpose_vs = create_vert_shader(idct, true);
   idct->transpose_fs = create_transpose_frag_shader(idct);

   idct->eb_vs = create_vert_shader(idct, false);
   idct->eb_fs = create_empty_block_frag_shader(idct);

   return
      idct->transpose_vs != NULL && idct->transpose_fs != NULL &&
      idct->matrix_vs != NULL && idct->matrix_fs != NULL &&
      idct->eb_vs != NULL && idct->eb_fs != NULL;
}

static void
cleanup_shaders(struct vl_idct *idct)
{
   idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
   idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);

   idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
   idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);

   idct->pipe->delete_vs_state(idct->pipe, idct->eb_vs);
   idct->pipe->delete_fs_state(idct->pipe, idct->eb_fs);
}

static bool
init_buffers(struct vl_idct *idct)
{
   struct pipe_resource template;
   struct pipe_sampler_view sampler_view;
   struct pipe_vertex_element vertex_elems[2];
   unsigned i;

   memset(&template, 0, sizeof(struct pipe_resource));
   template.last_level = 0;
   template.depth0 = 1;
   template.bind = PIPE_BIND_SAMPLER_VIEW;
   template.flags = 0;

   template.target = PIPE_TEXTURE_2D;
   template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
   template.width0 = idct->destination->width0 / 4;
   template.height0 = idct->destination->height0;
   template.depth0 = 1;
   template.usage = PIPE_USAGE_STREAM;
   idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);

   template.target = PIPE_TEXTURE_3D;
   template.format = PIPE_FORMAT_R16_SNORM;
   template.width0 = idct->destination->width0 / NR_RENDER_TARGETS;
   template.depth0 = NR_RENDER_TARGETS;
   template.usage = PIPE_USAGE_STATIC;
   idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);

   for (i = 0; i < 4; ++i) {
      if(idct->textures.all[i] == NULL)
         return false; /* a texture failed to allocate */

      u_sampler_view_default_template(&sampler_view, idct->textures.all[i], idct->textures.all[i]->format);
      idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
   }

   idct->vertex_bufs.individual.quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks);

   if(idct->vertex_bufs.individual.quad.buffer == NULL)
      return false;

   idct->vertex_bufs.individual.pos.stride = sizeof(struct vertex2f);
   idct->vertex_bufs.individual.pos.max_index = 4 * idct->max_blocks - 1;
   idct->vertex_bufs.individual.pos.buffer_offset = 0;
   idct->vertex_bufs.individual.pos.buffer = pipe_buffer_create
   (
      idct->pipe->screen,
      PIPE_BIND_VERTEX_BUFFER,
      sizeof(struct vertex2f) * 4 * idct->max_blocks
   );

   if(idct->vertex_bufs.individual.pos.buffer == NULL)
      return false;

   /* Rect element */
   vertex_elems[0].src_offset = 0;
   vertex_elems[0].instance_divisor = 0;
   vertex_elems[0].vertex_buffer_index = 0;
   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;

   /* Pos element */
   vertex_elems[1].src_offset = 0;
   vertex_elems[1].instance_divisor = 0;
   vertex_elems[1].vertex_buffer_index = 1;
   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;

   idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);

   return true;
}
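
/*
 * Added note: besides the externally supplied matrix/transpose textures,
 * init_buffers() creates the streamed source texture that
 * vl_idct_add_block() copies coefficients into (four 16 bit values per
 * RGBA16 texel, hence width0 / 4) and the intermediate texture the first
 * pass renders to. The quad vertex buffer is uploaded once; the position
 * buffer is refilled from the vl_vb shadow buffers on every flush.
 */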

static void
cleanup_buffers(struct vl_idct *idct)
{
   unsigned i;

   assert(idct);

   for (i = 0; i < 4; ++i) {
      pipe_sampler_view_reference(&idct->sampler_views.all[i], NULL);
      pipe_resource_reference(&idct->textures.all[i], NULL);
   }

   idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
   pipe_resource_reference(&idct->vertex_bufs.individual.quad.buffer, NULL);
   pipe_resource_reference(&idct->vertex_bufs.individual.pos.buffer, NULL);
}

static void
init_state(struct vl_idct *idct)
{
   struct pipe_sampler_state sampler;
   unsigned i;

   idct->viewport[0].scale[0] = idct->textures.individual.intermediate->width0;
   idct->viewport[0].scale[1] = idct->textures.individual.intermediate->height0;

   idct->viewport[1].scale[0] = idct->destination->width0;
   idct->viewport[1].scale[1] = idct->destination->height0;

   idct->fb_state[0].width = idct->textures.individual.intermediate->width0;
   idct->fb_state[0].height = idct->textures.individual.intermediate->height0;

   idct->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
      idct->fb_state[0].cbufs[i] = idct->pipe->screen->get_tex_surface(
         idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, i,
         PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
   }

   idct->fb_state[1].width = idct->destination->width0;
   idct->fb_state[1].height = idct->destination->height0;

   idct->fb_state[1].nr_cbufs = 1;
   idct->fb_state[1].cbufs[0] = idct->pipe->screen->get_tex_surface(
      idct->pipe->screen, idct->destination, 0, 0, 0,
      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);

   for(i = 0; i < 2; ++i) {
      idct->viewport[i].scale[2] = 1;
      idct->viewport[i].scale[3] = 1;
      idct->viewport[i].translate[0] = 0;
      idct->viewport[i].translate[1] = 0;
      idct->viewport[i].translate[2] = 0;
      idct->viewport[i].translate[3] = 0;

      idct->fb_state[i].zsbuf = NULL;
   }

   for (i = 0; i < 4; ++i) {
      memset(&sampler, 0, sizeof(sampler));
      sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
      sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
      sampler.compare_func = PIPE_FUNC_ALWAYS;
      sampler.normalized_coords = 1;
      /*sampler.shadow_ambient = ; */
      /*sampler.lod_bias = ; */
      sampler.min_lod = 0;
      /*sampler.max_lod = ; */
      /*sampler.border_color[0] = ; */
      /*sampler.max_anisotropy = ; */
      idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
   }
}
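
/*
 * Added note: viewport/framebuffer pair 0 targets the intermediate
 * texture (first pass), pair 1 targets the destination (second pass and
 * the empty-block clear). All four samplers use nearest filtering with
 * normalized, edge-clamped coordinates.
 */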

static void
cleanup_state(struct vl_idct *idct)
{
   unsigned i;

   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
      idct->pipe->screen->tex_surface_destroy(idct->fb_state[0].cbufs[i]);
   }

   idct->pipe->screen->tex_surface_destroy(idct->fb_state[1].cbufs[0]);

   for (i = 0; i < 4; ++i)
      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
}
565
566 struct pipe_resource *
567 vl_idct_upload_matrix(struct pipe_context *pipe)
568 {
569 struct pipe_resource template, *matrix;
570 struct pipe_transfer *buf_transfer;
571 unsigned i, j, pitch;
572 float *f;
573
574 struct pipe_box rect =
575 {
576 0, 0, 0,
577 BLOCK_WIDTH,
578 BLOCK_HEIGHT,
579 1
580 };
581
582 memset(&template, 0, sizeof(struct pipe_resource));
583 template.target = PIPE_TEXTURE_2D;
584 template.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
585 template.last_level = 0;
586 template.width0 = 2;
587 template.height0 = 8;
588 template.depth0 = 1;
589 template.usage = PIPE_USAGE_IMMUTABLE;
590 template.bind = PIPE_BIND_SAMPLER_VIEW;
591 template.flags = 0;
592
593 matrix = pipe->screen->resource_create(pipe->screen, &template);
594
595 /* matrix */
596 buf_transfer = pipe->get_transfer
597 (
598 pipe, matrix,
599 u_subresource(0, 0),
600 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
601 &rect
602 );
603 pitch = buf_transfer->stride / sizeof(float);
604
605 f = pipe->transfer_map(pipe, buf_transfer);
606 for(i = 0; i < BLOCK_HEIGHT; ++i)
607 for(j = 0; j < BLOCK_WIDTH; ++j)
608 f[i * pitch + j] = const_matrix[j][i]; // transpose
609
610 pipe->transfer_unmap(pipe, buf_transfer);
611 pipe->transfer_destroy(pipe, buf_transfer);
612
613 return matrix;
614 }
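
/*
 * Added note: the matrix texture is 2 x 8 RGBA32F texels, i.e. eight
 * floats per row, and is filled with const_matrix transposed (note the
 * swapped indices above), which appears to match the swapped addressing
 * fetch_four() uses when right_side is set.
 */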

static void
xfer_buffers_map(struct vl_idct *idct)
{
   struct pipe_box rect =
   {
      0, 0, 0,
      idct->textures.individual.source->width0,
      idct->textures.individual.source->height0,
      1
   };

   idct->tex_transfer = idct->pipe->get_transfer
   (
      idct->pipe, idct->textures.individual.source,
      u_subresource(0, 0),
      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
      &rect
   );

   idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
}

static void
xfer_buffers_unmap(struct vl_idct *idct)
{
   idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
   idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
}

bool
vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst, struct pipe_resource *matrix)
{
   assert(idct && pipe && dst);

   idct->pipe = pipe;
   pipe_resource_reference(&idct->textures.individual.matrix, matrix);
   pipe_resource_reference(&idct->textures.individual.transpose, matrix);
   pipe_resource_reference(&idct->destination, dst);

   idct->max_blocks =
      align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
      align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
      idct->destination->depth0;

   if(!init_buffers(idct))
      return false;

   if(!init_shaders(idct)) {
      cleanup_buffers(idct);
      return false;
   }

   if(!vl_vb_init(&idct->blocks, idct->max_blocks)) {
      cleanup_shaders(idct);
      cleanup_buffers(idct);
      return false;
   }

   if(!vl_vb_init(&idct->empty_blocks, idct->max_blocks)) {
      vl_vb_cleanup(&idct->blocks);
      cleanup_shaders(idct);
      cleanup_buffers(idct);
      return false;
   }

   init_state(idct);

   xfer_buffers_map(idct);

   return true;
}
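
/*
 * Added note: a minimal usage sketch, assuming the caller already owns a
 * pipe_context 'pipe' and a destination resource 'dst' (both hypothetical
 * names here), with error handling omitted:
 *
 *    struct vl_idct idct;
 *    struct pipe_resource *matrix = vl_idct_upload_matrix(pipe);
 *    short coeffs[BLOCK_WIDTH * BLOCK_HEIGHT];
 *
 *    if (vl_idct_init(&idct, pipe, dst, matrix)) {
 *       // fill coeffs with the dequantized coefficients of one block
 *       vl_idct_add_block(&idct, 0, 0, coeffs);  // block (0,0) has data
 *       vl_idct_add_block(&idct, 1, 0, NULL);    // block (1,0) is empty
 *       vl_idct_flush(&idct);                    // runs both passes
 *       vl_idct_cleanup(&idct);
 *    }
 *    pipe_resource_reference(&matrix, NULL);
 */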

void
vl_idct_cleanup(struct vl_idct *idct)
{
   vl_vb_cleanup(&idct->blocks);
   vl_vb_cleanup(&idct->empty_blocks);
   cleanup_shaders(idct);
   cleanup_buffers(idct);

   cleanup_state(idct);

   pipe_resource_reference(&idct->destination, NULL);
}

void
vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
{
   unsigned tex_pitch;
   short *texels;

   unsigned i;

   assert(idct);

   if(block) {
      tex_pitch = idct->tex_transfer->stride / sizeof(short);
      texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;

      for (i = 0; i < BLOCK_HEIGHT; ++i)
         memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));

      vl_vb_add_block(&idct->blocks, false, x, y);
   } else {
      vl_vb_add_block(&idct->empty_blocks, true, x, y);
   }
}
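
/*
 * Added note (this is what the "shadow buffer" in the commit message
 * refers to, as far as the code shows): vl_idct_add_block() only writes
 * into the mapped source texture and into the vl_vb shadow buffers for
 * the per-block vertex data. vl_idct_flush() then uploads the shadow
 * buffers into the position vertex buffer in one go and issues up to
 * three draws: the two IDCT stages for non-empty blocks and a clear pass
 * for empty ones.
 */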
void
vl_idct_flush(struct vl_idct *idct)
{
   struct pipe_transfer *vec_transfer;
   struct quadf *vectors;
   unsigned num_blocks, num_empty_blocks;

   assert(idct);

   vectors = pipe_buffer_map
   (
      idct->pipe,
      idct->vertex_bufs.individual.pos.buffer,
      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
      &vec_transfer
   );

   num_blocks = vl_vb_upload(&idct->blocks, vectors);
   num_empty_blocks = vl_vb_upload(&idct->empty_blocks, vectors + num_blocks);

   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, vec_transfer);

   xfer_buffers_unmap(idct);

   if(num_blocks > 0) {

      /* first stage */
      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]);
      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[0]);

      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]);
      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
      idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
      idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);

      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);

      /* second stage */
      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);

      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]);
      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
      idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
      idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);

      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
   }

   if(num_empty_blocks > 0) {

      /* empty block handling */
      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);

      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
      idct->pipe->bind_vs_state(idct->pipe, idct->eb_vs);
      idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs);

      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, num_blocks * 4, num_empty_blocks * 4);
   }

   xfer_buffers_map(idct);
}