[g3dvl] join empty blocks to get larger slices
[mesa.git] / src / gallium / auxiliary / vl / vl_idct.c
1 /**************************************************************************
2 *
3 * Copyright 2010 Christian König
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_idct.h"
29 #include "util/u_draw.h"
30 #include <assert.h>
31 #include <pipe/p_context.h>
32 #include <pipe/p_screen.h>
33 #include <util/u_inlines.h>
34 #include <util/u_sampler.h>
35 #include <util/u_format.h>
36 #include <tgsi/tgsi_ureg.h>
37 #include "vl_types.h"
38
39 #define BLOCK_WIDTH 8
40 #define BLOCK_HEIGHT 8
41
42 #define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
43
44 #define STAGE1_SCALE 4.0f
45 #define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE)
46
/* Constant buffer layout for the vertex shaders (normalization factors). */
struct vertex_shader_consts
{
   struct vertex4f norm;
};
51
/* Vertex shader input slots: quad corner (rect) and block position. */
enum VS_INPUT
{
   VS_I_RECT,
   VS_I_VPOS,

   NUM_VS_INPUTS
};
59
/* Vertex shader output slots; the generic ones double as the fragment
   shader input semantic indices. */
enum VS_OUTPUT
{
   VS_O_VPOS,
   VS_O_BLOCK,
   VS_O_TEX,
   VS_O_START
};
67
/* 8x8 IDCT basis matrix uploaded by vl_idct_upload_matrix.
   NOTE(review): some entries differ in the last decimals from their
   symmetric counterparts (e.g. 0.353553 vs 0.3535530) — presumably
   intentional rounding from the original author; verify before
   normalizing. */
static const float const_matrix[8][8] = {
   { 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.3535530f, 0.353553f, 0.3535530f },
   { 0.4903930f, 0.4157350f, 0.2777850f, 0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f },
   { 0.4619400f, 0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f, 0.191342f, 0.4619400f },
   { 0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f, 0.2777850f, 0.4903930f, 0.097545f, -0.4157350f },
   { 0.3535530f, -0.3535530f, -0.3535530f, 0.3535540f, 0.3535530f, -0.3535540f, -0.353553f, 0.3535530f },
   { 0.2777850f, -0.4903930f, 0.0975452f, 0.4157350f, -0.4157350f, -0.0975451f, 0.490393f, -0.2777850f },
   { 0.1913420f, -0.4619400f, 0.4619400f, -0.1913420f, -0.1913410f, 0.4619400f, -0.461940f, 0.1913420f },
   { 0.0975451f, -0.2777850f, 0.4157350f, -0.4903930f, 0.4903930f, -0.4157350f, 0.277786f, -0.0975458f }
};
78
/* Corner vertices of a unit quad covering one block; replicated per block
   into the quad vertex buffer by init_constants. */
static const struct vertex2f const_quad[4] = {
   {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
};
83
/*
 * Create the vertex shader shared by all rendering stages.
 *
 * Expands each vertex (vrect quad corner plus vpos block position) into a
 * position covering the block, scaled to the destination size.  When
 * calc_src_cords is set it additionally emits the texture coordinates the
 * fragment shaders consume: block-local corner, absolute position and
 * scaled block start.
 */
static void *
create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
{
   struct ureg_program *shader;
   struct ureg_src scale;
   struct ureg_src vrect, vpos;
   struct ureg_dst t_vpos;
   struct ureg_dst o_vpos, o_block, o_tex, o_start;

   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!shader)
      return NULL;

   t_vpos = ureg_DECL_temporary(shader);

   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);

   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);

   /*
    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
    *
    * t_vpos = vpos + vrect
    * o_vpos.xy = t_vpos * scale
    * o_vpos.zw = vpos
    *
    * o_block = vrect
    * o_tex = t_pos
    * o_start = vpos * scale
    *
    */
   scale = ureg_imm2f(shader,
      (float)BLOCK_WIDTH / idct->destination->width0,
      (float)BLOCK_HEIGHT / idct->destination->height0);

   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);

   if(calc_src_cords) {
      o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
      o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
      o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);

      ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
      ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
      ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
   }

   ureg_release_temporary(shader, t_vpos);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
141
/*
 * Emit code fetching an 8-element column from a 3D texture.
 *
 * Starting at (tc.x, start.y, block.x), eight scalar texels are read,
 * stepping t_tc.y by 1/height after each fetch, and packed into the
 * x/y/z/w components of m[0] and m[1].  The caller must release the two
 * returned temporaries.
 */
static void
fetch_one(struct ureg_program *shader, struct ureg_dst m[2],
   struct ureg_src tc, struct ureg_src sampler,
   struct ureg_src start, struct ureg_src block, float height)
{
   struct ureg_dst t_tc, tmp;
   unsigned i, j;

   t_tc = ureg_DECL_temporary(shader);
   tmp = ureg_DECL_temporary(shader);

   m[0] = ureg_DECL_temporary(shader);
   m[1] = ureg_DECL_temporary(shader);

   /*
    * t_tc = (tc.x, start.y, block.x)
    * m[0..1].xyzw = tex(t_tc, sampler), t_tc.y += 1 / height per fetch
    */
   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(tc, TGSI_SWIZZLE_X));
   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(start, TGSI_SWIZZLE_Y));
   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(block, TGSI_SWIZZLE_X));

   for(i = 0; i < 2; ++i) {
      for(j = 0; j < 4; ++j) {
         /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
         ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
         ureg_MOV(shader, ureg_writemask(m[i], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));

         if(i != 1 || j != 3) /* skip the last add */
            ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y),
               ureg_src(t_tc), ureg_imm1f(shader, 1.0f / height));
      }
   }

   ureg_release_temporary(shader, t_tc);
   ureg_release_temporary(shader, tmp);
}
180
/*
 * Emit code fetching two adjacent RGBA texels (eight scalars) from a 2D
 * texture into m[0] and m[1].
 *
 * right_side selects whether the walk is horizontal (start.y, tc.x
 * swapped in) or vertical; t_tc.x advances by 1/width between the two
 * fetches.  The caller must release the two returned temporaries.
 */
static void
fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
   struct ureg_src tc, struct ureg_src sampler,
   struct ureg_src start, bool right_side, float width)
{
   struct ureg_dst t_tc;

   t_tc = ureg_DECL_temporary(shader);
   m[0] = ureg_DECL_temporary(shader);
   m[1] = ureg_DECL_temporary(shader);

   /*
    * t_tc.x = right_side ? start.x : tc.x
    * t_tc.y = right_side ? tc.y : start.y
    * m[0..1] = tex(t_tc++, sampler)
    */
   if(right_side) {
      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_Y));
      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_X));
   } else {
      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_X));
      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_Y));
   }

   ureg_TEX(shader, m[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
   ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / width));
   ureg_TEX(shader, m[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);

   ureg_release_temporary(shader, t_tc);
}
211
212 static void
213 matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
214 {
215 struct ureg_dst tmp[2];
216 unsigned i;
217
218 for(i = 0; i < 2; ++i) {
219 tmp[i] = ureg_DECL_temporary(shader);
220 }
221
222 /*
223 * tmp[0..1] = dot4(m[0][0..1], m[1][0..1])
224 * dst = tmp[0] + tmp[1]
225 */
226 ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
227 ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(l[1]), ureg_src(r[1]));
228 ureg_ADD(shader, dst, ureg_src(tmp[0]), ureg_src(tmp[1]));
229
230 for(i = 0; i < 2; ++i) {
231 ureg_release_temporary(shader, tmp[i]);
232 }
233 }
234
/*
 * Create the fragment shader for the second (transpose) stage.
 *
 * Fetches four transpose-matrix values and one intermediate-result column,
 * forms their dot product via matrix_mul and scales it by STAGE2_SCALE
 * before writing the final color.
 */
static void *
create_transpose_frag_shader(struct vl_idct *idct)
{
   struct pipe_resource *transpose = idct->textures.individual.transpose;
   struct pipe_resource *intermediate = idct->textures.individual.intermediate;

   struct ureg_program *shader;

   struct ureg_src block, tex, sampler[2];
   struct ureg_src start[2];

   struct ureg_dst m[2][2];
   struct ureg_dst tmp, fragment;

   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   if (!shader)
      return NULL;

   block = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
   tex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_CONSTANT);

   sampler[0] = ureg_DECL_sampler(shader, 0);
   sampler[1] = ureg_DECL_sampler(shader, 1);

   start[0] = ureg_imm1f(shader, 0.0f);
   start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);

   /* m[0] = four values of the transpose matrix, m[1] = one column of the
      intermediate result */
   fetch_four(shader, m[0], block, sampler[0], start[0], false, transpose->width0);
   fetch_one(shader, m[1], tex, sampler[1], start[1], block, intermediate->height0);

   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);

   tmp = ureg_DECL_temporary(shader);
   matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), m[0], m[1]);
   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));

   ureg_release_temporary(shader, tmp);
   ureg_release_temporary(shader, m[0][0]);
   ureg_release_temporary(shader, m[0][1]);
   ureg_release_temporary(shader, m[1][0]);
   ureg_release_temporary(shader, m[1][1]);

   ureg_END(shader);

   return ureg_create_shader_and_destroy(shader, idct->pipe);
}
281
282 static void *
283 create_matrix_frag_shader(struct vl_idct *idct)
284 {
285 struct pipe_resource *matrix = idct->textures.individual.matrix;
286 struct pipe_resource *source = idct->textures.individual.source;
287
288 struct ureg_program *shader;
289
290 struct ureg_src tc[2], sampler[2];
291 struct ureg_src start[2];
292
293 struct ureg_dst l[2], r[2];
294 struct ureg_dst t_tc, tmp, fragment[BLOCK_WIDTH];
295
296 unsigned i;
297
298 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
299 if (!shader)
300 return NULL;
301
302 t_tc = ureg_DECL_temporary(shader);
303 tmp = ureg_DECL_temporary(shader);
304
305 tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
306 tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
307
308 sampler[0] = ureg_DECL_sampler(shader, 1);
309 sampler[1] = ureg_DECL_sampler(shader, 0);
310
311 start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
312 start[1] = ureg_imm1f(shader, 0.0f);
313
314 for (i = 0; i < BLOCK_WIDTH; ++i)
315 fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
316
317 fetch_four(shader, l, tc[0], sampler[0], start[0], false, source->width0);
318 ureg_MUL(shader, l[0], ureg_src(l[0]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
319 ureg_MUL(shader, l[1], ureg_src(l[1]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
320
321 for (i = 0; i < BLOCK_WIDTH; ++i) {
322 ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i));
323 fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], true, matrix->width0);
324 matrix_mul(shader, fragment[i], l, r);
325 ureg_release_temporary(shader, r[0]);
326 ureg_release_temporary(shader, r[1]);
327 }
328
329 ureg_release_temporary(shader, t_tc);
330 ureg_release_temporary(shader, tmp);
331 ureg_release_temporary(shader, l[0]);
332 ureg_release_temporary(shader, l[1]);
333
334 ureg_END(shader);
335
336 return ureg_create_shader_and_destroy(shader, idct->pipe);
337 }
338
339 static void *
340 create_empty_block_frag_shader(struct vl_idct *idct)
341 {
342 struct ureg_program *shader;
343 struct ureg_dst fragment;
344
345 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
346 if (!shader)
347 return NULL;
348
349 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
350
351 ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));
352
353 ureg_END(shader);
354
355 return ureg_create_shader_and_destroy(shader, idct->pipe);
356 }
357
358 static bool
359 init_shaders(struct vl_idct *idct)
360 {
361 idct->matrix_vs = create_vert_shader(idct, true);
362 idct->matrix_fs = create_matrix_frag_shader(idct);
363
364 idct->transpose_vs = create_vert_shader(idct, true);
365 idct->transpose_fs = create_transpose_frag_shader(idct);
366
367 idct->eb_vs = create_vert_shader(idct, false);
368 idct->eb_fs = create_empty_block_frag_shader(idct);
369
370 return
371 idct->transpose_vs != NULL && idct->transpose_fs != NULL &&
372 idct->matrix_vs != NULL && idct->matrix_fs != NULL &&
373 idct->eb_vs != NULL && idct->eb_fs != NULL;
374 }
375
376 static void
377 cleanup_shaders(struct vl_idct *idct)
378 {
379 idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
380 idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
381
382 idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
383 idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
384
385 idct->pipe->delete_vs_state(idct->pipe, idct->eb_vs);
386 idct->pipe->delete_fs_state(idct->pipe, idct->eb_fs);
387 }
388
389 static bool
390 init_buffers(struct vl_idct *idct)
391 {
392 struct pipe_resource template;
393 struct pipe_sampler_view sampler_view;
394 struct pipe_vertex_element vertex_elems[2];
395 unsigned i;
396
397 idct->max_blocks =
398 align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
399 align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
400 idct->destination->depth0;
401
402 memset(&template, 0, sizeof(struct pipe_resource));
403 template.last_level = 0;
404 template.depth0 = 1;
405 template.bind = PIPE_BIND_SAMPLER_VIEW;
406 template.flags = 0;
407
408 template.target = PIPE_TEXTURE_2D;
409 template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
410 template.width0 = idct->destination->width0 / 4;
411 template.height0 = idct->destination->height0;
412 template.depth0 = 1;
413 template.usage = PIPE_USAGE_STREAM;
414 idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
415
416 template.target = PIPE_TEXTURE_3D;
417 template.format = PIPE_FORMAT_R16_SNORM;
418 template.width0 = idct->destination->width0 / 8;
419 template.depth0 = 8;
420 template.usage = PIPE_USAGE_STATIC;
421 idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
422
423 for (i = 0; i < 4; ++i) {
424 if(idct->textures.all[i] == NULL)
425 return false; /* a texture failed to allocate */
426
427 u_sampler_view_default_template(&sampler_view, idct->textures.all[i], idct->textures.all[i]->format);
428 idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
429 }
430
431 idct->vertex_bufs.individual.quad.stride = sizeof(struct vertex2f);
432 idct->vertex_bufs.individual.quad.max_index = 4 * idct->max_blocks - 1;
433 idct->vertex_bufs.individual.quad.buffer_offset = 0;
434 idct->vertex_bufs.individual.quad.buffer = pipe_buffer_create
435 (
436 idct->pipe->screen,
437 PIPE_BIND_VERTEX_BUFFER,
438 sizeof(struct vertex2f) * 4 * idct->max_blocks
439 );
440
441 if(idct->vertex_bufs.individual.quad.buffer == NULL)
442 return false;
443
444 idct->vertex_bufs.individual.pos.stride = sizeof(struct vertex2f);
445 idct->vertex_bufs.individual.pos.max_index = 4 * idct->max_blocks - 1;
446 idct->vertex_bufs.individual.pos.buffer_offset = 0;
447 idct->vertex_bufs.individual.pos.buffer = pipe_buffer_create
448 (
449 idct->pipe->screen,
450 PIPE_BIND_VERTEX_BUFFER,
451 sizeof(struct vertex2f) * 4 * idct->max_blocks
452 );
453
454 if(idct->vertex_bufs.individual.pos.buffer == NULL)
455 return false;
456
457 /* Rect element */
458 vertex_elems[0].src_offset = 0;
459 vertex_elems[0].instance_divisor = 0;
460 vertex_elems[0].vertex_buffer_index = 0;
461 vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
462
463 /* Pos element */
464 vertex_elems[1].src_offset = 0;
465 vertex_elems[1].instance_divisor = 0;
466 vertex_elems[1].vertex_buffer_index = 1;
467 vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
468
469 idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
470
471 return true;
472 }
473
474 static void
475 cleanup_buffers(struct vl_idct *idct)
476 {
477 unsigned i;
478
479 assert(idct);
480
481 for (i = 0; i < 4; ++i) {
482 pipe_sampler_view_reference(&idct->sampler_views.all[i], NULL);
483 pipe_resource_reference(&idct->textures.all[i], NULL);
484 }
485
486 idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
487 pipe_resource_reference(&idct->vertex_bufs.individual.quad.buffer, NULL);
488 pipe_resource_reference(&idct->vertex_bufs.individual.pos.buffer, NULL);
489 }
490
491 static void
492 init_constants(struct vl_idct *idct)
493 {
494 struct pipe_transfer *buf_transfer;
495 struct vertex2f *v;
496
497 unsigned i;
498
499 /* quad vectors */
500 v = pipe_buffer_map
501 (
502 idct->pipe,
503 idct->vertex_bufs.individual.quad.buffer,
504 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
505 &buf_transfer
506 );
507 for ( i = 0; i < idct->max_blocks; ++i)
508 memcpy(v + i * 4, &const_quad, sizeof(const_quad));
509 pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.quad.buffer, buf_transfer);
510 }
511
/*
 * Set up the static pipe state used by both render passes: viewports,
 * framebuffer states and sampler states.
 */
static void
init_state(struct vl_idct *idct)
{
   struct pipe_sampler_state sampler;
   unsigned i;

   idct->num_blocks = 0;
   idct->num_empty_blocks = 0;

   /* Pass 0 renders into the intermediate texture, pass 1 into the
      destination. */
   idct->viewport[0].scale[0] = idct->textures.individual.intermediate->width0;
   idct->viewport[0].scale[1] = idct->textures.individual.intermediate->height0;

   idct->viewport[1].scale[0] = idct->destination->width0;
   idct->viewport[1].scale[1] = idct->destination->height0;

   idct->fb_state[0].width = idct->textures.individual.intermediate->width0;
   idct->fb_state[0].height = idct->textures.individual.intermediate->height0;

   /* One color buffer per depth layer of the intermediate 3D texture. */
   idct->fb_state[0].nr_cbufs = 8;
   for(i = 0; i < 8; ++i) {
      idct->fb_state[0].cbufs[i] = idct->pipe->screen->get_tex_surface(
         idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, i,
         PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
   }

   idct->fb_state[1].width = idct->destination->width0;
   idct->fb_state[1].height = idct->destination->height0;

   idct->fb_state[1].nr_cbufs = 1;
   idct->fb_state[1].cbufs[0] = idct->pipe->screen->get_tex_surface(
      idct->pipe->screen, idct->destination, 0, 0, 0,
      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);

   /* Settings common to both passes. */
   for(i = 0; i < 2; ++i) {
      idct->viewport[i].scale[2] = 1;
      idct->viewport[i].scale[3] = 1;
      idct->viewport[i].translate[0] = 0;
      idct->viewport[i].translate[1] = 0;
      idct->viewport[i].translate[2] = 0;
      idct->viewport[i].translate[3] = 0;

      idct->fb_state[i].zsbuf = NULL;
   }

   /* Four identical nearest/clamp-to-edge sampler states, one per
      texture. */
   for (i = 0; i < 4; ++i) {
      memset(&sampler, 0, sizeof(sampler));
      sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
      sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
      sampler.compare_func = PIPE_FUNC_ALWAYS;
      sampler.normalized_coords = 1;
      /*sampler.shadow_ambient = ; */
      /*sampler.lod_bias = ; */
      sampler.min_lod = 0;
      /*sampler.max_lod = ; */
      /*sampler.border_color[0] = ; */
      /*sampler.max_anisotropy = ; */
      idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
   }
}
576
577 static void
578 cleanup_state(struct vl_idct *idct)
579 {
580 unsigned i;
581
582 for(i = 0; i < 8; ++i) {
583 idct->pipe->screen->tex_surface_destroy(idct->fb_state[0].cbufs[i]);
584 }
585
586 idct->pipe->screen->tex_surface_destroy(idct->fb_state[1].cbufs[0]);
587
588 for (i = 0; i < 4; ++i)
589 idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
590 }
591
592 struct pipe_resource *
593 vl_idct_upload_matrix(struct pipe_context *pipe)
594 {
595 struct pipe_resource template, *matrix;
596 struct pipe_transfer *buf_transfer;
597 unsigned i, j, pitch;
598 float *f;
599
600 struct pipe_box rect =
601 {
602 0, 0, 0,
603 BLOCK_WIDTH,
604 BLOCK_HEIGHT,
605 1
606 };
607
608 memset(&template, 0, sizeof(struct pipe_resource));
609 template.target = PIPE_TEXTURE_2D;
610 template.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
611 template.last_level = 0;
612 template.width0 = 2;
613 template.height0 = 8;
614 template.depth0 = 1;
615 template.usage = PIPE_USAGE_IMMUTABLE;
616 template.bind = PIPE_BIND_SAMPLER_VIEW;
617 template.flags = 0;
618
619 matrix = pipe->screen->resource_create(pipe->screen, &template);
620
621 /* matrix */
622 buf_transfer = pipe->get_transfer
623 (
624 pipe, matrix,
625 u_subresource(0, 0),
626 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
627 &rect
628 );
629 pitch = buf_transfer->stride / sizeof(float);
630
631 f = pipe->transfer_map(pipe, buf_transfer);
632 for(i = 0; i < BLOCK_HEIGHT; ++i)
633 for(j = 0; j < BLOCK_WIDTH; ++j)
634 f[i * pitch + j] = const_matrix[j][i]; // transpose
635
636 pipe->transfer_unmap(pipe, buf_transfer);
637 pipe->transfer_destroy(pipe, buf_transfer);
638
639 return matrix;
640 }
641
/*
 * Map the per-frame transfer buffers: the source coefficient texture and
 * the block position vertex buffer.  Also resets the pending empty-block
 * run tracked by flush_empty_block.
 */
static void
xfer_buffers_map(struct vl_idct *idct)
{
   struct pipe_box rect =
   {
      0, 0, 0,
      idct->textures.individual.source->width0,
      idct->textures.individual.source->height0,
      1
   };

   idct->tex_transfer = idct->pipe->get_transfer
   (
      idct->pipe, idct->textures.individual.source,
      u_subresource(0, 0),
      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
      &rect
   );

   idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);

   idct->vectors = pipe_buffer_map
   (
      idct->pipe,
      idct->vertex_bufs.individual.pos.buffer,
      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
      &idct->vec_transfer
   );

   /* ~1 is the "no pending run" sentinel used by flush_empty_block.
      NOTE(review): this is ~1 (not ~0) throughout — presumably
      deliberate; keep consistent if changed. */
   idct->next_empty_block.l_x = ~1;
   idct->next_empty_block.l_y = ~1;
   idct->next_empty_block.r_x = ~1;
   idct->next_empty_block.r_y = ~1;
}
676
677 static void
678 xfer_buffers_unmap(struct vl_idct *idct)
679 {
680 pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, idct->vec_transfer);
681
682 idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
683 idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
684 }
685
/*
 * Initialize an IDCT instance rendering into dst using the given
 * precomputed matrix texture (see vl_idct_upload_matrix).
 *
 * Takes references on matrix (as both matrix and transpose texture) and
 * dst, then builds buffers, shaders and state and maps the per-frame
 * transfer buffers so blocks can be added immediately.
 *
 * NOTE(review): on the failure paths the references taken above are not
 * released (init_buffers failure skips cleanup_buffers entirely, and the
 * destination reference is never dropped) — verify whether callers rely
 * on this before changing.
 */
bool
vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst, struct pipe_resource *matrix)
{
   assert(idct && pipe && dst);

   idct->pipe = pipe;
   pipe_resource_reference(&idct->textures.individual.matrix, matrix);
   pipe_resource_reference(&idct->textures.individual.transpose, matrix);
   pipe_resource_reference(&idct->destination, dst);

   if(!init_buffers(idct))
      return false;

   if(!init_shaders(idct)) {
      cleanup_buffers(idct);
      return false;
   }

   init_state(idct);

   init_constants(idct);
   xfer_buffers_map(idct);

   return true;
}
711
/*
 * Tear down everything created by vl_idct_init and drop the destination
 * reference.
 *
 * NOTE(review): the transfer buffers mapped by xfer_buffers_map (at the
 * end of init/flush) are still mapped when this runs and are never
 * unmapped here — confirm whether that is intentional.
 */
void
vl_idct_cleanup(struct vl_idct *idct)
{
   cleanup_shaders(idct);
   cleanup_buffers(idct);

   cleanup_state(idct);

   pipe_resource_reference(&idct->destination, NULL);
}
722
/*
 * Join horizontally adjacent empty blocks into one run (larger slice).
 *
 * A pending run is tracked in next_empty_block (l_* = left end, r_* =
 * right end); ~1 marks "no pending run".  When the new block extends the
 * current run (same row, x == r_x + 1) only the right end is advanced.
 * Otherwise the pending run is emitted as one quad, written from the
 * tail of the position vertex buffer backwards (empty blocks fill it
 * right-to-left, see vl_idct_add_block), and a new run is started.
 */
static void
flush_empty_block(struct vl_idct *idct, unsigned new_x, unsigned new_y)
{
   if (idct->next_empty_block.l_x == ~1 ||
       idct->next_empty_block.l_y == ~1) {

      /* no pending run yet, start one at the new block */
      idct->next_empty_block.l_x = new_x;
      idct->next_empty_block.l_y = new_y;

   } else if (idct->next_empty_block.r_x != (new_x - 1) ||
              idct->next_empty_block.r_y != new_y) {

      /* not adjacent: emit the pending run as a single quad */
      struct vertex2f l, r, *v_dst;

      v_dst = idct->vectors + (idct->max_blocks - idct->num_empty_blocks) * 4 - 4;

      l.x = idct->next_empty_block.l_x;
      l.y = idct->next_empty_block.l_y;
      r.x = idct->next_empty_block.r_x;
      r.y = idct->next_empty_block.r_y;
      v_dst[0] = v_dst[3] = l;
      v_dst[1] = v_dst[2] = r;

      idct->next_empty_block.l_x = new_x;
      idct->next_empty_block.l_y = new_y;
      idct->num_empty_blocks++;
   }

   /* the new block always becomes the right end of the pending run */
   idct->next_empty_block.r_x = new_x;
   idct->next_empty_block.r_y = new_y;
}
754
/*
 * Queue one 8x8 block at block position (x, y).
 *
 * If block is non-NULL its coefficients are copied into the mapped source
 * texture and a quad is appended at the front of the position vertex
 * buffer.  A NULL block is an empty (all zero) block and is merged into
 * the empty-block runs filling the buffer from the back.
 */
void
vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
{
   struct vertex2f v, *v_dst;

   unsigned tex_pitch;
   short *texels;

   unsigned i;

   assert(idct);

   if(block) {
      /* copy the 8x8 coefficients row by row into the source texture */
      tex_pitch = idct->tex_transfer->stride / sizeof(short);
      texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;

      for (i = 0; i < BLOCK_HEIGHT; ++i)
         memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));

      /* non empty blocks fills the vector buffer from left to right */
      v_dst = idct->vectors + idct->num_blocks * 4;

      idct->num_blocks++;

      v.x = x;
      v.y = y;

      /* all four quad corners carry the same block position; the quad
         shape itself comes from the separate rect vertex buffer */
      for (i = 0; i < 4; ++i) {
         v_dst[i] = v;
      }

   } else {

      /* while empty blocks fills the vector buffer from right to left */
      flush_empty_block(idct, x, y);
   }
}
792
/*
 * Render all queued blocks and reset for the next frame.
 *
 * Unmaps the transfer buffers, runs the two-pass IDCT (matrix multiply
 * into the intermediate texture, then transpose into the destination)
 * over the non-empty blocks, clears the empty-block runs with the
 * trivial shader, and finally remaps the buffers for the next frame.
 */
void
vl_idct_flush(struct vl_idct *idct)
{
   /* force the last pending empty-block run out; (~1, ~1) can never be
      adjacent to a real run */
   flush_empty_block(idct, ~1, ~1);
   xfer_buffers_unmap(idct);

   if(idct->num_blocks > 0) {

      /* first stage */
      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]);
      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[0]);

      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]);
      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
      idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
      idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);

      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);

      /* second stage */
      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);

      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]);
      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
      idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
      idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);

      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
   }

   if(idct->num_empty_blocks > 0) {

      /* empty block handling: draw the joined runs from the tail of the
         position buffer with the constant-zero shader */
      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);

      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
      idct->pipe->bind_vs_state(idct->pipe, idct->eb_vs);
      idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs);

      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS,
         (idct->max_blocks - idct->num_empty_blocks) * 4,
         idct->num_empty_blocks * 4);
   }

   idct->num_blocks = 0;
   idct->num_empty_blocks = 0;
   xfer_buffers_map(idct);
}