[g3dvl] add skeleton and incomplete idct
[mesa.git] / src / gallium / auxiliary / vl / vl_idct.c
1 /**************************************************************************
2 *
3 * Copyright 2010 Christian König
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_idct.h"
29 #include <assert.h>
30 #include <pipe/p_context.h>
31 #include <pipe/p_screen.h>
32 #include <util/u_inlines.h>
33 #include <util/u_sampler.h>
34 #include <util/u_format.h>
35 #include <tgsi/tgsi_ureg.h>
36 #include "vl_types.h"
37
/* Width and height, in elements, of one block handled by this file. */
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8

/*
 * Scale factors applied inside the fragment shaders:
 *   SCALE_FACTOR_16_TO_12 evaluates to 32768 / 2048 = 16
 *   SCALE_FACTOR_9_TO_16  evaluates to 256 / 32768 = 1 / 128
 */
#define SCALE_FACTOR_16_TO_12 (32768.0f / 2048.0f)
#define SCALE_FACTOR_9_TO_16 (256.0f / 32768.0f)
42
43 struct vertex_shader_consts
44 {
45 struct vertex4f norm;
46 };
47
/* Vertex shader input slots, used as indices for ureg_DECL_vs_input(). */
enum VS_INPUT
{
   VS_I_RECT = 0,  /* "vrect" input of the vertex shader */
   VS_I_VPOS,      /* "vpos" input of the vertex shader */

   NUM_VS_INPUTS   /* number of inputs, keep last */
};
55
/*
 * Semantic indices of the vertex shader outputs. The fragment shaders
 * declare their inputs with the same indices.
 */
enum VS_OUTPUT
{
   VS_O_VPOS = 0,  /* position output */
   VS_O_BLOCK,     /* copy of the "vrect" input */
   VS_O_TEX,       /* scaled vpos + vrect */
   VS_O_START,     /* scaled vpos */
   VS_O_STEP       /* copy of the "norm" constant */
};
64
65
/*
 * 8x8 coefficient table, one source row per line.
 * NOTE(review): the values look like an orthonormal 8-point DCT basis
 * (0.353553 ~ 1/sqrt(8)) -- confirm against the derivation before
 * "cleaning up" the slightly uneven last digits.
 */
const float const_matrix[8][8] = {
   /* row 0 */
   { 0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,
     0.3535530f,  0.3535530f,  0.353553f,   0.3535530f },
   /* row 1 */
   { 0.4903930f,  0.4157350f,  0.2777850f,  0.0975451f,
    -0.0975452f, -0.2777850f, -0.415735f,  -0.4903930f },
   /* row 2 */
   { 0.4619400f,  0.1913420f, -0.1913420f, -0.4619400f,
    -0.4619400f, -0.1913420f,  0.191342f,   0.4619400f },
   /* row 3 */
   { 0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f,
     0.2777850f,  0.4903930f,  0.097545f,  -0.4157350f },
   /* row 4 */
   { 0.3535530f, -0.3535530f, -0.3535530f,  0.3535540f,
     0.3535530f, -0.3535540f, -0.353553f,   0.3535530f },
   /* row 5 */
   { 0.2777850f, -0.4903930f,  0.0975452f,  0.4157350f,
    -0.4157350f, -0.0975451f,  0.490393f,  -0.2777850f },
   /* row 6 */
   { 0.1913420f, -0.4619400f,  0.4619400f, -0.1913420f,
    -0.1913410f,  0.4619400f, -0.461940f,   0.1913420f },
   /* row 7 */
   { 0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,
     0.4903930f, -0.4157350f,  0.277786f,  -0.0975458f }
};
76
/*
 * 8x8 coefficient table, one source row per line. Entry [r][c] here
 * carries the same literal value as const_matrix[c][r], i.e. this is the
 * transpose of const_matrix.
 */
const float const_transpose[8][8] = {
   /* row 0 */
   { 0.3535530f,  0.4903930f,  0.4619400f,  0.4157350f,
     0.3535530f,  0.2777850f,  0.191342f,   0.0975451f },
   /* row 1 */
   { 0.3535530f,  0.4157350f,  0.1913420f, -0.0975452f,
    -0.3535530f, -0.4903930f, -0.461940f,  -0.2777850f },
   /* row 2 */
   { 0.3535530f,  0.2777850f, -0.1913420f, -0.4903930f,
    -0.3535530f,  0.0975452f,  0.461940f,   0.4157350f },
   /* row 3 */
   { 0.3535530f,  0.0975451f, -0.4619400f, -0.2777850f,
     0.3535540f,  0.4157350f, -0.191342f,  -0.4903930f },
   /* row 4 */
   { 0.3535530f, -0.0975452f, -0.4619400f,  0.2777850f,
     0.3535530f, -0.4157350f, -0.191341f,   0.4903930f },
   /* row 5 */
   { 0.3535530f, -0.2777850f, -0.1913420f,  0.4903930f,
    -0.3535540f, -0.0975451f,  0.461940f,  -0.4157350f },
   /* row 6 */
   { 0.3535530f, -0.4157350f,  0.1913420f,  0.0975450f,
    -0.3535530f,  0.4903930f, -0.461940f,   0.2777860f },
   /* row 7 */
   { 0.3535530f, -0.4903930f,  0.4619400f, -0.4157350f,
     0.3535530f, -0.2777850f,  0.191342f,  -0.0975458f }
};
87
88 static void *
89 create_vert_shader(struct vl_idct *idct)
90 {
91 struct ureg_program *shader;
92 struct ureg_src norm, bs;
93 struct ureg_src vrect, vpos;
94 struct ureg_dst scale, t_vpos;
95 struct ureg_dst o_vpos, o_block, o_tex, o_start, o_step;
96
97 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
98 if (!shader)
99 return NULL;
100
101 norm = ureg_DECL_constant(shader, 0);
102 bs = ureg_imm2f(shader, BLOCK_WIDTH, BLOCK_HEIGHT);
103
104 scale = ureg_DECL_temporary(shader);
105 t_vpos = ureg_DECL_temporary(shader);
106
107 vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
108 vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
109
110 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
111 o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
112 o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_TEX);
113 o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_START);
114 o_step = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP);
115
116 /*
117 * scale = norm * mbs;
118 *
119 * t_vpos = vpos + vrect
120 * o_vpos.xy = t_vpos * scale
121 * o_vpos.zw = vpos
122 *
123 * o_block = vrect
124 * o_tex = t_pos
125 * o_start = vpos * scale
126 * o_step = norm
127 *
128 */
129 ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, bs);
130
131 ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
132 ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), ureg_src(scale));
133 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
134 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
135
136 ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
137 ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
138 ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, ureg_src(scale));
139 ureg_MOV(shader, ureg_writemask(o_step, TGSI_WRITEMASK_XY), norm);
140
141 ureg_release_temporary(shader, t_vpos);
142 ureg_release_temporary(shader, scale);
143
144 ureg_END(shader);
145
146 return ureg_create_shader_and_destroy(shader, idct->pipe);
147 }
148
149 static void
150 matrix_mul(struct ureg_program *shader, struct ureg_dst dst,
151 struct ureg_src tc[2], struct ureg_src sampler[2],
152 struct ureg_src start[2], struct ureg_src step[2],
153 float scale[2])
154 {
155 struct ureg_dst t_tc[2], m[2][2], tmp[2];
156 unsigned i, j;
157
158 for(i = 0; i < 2; ++i) {
159 t_tc[i] = ureg_DECL_temporary(shader);
160 for(j = 0; j < 2; ++j)
161 m[i][j] = ureg_DECL_temporary(shader);
162 tmp[i] = ureg_DECL_temporary(shader);
163 }
164
165 /*
166 * m[0..1][0] = ?
167 * tmp[0..1] = dot4(m[0..1][0], m[0..1][1])
168 * fragment = tmp[0] + tmp[1]
169 */
170 ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), start[0]);
171 ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_Y), tc[0]);
172
173 ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), tc[1]);
174 ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), start[1]);
175
176 for(i = 0; i < 2; ++i) {
177 for(j = 0; j < 4; ++j) {
178 /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
179 ureg_TEX(shader, tmp[0], TGSI_TEXTURE_2D, ureg_src(t_tc[0]), sampler[0]);
180 ureg_MOV(shader, ureg_writemask(m[i][0], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp[0]), TGSI_SWIZZLE_X));
181
182 ureg_TEX(shader, tmp[1], TGSI_TEXTURE_2D, ureg_src(t_tc[1]), sampler[1]);
183 ureg_MOV(shader, ureg_writemask(m[i][1], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp[1]), TGSI_SWIZZLE_X));
184
185 ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), ureg_src(t_tc[0]), step[0]);
186 ureg_ADD(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_src(t_tc[1]), step[1]);
187 }
188
189 if(scale[0] != 1.0f)
190 ureg_MUL(shader, m[i][0], ureg_src(m[i][0]), ureg_scalar(ureg_imm1f(shader, scale[0]), TGSI_SWIZZLE_X));
191
192 if(scale[1] != 1.0f)
193 ureg_MUL(shader, m[i][1], ureg_src(m[i][1]), ureg_scalar(ureg_imm1f(shader, scale[1]), TGSI_SWIZZLE_X));
194 }
195
196 ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(m[0][0]), ureg_src(m[0][1]));
197 ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(m[1][0]), ureg_src(m[1][1]));
198 ureg_ADD(shader, ureg_writemask(dst, TGSI_WRITEMASK_X), ureg_src(tmp[0]), ureg_src(tmp[1]));
199
200 for(i = 0; i < 2; ++i) {
201 ureg_release_temporary(shader, t_tc[i]);
202 for(j = 0; j < 2; ++j)
203 ureg_release_temporary(shader, m[i][j]);
204 ureg_release_temporary(shader, tmp[i]);
205 }
206 }
207
208 static void *
209 create_transpose_frag_shader(struct vl_idct *idct)
210 {
211 struct ureg_program *shader;
212 struct ureg_src tc[2], sampler[2];
213 struct ureg_src start[2], step[2];
214 struct ureg_dst fragment;
215 float scale[2];
216
217 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
218 if (!shader)
219 return NULL;
220
221 tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
222 tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
223
224 start[0] = ureg_imm1f(shader, 0.0f);
225 start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
226
227 step[0] = ureg_imm1f(shader, 1.0f / BLOCK_HEIGHT);
228 step[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
229
230 sampler[0] = ureg_DECL_sampler(shader, 0);
231 sampler[1] = ureg_DECL_sampler(shader, 1);
232
233 scale[0] = 1.0f;
234 scale[1] = SCALE_FACTOR_16_TO_12;
235
236 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
237
238 matrix_mul(shader, fragment, tc, sampler, start, step, scale);
239
240 ureg_END(shader);
241
242 return ureg_create_shader_and_destroy(shader, idct->pipe);
243 }
244
245 static void *
246 create_matrix_frag_shader(struct vl_idct *idct)
247 {
248 struct ureg_program *shader;
249 struct ureg_src tc[2], sampler[2];
250 struct ureg_src start[2], step[2];
251 struct ureg_dst tmp, fragment;
252 float scale[2];
253
254 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
255 if (!shader)
256 return NULL;
257
258 tmp = ureg_DECL_temporary(shader);
259
260 tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
261 tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
262
263 start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
264 start[1] = ureg_imm1f(shader, 0.0f);
265
266 step[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
267 step[1] = ureg_imm1f(shader, 1.0f / BLOCK_WIDTH);
268
269 sampler[0] = ureg_DECL_sampler(shader, 0);
270 sampler[1] = ureg_DECL_sampler(shader, 1);
271
272 scale[0] = 1.0f;
273 scale[1] = 1.0f;
274
275 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
276
277 matrix_mul(shader, tmp, tc, sampler, start, step, scale);
278 ureg_MUL(shader, fragment, ureg_src(tmp), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_9_TO_16), TGSI_SWIZZLE_X));
279
280 ureg_END(shader);
281
282 return ureg_create_shader_and_destroy(shader, idct->pipe);
283 }
284
285 static void
286 xfer_buffers_map(struct vl_idct *idct)
287 {
288 struct pipe_box rect =
289 {
290 0, 0, 0,
291 idct->destination->width0,
292 idct->destination->height0,
293 1
294 };
295
296 idct->tex_transfer = idct->pipe->get_transfer
297 (
298 idct->pipe, idct->destination,
299 u_subresource(0, 0),
300 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
301 &rect
302 );
303
304 idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
305 }
306
307 static void
308 xfer_buffers_unmap(struct vl_idct *idct)
309 {
310 idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
311 idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
312 }
313
314 static bool
315 init_shaders(struct vl_idct *idct)
316 {
317 assert(idct);
318
319 assert(idct->vs = create_vert_shader(idct));
320 assert(idct->transpose_fs = create_transpose_frag_shader(idct));
321 assert(idct->matrix_fs = create_matrix_frag_shader(idct));
322
323 return true;
324 }
325
326 static void
327 cleanup_shaders(struct vl_idct *idct)
328 {
329 assert(idct);
330
331 idct->pipe->delete_vs_state(idct->pipe, idct->vs);
332 idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
333 idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
334 }
335
336 static bool
337 init_buffers(struct vl_idct *idct)
338 {
339 struct pipe_resource template;
340 struct pipe_sampler_view sampler_view;
341 struct pipe_vertex_element vertex_elems[2];
342
343 const unsigned max_blocks =
344 align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
345 align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
346 idct->destination->depth0;
347
348 unsigned i;
349
350 memset(&template, 0, sizeof(struct pipe_resource));
351 template.target = PIPE_TEXTURE_2D;
352 template.format = PIPE_FORMAT_R32_FLOAT;
353 template.last_level = 0;
354 template.width0 = 8;
355 template.height0 = 8;
356 template.depth0 = 1;
357 template.usage = PIPE_USAGE_IMMUTABLE;
358 template.bind = PIPE_BIND_SAMPLER_VIEW;
359 template.flags = 0;
360
361 idct->textures.individual.matrix = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
362 idct->textures.individual.transpose = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
363
364 template.format = idct->destination->format;
365 template.width0 = idct->destination->width0;
366 template.height0 = idct->destination->height0;
367 template.depth0 = idct->destination->depth0;
368 template.usage = PIPE_USAGE_DYNAMIC;
369 idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
370
371 template.format = PIPE_FORMAT_R32_FLOAT;
372 template.usage = PIPE_USAGE_STATIC;
373 idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
374
375 for (i = 0; i < 4; ++i) {
376 u_sampler_view_default_template(&sampler_view, idct->textures.all[i], idct->textures.all[i]->format);
377 idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
378 }
379
380 idct->quad.stride = sizeof(struct vertex2f);
381 idct->quad.max_index = 4 * max_blocks - 1;
382 idct->quad.buffer_offset = 0;
383 idct->quad.buffer = pipe_buffer_create
384 (
385 idct->pipe->screen,
386 PIPE_BIND_VERTEX_BUFFER,
387 sizeof(struct vertex2f) * 4 * max_blocks
388 );
389
390 idct->pos.stride = sizeof(struct vertex2f);
391 idct->pos.max_index = 4 * max_blocks - 1;
392 idct->pos.buffer_offset = 0;
393 idct->pos.buffer = pipe_buffer_create
394 (
395 idct->pipe->screen,
396 PIPE_BIND_VERTEX_BUFFER,
397 sizeof(struct vertex2f) * 4 * max_blocks
398 );
399
400 /* Rect element */
401 vertex_elems[0].src_offset = 0;
402 vertex_elems[0].instance_divisor = 0;
403 vertex_elems[0].vertex_buffer_index = 0;
404 vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
405
406 /* Pos element */
407 vertex_elems[1].src_offset = 0;
408 vertex_elems[1].instance_divisor = 0;
409 vertex_elems[1].vertex_buffer_index = 1;
410 vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
411
412 idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
413
414 idct->vs_const_buf = pipe_buffer_create
415 (
416 idct->pipe->screen,
417 PIPE_BIND_CONSTANT_BUFFER,
418 sizeof(struct vertex_shader_consts)
419 );
420
421 return true;
422 }
423
424 static void
425 cleanup_buffers(struct vl_idct *idct)
426 {
427 unsigned i;
428
429 assert(idct);
430
431 pipe_resource_reference(&idct->vs_const_buf, NULL);
432
433 for (i = 0; i < 4; ++i) {
434 pipe_sampler_view_reference(&idct->sampler_views.all[i], NULL);
435 pipe_resource_reference(&idct->textures.all[i], NULL);
436 }
437
438 idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
439 pipe_resource_reference(&idct->quad.buffer, NULL);
440 pipe_resource_reference(&idct->pos.buffer, NULL);
441 }
442
443 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst)
444 {
445 assert(idct && pipe && dst);
446
447 idct->pipe = pipe;
448
449 idct->viewport.scale[0] = dst->width0;
450 idct->viewport.scale[1] = dst->height0;
451 idct->viewport.scale[2] = 1;
452 idct->viewport.scale[3] = 1;
453 idct->viewport.translate[0] = 0;
454 idct->viewport.translate[1] = 0;
455 idct->viewport.translate[2] = 0;
456 idct->viewport.translate[3] = 0;
457
458 idct->fb_state.width = dst->width0;
459 idct->fb_state.height = dst->height0;
460 idct->fb_state.nr_cbufs = 1;
461 idct->fb_state.zsbuf = NULL;
462
463 pipe_resource_reference(&idct->destination, dst);
464
465 if(!init_shaders(idct))
466 return false;
467
468 if(!init_buffers(idct)) {
469 cleanup_shaders(idct);
470 return false;
471 }
472
473 xfer_buffers_map(idct);
474
475 return true;
476 }
477
478 void vl_idct_cleanup(struct vl_idct *idct)
479 {
480 cleanup_shaders(idct);
481 cleanup_buffers(idct);
482 }
483
484 void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
485 {
486 unsigned tex_pitch;
487 short *texels;
488 unsigned i;
489
490 assert(idct);
491 assert(block);
492
493 tex_pitch = idct->tex_transfer->stride / util_format_get_blocksize(idct->tex_transfer->resource->format);
494 texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
495
496 for (i = 0; i < BLOCK_HEIGHT; ++i)
497 memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2);
498 }
499
/**
 * Unmap the texel buffer and map it again straight away.
 * The actual processing between the two steps is not implemented yet.
 */
void vl_idct_flush(struct vl_idct *idct)
{
   /* Release the current mapping. */
   xfer_buffers_unmap(idct);

   /* TODO */

   /* Map again so further blocks can be added. */
   xfer_buffers_map(idct);
}