g3dvl: Use a func instead of large ugly macro to gen per-block verts.
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include <assert.h>
30 #include <pipe/p_context.h>
31 #include <pipe/p_inlines.h>
32 #include <util/u_math.h>
33 #include <util/u_memory.h>
34 #include <tgsi/tgsi_ureg.h>
35
/* Alignment passed to pipe_buffer_create() for every buffer in this file */
#define DEFAULT_BUF_ALIGNMENT 1

/* Dimensions, in pixels, of a macroblock and of one of its blocks */
#define MACROBLOCK_WIDTH 16
#define MACROBLOCK_HEIGHT 16
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8

/* Sentinel coordinate for "no zero block", and the matching predicate */
#define ZERO_BLOCK_NIL (-1.0f)
#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)

/* Multiplier applied to sampled texels by the fragment shaders */
#define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
44
/* Four-component float vector; used as the element type of the shader constant structs */
struct vertex4f
{
   float x;
   float y;
   float z;
   float w;
};
49
50 struct vertex_shader_consts
51 {
52 struct vertex4f denorm;
53 };
54
55 struct fragment_shader_consts
56 {
57 struct vertex4f multiplier;
58 struct vertex4f div;
59 };
60
61 struct vert_stream_0
62 {
63 struct vertex2f pos;
64 struct vertex2f luma_tc;
65 struct vertex2f cb_tc;
66 struct vertex2f cr_tc;
67 };
68
/*
 * Internal macroblock classification (see get_macroblock_type()).
 * The numeric order is also the order in which macroblocks are grouped
 * in the vertex buffers by gen_macroblock_stream().
 */
enum MACROBLOCK_TYPE
{
   MACROBLOCK_TYPE_INTRA = 0,
   MACROBLOCK_TYPE_FWD_FRAME_PRED,
   MACROBLOCK_TYPE_FWD_FIELD_PRED,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED,
   MACROBLOCK_TYPE_BI_FRAME_PRED,
   MACROBLOCK_TYPE_BI_FIELD_PRED,

   /* Number of types above; used to size per-type arrays */
   NUM_MACROBLOCK_TYPES
};
81
82 static bool
83 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
84 {
85 struct ureg_program *shader;
86 struct ureg_src vpos, vtex[3];
87 struct ureg_dst o_vpos, o_vtex[3];
88 unsigned i;
89
90 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
91 if (!shader)
92 return false;
93
94 vpos = ureg_DECL_vs_input(shader, 0);
95 for (i = 0; i < 3; ++i)
96 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
97 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
98 for (i = 0; i < 3; ++i)
99 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
100
101 /*
102 * o_vpos = vpos
103 * o_vtex[0..2] = vtex[0..2]
104 */
105 ureg_MOV(shader, o_vpos, vpos);
106 for (i = 0; i < 3; ++i)
107 ureg_MOV(shader, o_vtex[i], vtex[i]);
108
109 ureg_END(shader);
110
111 r->i_vs = ureg_create_shader_and_destroy(shader, r->pipe);
112 if (!r->i_vs)
113 return false;
114
115 return true;
116 }
117
118 static bool
119 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
120 {
121 struct ureg_program *shader;
122 struct ureg_src tc[3];
123 struct ureg_src sampler[3];
124 struct ureg_dst texel, temp;
125 struct ureg_dst fragment;
126 unsigned i;
127
128 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
129 if (!shader)
130 return false;
131
132 for (i = 0; i < 3; ++i) {
133 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
134 sampler[i] = ureg_DECL_sampler(shader, i);
135 }
136 texel = ureg_DECL_temporary(shader);
137 temp = ureg_DECL_temporary(shader);
138 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
139
140 /*
141 * texel.r = tex(tc[0], sampler[0])
142 * texel.g = tex(tc[1], sampler[1])
143 * texel.b = tex(tc[2], sampler[2])
144 * fragment = texel * scale
145 */
146 for (i = 0; i < 3; ++i) {
147 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
148 ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
149 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
150 }
151 ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
152
153 ureg_release_temporary(shader, texel);
154 ureg_release_temporary(shader, temp);
155 ureg_END(shader);
156
157 r->i_fs = ureg_create_shader_and_destroy(shader, r->pipe);
158 if (!r->i_fs)
159 return false;
160
161 return true;
162 }
163
164 static bool
165 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
166 {
167 struct ureg_program *shader;
168 struct ureg_src vpos, vtex[4];
169 struct ureg_dst o_vpos, o_vtex[4];
170 unsigned i;
171
172 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
173 if (!shader)
174 return false;
175
176 vpos = ureg_DECL_vs_input(shader, 0);
177 for (i = 0; i < 4; ++i)
178 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
179 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
180 for (i = 0; i < 4; ++i)
181 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
182
183 /*
184 * o_vpos = vpos
185 * o_vtex[0..2] = vtex[0..2]
186 * o_vtex[3] = vpos + vtex[3] // Apply motion vector
187 */
188 ureg_MOV(shader, o_vpos, vpos);
189 for (i = 0; i < 3; ++i)
190 ureg_MOV(shader, o_vtex[i], vtex[i]);
191 ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
192
193 ureg_END(shader);
194
195 r->p_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
196 if (!r->p_vs[0])
197 return false;
198
199 return true;
200 }
201
/* Placeholder for the field-prediction vertex shader; not implemented, asserts when called. */
static void
create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void) r;

   assert(false);
}
207
208 static bool
209 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
210 {
211 struct ureg_program *shader;
212 struct ureg_src tc[4];
213 struct ureg_src sampler[4];
214 struct ureg_dst texel, ref;
215 struct ureg_dst fragment;
216 unsigned i;
217
218 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
219 if (!shader)
220 return false;
221
222 for (i = 0; i < 4; ++i) {
223 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
224 sampler[i] = ureg_DECL_sampler(shader, i);
225 }
226 texel = ureg_DECL_temporary(shader);
227 ref = ureg_DECL_temporary(shader);
228 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
229
230 /*
231 * texel.r = tex(tc[0], sampler[0])
232 * texel.g = tex(tc[1], sampler[1])
233 * texel.b = tex(tc[2], sampler[2])
234 * ref = tex(tc[3], sampler[3])
235 * fragment = texel * scale + ref
236 */
237 for (i = 0; i < 3; ++i) {
238 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
239 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
240 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
241 }
242 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
243 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
244
245 ureg_release_temporary(shader, texel);
246 ureg_release_temporary(shader, ref);
247 ureg_END(shader);
248
249 r->p_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
250 if (!r->p_fs[0])
251 return false;
252
253 return true;
254 }
255
/* Placeholder for the field-prediction fragment shader; not implemented, asserts when called. */
static void
create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void) r;

   assert(false);
}
261
262 static bool
263 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
264 {
265 struct ureg_program *shader;
266 struct ureg_src vpos, vtex[5];
267 struct ureg_dst o_vpos, o_vtex[5];
268 unsigned i;
269
270 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
271 if (!shader)
272 return false;
273
274 vpos = ureg_DECL_vs_input(shader, 0);
275 for (i = 0; i < 4; ++i)
276 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
277 /* Skip input 5 */
278 vtex[4] = ureg_DECL_vs_input(shader, 6);
279 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
280 for (i = 0; i < 5; ++i)
281 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
282
283 /*
284 * o_vpos = vpos
285 * o_vtex[0..2] = vtex[0..2]
286 * o_vtex[3..4] = vpos + vtex[3..4] // Apply motion vector
287 */
288 ureg_MOV(shader, o_vpos, vpos);
289 for (i = 0; i < 3; ++i)
290 ureg_MOV(shader, o_vtex[i], vtex[i]);
291 for (i = 3; i < 5; ++i)
292 ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
293
294 ureg_END(shader);
295
296 r->b_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
297 if (!r->b_vs[0])
298 return false;
299
300 return true;
301 }
302
/* Placeholder for the bidirectional field-prediction vertex shader; not implemented, asserts when called. */
static void
create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void) r;

   assert(false);
}
308
309 static bool
310 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
311 {
312 struct ureg_program *shader;
313 struct ureg_src tc[5];
314 struct ureg_src sampler[5];
315 struct ureg_dst texel, ref[2];
316 struct ureg_dst fragment;
317 unsigned i;
318
319 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
320 if (!shader)
321 return false;
322
323 for (i = 0; i < 5; ++i) {
324 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
325 sampler[i] = ureg_DECL_sampler(shader, i);
326 }
327 texel = ureg_DECL_temporary(shader);
328 ref[0] = ureg_DECL_temporary(shader);
329 ref[1] = ureg_DECL_temporary(shader);
330 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
331
332 /*
333 * texel.r = tex(tc[0], sampler[0])
334 * texel.g = tex(tc[1], sampler[1])
335 * texel.b = tex(tc[2], sampler[2])
336 * ref[0..1 = tex(tc[3..4], sampler[3..4])
337 * ref[0] = lerp(ref[0], ref[1], 0.5)
338 * fragment = texel * scale + ref[0]
339 */
340 for (i = 0; i < 3; ++i) {
341 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
342 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
343 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
344 }
345 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
346 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
347 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
348
349 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
350
351 ureg_release_temporary(shader, texel);
352 ureg_release_temporary(shader, ref[0]);
353 ureg_release_temporary(shader, ref[1]);
354 ureg_END(shader);
355
356 r->b_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
357 if (!r->b_fs[0])
358 return false;
359
360 return true;
361 }
362
/* Placeholder for the bidirectional field-prediction fragment shader; not implemented, asserts when called. */
static void
create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void) r;

   assert(false);
}
368
369 static void
370 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
371 {
372 unsigned i;
373
374 assert(r);
375
376 for (i = 0; i < 3; ++i) {
377 r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
378 (
379 r->pipe->screen, r->textures.all[i],
380 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
381 r->textures.all[i]->width[0], r->textures.all[i]->height[0]
382 );
383
384 r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
385 }
386 }
387
388 static void
389 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
390 {
391 unsigned i;
392
393 assert(r);
394
395 for (i = 0; i < 3; ++i) {
396 r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
397 r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
398 }
399 }
400
401 static bool
402 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
403 {
404 struct pipe_sampler_state sampler;
405 unsigned filters[5];
406 unsigned i;
407
408 assert(r);
409
410 r->viewport.scale[0] = r->pot_buffers ?
411 util_next_power_of_two(r->picture_width) : r->picture_width;
412 r->viewport.scale[1] = r->pot_buffers ?
413 util_next_power_of_two(r->picture_height) : r->picture_height;
414 r->viewport.scale[2] = 1;
415 r->viewport.scale[3] = 1;
416 r->viewport.translate[0] = 0;
417 r->viewport.translate[1] = 0;
418 r->viewport.translate[2] = 0;
419 r->viewport.translate[3] = 0;
420
421 r->fb_state.width = r->pot_buffers ?
422 util_next_power_of_two(r->picture_width) : r->picture_width;
423 r->fb_state.height = r->pot_buffers ?
424 util_next_power_of_two(r->picture_height) : r->picture_height;
425 r->fb_state.nr_cbufs = 1;
426 r->fb_state.zsbuf = NULL;
427
428 /* Luma filter */
429 filters[0] = PIPE_TEX_FILTER_NEAREST;
430 /* Chroma filters */
431 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
432 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
433 filters[1] = PIPE_TEX_FILTER_NEAREST;
434 filters[2] = PIPE_TEX_FILTER_NEAREST;
435 }
436 else {
437 filters[1] = PIPE_TEX_FILTER_LINEAR;
438 filters[2] = PIPE_TEX_FILTER_LINEAR;
439 }
440 /* Fwd, bkwd ref filters */
441 filters[3] = PIPE_TEX_FILTER_LINEAR;
442 filters[4] = PIPE_TEX_FILTER_LINEAR;
443
444 for (i = 0; i < 5; ++i) {
445 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
446 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
447 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
448 sampler.min_img_filter = filters[i];
449 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
450 sampler.mag_img_filter = filters[i];
451 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
452 sampler.compare_func = PIPE_FUNC_ALWAYS;
453 sampler.normalized_coords = 1;
454 /*sampler.prefilter = ; */
455 /*sampler.shadow_ambient = ; */
456 /*sampler.lod_bias = ; */
457 sampler.min_lod = 0;
458 /*sampler.max_lod = ; */
459 /*sampler.border_color[i] = ; */
460 /*sampler.max_anisotropy = ; */
461 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
462 }
463
464 return true;
465 }
466
467 static void
468 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
469 {
470 unsigned i;
471
472 assert(r);
473
474 for (i = 0; i < 5; ++i)
475 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
476 }
477
478 static bool
479 init_shaders(struct vl_mpeg12_mc_renderer *r)
480 {
481 assert(r);
482
483 create_intra_vert_shader(r);
484 create_intra_frag_shader(r);
485 create_frame_pred_vert_shader(r);
486 create_frame_pred_frag_shader(r);
487 create_frame_bi_pred_vert_shader(r);
488 create_frame_bi_pred_frag_shader(r);
489
490 return true;
491 }
492
493 static void
494 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
495 {
496 assert(r);
497
498 r->pipe->delete_vs_state(r->pipe, r->i_vs);
499 r->pipe->delete_fs_state(r->pipe, r->i_fs);
500 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
501 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
502 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
503 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
504 }
505
506 static bool
507 init_buffers(struct vl_mpeg12_mc_renderer *r)
508 {
509 struct pipe_texture template;
510
511 const unsigned mbw =
512 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
513 const unsigned mbh =
514 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
515
516 unsigned i;
517
518 assert(r);
519
520 r->macroblocks_per_batch =
521 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
522 r->num_macroblocks = 0;
523 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
524
525 memset(&template, 0, sizeof(struct pipe_texture));
526 template.target = PIPE_TEXTURE_2D;
527 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
528 template.format = PIPE_FORMAT_R16_SNORM;
529 template.last_level = 0;
530 template.width[0] = r->pot_buffers ?
531 util_next_power_of_two(r->picture_width) : r->picture_width;
532 template.height[0] = r->pot_buffers ?
533 util_next_power_of_two(r->picture_height) : r->picture_height;
534 template.depth[0] = 1;
535 pf_get_block(template.format, &template.block);
536 template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
537
538 r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
539
540 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
541 template.width[0] = r->pot_buffers ?
542 util_next_power_of_two(r->picture_width / 2) :
543 r->picture_width / 2;
544 template.height[0] = r->pot_buffers ?
545 util_next_power_of_two(r->picture_height / 2) :
546 r->picture_height / 2;
547 }
548 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
549 template.height[0] = r->pot_buffers ?
550 util_next_power_of_two(r->picture_height / 2) :
551 r->picture_height / 2;
552
553 r->textures.individual.cb =
554 r->pipe->screen->texture_create(r->pipe->screen, &template);
555 r->textures.individual.cr =
556 r->pipe->screen->texture_create(r->pipe->screen, &template);
557
558 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
559 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
560 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
561 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
562 (
563 r->pipe->screen,
564 DEFAULT_BUF_ALIGNMENT,
565 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
566 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
567 );
568
569 for (i = 1; i < 3; ++i) {
570 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
571 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
572 r->vertex_bufs.all[i].buffer_offset = 0;
573 r->vertex_bufs.all[i].buffer = pipe_buffer_create
574 (
575 r->pipe->screen,
576 DEFAULT_BUF_ALIGNMENT,
577 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
578 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
579 );
580 }
581
582 /* Position element */
583 r->vertex_elems[0].src_offset = 0;
584 r->vertex_elems[0].vertex_buffer_index = 0;
585 r->vertex_elems[0].nr_components = 2;
586 r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
587
588 /* Luma, texcoord element */
589 r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
590 r->vertex_elems[1].vertex_buffer_index = 0;
591 r->vertex_elems[1].nr_components = 2;
592 r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
593
594 /* Chroma Cr texcoord element */
595 r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
596 r->vertex_elems[2].vertex_buffer_index = 0;
597 r->vertex_elems[2].nr_components = 2;
598 r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
599
600 /* Chroma Cb texcoord element */
601 r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
602 r->vertex_elems[3].vertex_buffer_index = 0;
603 r->vertex_elems[3].nr_components = 2;
604 r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
605
606 /* First ref surface top field texcoord element */
607 r->vertex_elems[4].src_offset = 0;
608 r->vertex_elems[4].vertex_buffer_index = 1;
609 r->vertex_elems[4].nr_components = 2;
610 r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
611
612 /* First ref surface bottom field texcoord element */
613 r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
614 r->vertex_elems[5].vertex_buffer_index = 1;
615 r->vertex_elems[5].nr_components = 2;
616 r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
617
618 /* Second ref surface top field texcoord element */
619 r->vertex_elems[6].src_offset = 0;
620 r->vertex_elems[6].vertex_buffer_index = 2;
621 r->vertex_elems[6].nr_components = 2;
622 r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
623
624 /* Second ref surface bottom field texcoord element */
625 r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
626 r->vertex_elems[7].vertex_buffer_index = 2;
627 r->vertex_elems[7].nr_components = 2;
628 r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
629
630 r->vs_const_buf.buffer = pipe_buffer_create
631 (
632 r->pipe->screen,
633 DEFAULT_BUF_ALIGNMENT,
634 PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
635 sizeof(struct vertex_shader_consts)
636 );
637
638 return true;
639 }
640
641 static void
642 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
643 {
644 unsigned i;
645
646 assert(r);
647
648 pipe_buffer_reference(&r->vs_const_buf.buffer, NULL);
649
650 for (i = 0; i < 3; ++i)
651 pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
652
653 for (i = 0; i < 3; ++i)
654 pipe_texture_reference(&r->textures.all[i], NULL);
655
656 FREE(r->macroblock_buf);
657 }
658
659 static enum MACROBLOCK_TYPE
660 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
661 {
662 assert(mb);
663
664 switch (mb->mb_type) {
665 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
666 return MACROBLOCK_TYPE_INTRA;
667 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
668 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
669 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
670 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
671 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
672 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
673 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
674 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
675 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
676 default:
677 assert(0);
678 }
679
680 /* Unreachable */
681 return -1;
682 }
683
684 static void
685 gen_block_verts(struct vert_stream_0 *vb, unsigned cbp, unsigned mbx, unsigned mby,
686 const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
687 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
688 bool use_zeroblocks, struct vertex2f *zero_blocks)
689 {
690 struct vertex2f v;
691
692 assert(vb);
693 assert(unit && half && offset);
694 assert(zero_blocks || !use_zeroblocks);
695
696 /* Generate vertices for two triangles covering a block */
697 v.x = mbx * unit->x + offset->x;
698 v.y = mby * unit->y + offset->y;
699
700 vb[0].pos.x = v.x;
701 vb[0].pos.y = v.y;
702 vb[1].pos.x = v.x;
703 vb[1].pos.y = v.y + half->y;
704 vb[2].pos.x = v.x + half->x;
705 vb[2].pos.y = v.y;
706 vb[3].pos.x = v.x + half->x;
707 vb[3].pos.y = v.y;
708 vb[4].pos.x = v.x;
709 vb[4].pos.y = v.y + half->y;
710 vb[5].pos.x = v.x + half->x;
711 vb[5].pos.y = v.y + half->y;
712
713 /* Generate texcoords for the triangles, either pointing to the correct area on the luma/chroma texture
714 or if zero blocks are being used, to the zero block if the appropriate CBP bits aren't set (i.e. no data
715 for this channel is defined for this block) */
716
717 if (!use_zeroblocks || cbp & luma_mask) {
718 v.x = mbx * unit->x + offset->x;
719 v.y = mby * unit->y + offset->y;
720 }
721 else {
722 v.x = zero_blocks[0].x;
723 v.y = zero_blocks[0].y;
724 }
725
726 vb[0].luma_tc.x = v.x;
727 vb[0].luma_tc.y = v.y;
728 vb[1].luma_tc.x = v.x;
729 vb[1].luma_tc.y = v.y + half->y;
730 vb[2].luma_tc.x = v.x + half->x;
731 vb[2].luma_tc.y = v.y;
732 vb[3].luma_tc.x = v.x + half->x;
733 vb[3].luma_tc.y = v.y;
734 vb[4].luma_tc.x = v.x;
735 vb[4].luma_tc.y = v.y + half->y;
736 vb[5].luma_tc.x = v.x + half->x;
737 vb[5].luma_tc.y = v.y + half->y;
738
739 if (!use_zeroblocks || cbp & cb_mask) {
740 v.x = mbx * unit->x + offset->x;
741 v.y = mby * unit->y + offset->y;
742 }
743 else {
744 v.x = zero_blocks[1].x;
745 v.y = zero_blocks[1].y;
746 }
747
748 vb[0].cb_tc.x = v.x;
749 vb[0].cb_tc.y = v.y;
750 vb[1].cb_tc.x = v.x;
751 vb[1].cb_tc.y = v.y + half->y;
752 vb[2].cb_tc.x = v.x + half->x;
753 vb[2].cb_tc.y = v.y;
754 vb[3].cb_tc.x = v.x + half->x;
755 vb[3].cb_tc.y = v.y;
756 vb[4].cb_tc.x = v.x;
757 vb[4].cb_tc.y = v.y + half->y;
758 vb[5].cb_tc.x = v.x + half->x;
759 vb[5].cb_tc.y = v.y + half->y;
760
761 if (!use_zeroblocks || cbp & cr_mask) {
762 v.x = mbx * unit->x + offset->x;
763 v.y = mby * unit->y + offset->y;
764 }
765 else {
766 v.x = zero_blocks[2].x;
767 v.y = zero_blocks[2].y;
768 }
769
770 vb[0].cr_tc.x = v.x;
771 vb[0].cr_tc.y = v.y;
772 vb[1].cr_tc.x = v.x;
773 vb[1].cr_tc.y = v.y + half->y;
774 vb[2].cr_tc.x = v.x + half->x;
775 vb[2].cr_tc.y = v.y;
776 vb[3].cr_tc.x = v.x + half->x;
777 vb[3].cr_tc.y = v.y;
778 vb[4].cr_tc.x = v.x;
779 vb[4].cr_tc.y = v.y + half->y;
780 vb[5].cr_tc.x = v.x + half->x;
781 vb[5].cr_tc.y = v.y + half->y;
782 }
783
784 static void
785 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
786 struct pipe_mpeg12_macroblock *mb, unsigned pos,
787 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
788 {
789 struct vertex2f mo_vec[2];
790
791 unsigned i;
792
793 assert(r);
794 assert(mb);
795 assert(ycbcr_vb);
796 assert(pos < r->macroblocks_per_batch);
797
798 switch (mb->mb_type) {
799 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
800 {
801 struct vertex2f *vb;
802
803 assert(ref_vb && ref_vb[1]);
804
805 vb = ref_vb[1] + pos * 2 * 24;
806
807 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
808 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
809
810 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
811 for (i = 0; i < 24 * 2; i += 2) {
812 vb[i].x = mo_vec[0].x;
813 vb[i].y = mo_vec[0].y;
814 }
815 }
816 else {
817 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
818 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
819
820 for (i = 0; i < 24 * 2; i += 2) {
821 vb[i].x = mo_vec[0].x;
822 vb[i].y = mo_vec[0].y;
823 vb[i + 1].x = mo_vec[1].x;
824 vb[i + 1].y = mo_vec[1].y;
825 }
826 }
827
828 /* fall-through */
829 }
830 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
831 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
832 {
833 struct vertex2f *vb;
834
835 assert(ref_vb && ref_vb[0]);
836
837 vb = ref_vb[0] + pos * 2 * 24;
838
839 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
840 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
841 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
842
843 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
844 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
845 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
846 }
847 }
848 else {
849 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
850 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
851
852 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
853 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
854 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
855 }
856 }
857
858 if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
859 for (i = 0; i < 24 * 2; i += 2) {
860 vb[i].x = mo_vec[0].x;
861 vb[i].y = mo_vec[0].y;
862 }
863 }
864 else {
865 for (i = 0; i < 24 * 2; i += 2) {
866 vb[i].x = mo_vec[0].x;
867 vb[i].y = mo_vec[0].y;
868 vb[i + 1].x = mo_vec[1].x;
869 vb[i + 1].y = mo_vec[1].y;
870 }
871 }
872
873 /* fall-through */
874 }
875 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
876 {
877 const struct vertex2f unit =
878 {
879 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
880 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
881 };
882 const struct vertex2f half =
883 {
884 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
885 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
886 };
887 const struct vertex2f offsets[2][2] =
888 {
889 {
890 {0, 0}, {0, half.y}
891 },
892 {
893 {half.x, 0}, {half.x, half.y}
894 }
895 };
896 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
897
898 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
899
900 gen_block_verts(vb, mb->cbp, mb->mbx, mb->mby,
901 &unit, &half, &offsets[0][0],
902 32, 2, 1, use_zb, r->zero_block);
903
904 gen_block_verts(vb + 6, mb->cbp, mb->mbx, mb->mby,
905 &unit, &half, &offsets[1][0],
906 16, 2, 1, use_zb, r->zero_block);
907
908 gen_block_verts(vb + 12, mb->cbp, mb->mbx, mb->mby,
909 &unit, &half, &offsets[0][1],
910 8, 2, 1, use_zb, r->zero_block);
911
912 gen_block_verts(vb + 18, mb->cbp, mb->mbx, mb->mby,
913 &unit, &half, &offsets[1][1],
914 4, 2, 1, use_zb, r->zero_block);
915
916 break;
917 }
918 default:
919 assert(0);
920 }
921 }
922
923 static void
924 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
925 unsigned *num_macroblocks)
926 {
927 unsigned offset[NUM_MACROBLOCK_TYPES];
928 struct vert_stream_0 *ycbcr_vb;
929 struct vertex2f *ref_vb[2];
930 unsigned i;
931
932 assert(r);
933 assert(num_macroblocks);
934
935 for (i = 0; i < r->num_macroblocks; ++i) {
936 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
937 ++num_macroblocks[mb_type];
938 }
939
940 offset[0] = 0;
941
942 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
943 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
944
945 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
946 (
947 r->pipe->screen,
948 r->vertex_bufs.individual.ycbcr.buffer,
949 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
950 );
951
952 for (i = 0; i < 2; ++i)
953 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
954 (
955 r->pipe->screen,
956 r->vertex_bufs.individual.ref[i].buffer,
957 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
958 );
959
960 for (i = 0; i < r->num_macroblocks; ++i) {
961 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
962
963 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
964 ycbcr_vb, ref_vb);
965
966 ++offset[mb_type];
967 }
968
969 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer);
970 for (i = 0; i < 2; ++i)
971 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer);
972 }
973
974 static void
975 flush(struct vl_mpeg12_mc_renderer *r)
976 {
977 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
978 unsigned vb_start = 0;
979 struct vertex_shader_consts *vs_consts;
980 unsigned i;
981
982 assert(r);
983 assert(r->num_macroblocks == r->macroblocks_per_batch);
984
985 gen_macroblock_stream(r, num_macroblocks);
986
987 r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface
988 (
989 r->pipe->screen, r->surface,
990 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
991 );
992
993 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
994 r->pipe->set_viewport_state(r->pipe, &r->viewport);
995
996 vs_consts = pipe_buffer_map
997 (
998 r->pipe->screen, r->vs_const_buf.buffer,
999 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1000 );
1001
1002 vs_consts->denorm.x = r->surface->width[0];
1003 vs_consts->denorm.y = r->surface->height[0];
1004
1005 pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
1006
1007 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1008 &r->vs_const_buf);
1009
1010 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1011 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1012 r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
1013 r->pipe->set_sampler_textures(r->pipe, 3, r->textures.all);
1014 r->pipe->bind_sampler_states(r->pipe, 3, r->samplers.all);
1015 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1016 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1017
1018 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1019 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1020 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1021 }
1022
1023 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1024 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1025 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1026 r->textures.individual.ref[0] = r->past;
1027 r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
1028 r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
1029 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1030 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1031
1032 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1033 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1034 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1035 }
1036
1037 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1038 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1039 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1040 r->textures.individual.ref[0] = r->past;
1041 r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
1042 r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
1043 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1044 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1045
1046 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1047 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1048 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1049 }
1050
1051 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1052 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1053 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1054 r->textures.individual.ref[0] = r->future;
1055 r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
1056 r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
1057 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1058 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1059
1060 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1061 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1062 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1063 }
1064
1065 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
1066 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1067 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1068 r->textures.individual.ref[0] = r->future;
1069 r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
1070 r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
1071 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1072 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1073
1074 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1075 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1076 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1077 }
1078
1079 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1080 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1081 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1082 r->textures.individual.ref[0] = r->past;
1083 r->textures.individual.ref[1] = r->future;
1084 r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all);
1085 r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all);
1086 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1087 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1088
1089 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1090 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1091 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1092 }
1093
1094 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1095 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1096 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1097 r->textures.individual.ref[0] = r->past;
1098 r->textures.individual.ref[1] = r->future;
1099 r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all);
1100 r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all);
1101 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1102 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1103
1104 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1105 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1106 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1107 }
1108
1109 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1110 pipe_surface_reference(&r->fb_state.cbufs[0], NULL);
1111
1112 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1113 for (i = 0; i < 3; ++i)
1114 r->zero_block[i].x = ZERO_BLOCK_NIL;
1115
1116 r->num_macroblocks = 0;
1117 }
1118
1119 static void
1120 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1121 {
1122 unsigned y;
1123
1124 assert(src);
1125 assert(dst);
1126
1127 for (y = 0; y < BLOCK_HEIGHT; ++y)
1128 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1129 }
1130
1131 static void
1132 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1133 {
1134 unsigned y;
1135
1136 assert(src);
1137 assert(dst);
1138
1139 for (y = 0; y < BLOCK_HEIGHT; ++y)
1140 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1141 }
1142
1143 static void
1144 fill_zero_block(short *dst, unsigned dst_pitch)
1145 {
1146 unsigned y;
1147
1148 assert(dst);
1149
1150 for (y = 0; y < BLOCK_HEIGHT; ++y)
1151 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1152 }
1153
1154 static void
1155 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1156 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1157 {
1158 unsigned tex_pitch;
1159 short *texels;
1160 unsigned tb = 0, sb = 0;
1161 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1162 unsigned x, y;
1163
1164 assert(r);
1165 assert(blocks);
1166
1167 tex_pitch = r->tex_transfer[0]->stride / r->tex_transfer[0]->block.size;
1168 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1169
1170 for (y = 0; y < 2; ++y) {
1171 for (x = 0; x < 2; ++x, ++tb) {
1172 if ((cbp >> (5 - tb)) & 1) {
1173 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1174 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1175 texels + y * tex_pitch * BLOCK_WIDTH +
1176 x * BLOCK_WIDTH, tex_pitch);
1177 }
1178 else {
1179 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1180 texels + y * tex_pitch + x * BLOCK_WIDTH,
1181 tex_pitch);
1182 }
1183
1184 ++sb;
1185 }
1186 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1187 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1188 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1189 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1190 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1191 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1192 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1193 }
1194 }
1195 }
1196 }
1197 }
1198
1199 /* TODO: Implement 422, 444 */
1200 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1201
1202 mbpx /= 2;
1203 mbpy /= 2;
1204
1205 for (tb = 0; tb < 2; ++tb) {
1206 tex_pitch = r->tex_transfer[tb + 1]->stride / r->tex_transfer[tb + 1]->block.size;
1207 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1208
1209 if ((cbp >> (1 - tb)) & 1) {
1210 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1211 ++sb;
1212 }
1213 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1214 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1215 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1216 fill_zero_block(texels, tex_pitch);
1217 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1218 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1219 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1220 }
1221 }
1222 }
1223 }
1224 }
1225
1226 static void
1227 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1228 struct pipe_mpeg12_macroblock *mb)
1229 {
1230 void *blocks;
1231
1232 assert(r);
1233 assert(mb);
1234 assert(r->num_macroblocks < r->macroblocks_per_batch);
1235
1236 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1237 sizeof(struct pipe_mpeg12_macroblock));
1238
1239 blocks = pipe_buffer_map(r->pipe->screen, mb->blocks,
1240 PIPE_BUFFER_USAGE_CPU_READ);
1241 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, blocks);
1242 pipe_buffer_unmap(r->pipe->screen, mb->blocks);
1243
1244 ++r->num_macroblocks;
1245 }
1246
1247 bool
1248 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1249 struct pipe_context *pipe,
1250 unsigned picture_width,
1251 unsigned picture_height,
1252 enum pipe_video_chroma_format chroma_format,
1253 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1254 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1255 bool pot_buffers)
1256 {
1257 unsigned i;
1258
1259 assert(renderer);
1260 assert(pipe);
1261 /* TODO: Implement other policies */
1262 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1263 /* TODO: Implement this */
1264 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1265 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1266 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1267 assert(pot_buffers);
1268
1269 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1270
1271 renderer->pipe = pipe;
1272 renderer->picture_width = picture_width;
1273 renderer->picture_height = picture_height;
1274 renderer->chroma_format = chroma_format;
1275 renderer->bufmode = bufmode;
1276 renderer->eb_handling = eb_handling;
1277 renderer->pot_buffers = pot_buffers;
1278
1279 if (!init_pipe_state(renderer))
1280 return false;
1281 if (!init_shaders(renderer)) {
1282 cleanup_pipe_state(renderer);
1283 return false;
1284 }
1285 if (!init_buffers(renderer)) {
1286 cleanup_shaders(renderer);
1287 cleanup_pipe_state(renderer);
1288 return false;
1289 }
1290
1291 renderer->surface = NULL;
1292 renderer->past = NULL;
1293 renderer->future = NULL;
1294 for (i = 0; i < 3; ++i)
1295 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1296 renderer->num_macroblocks = 0;
1297
1298 xfer_buffers_map(renderer);
1299
1300 return true;
1301 }
1302
/*
 * Tear down a renderer set up by vl_mpeg12_mc_renderer_init().
 *
 * The transfer buffers are unmapped first, then pipe state, shaders and
 * buffers are released.
 */
void
vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
{
   assert(renderer);

   /* Undo the mapping that init left in place. */
   xfer_buffers_unmap(renderer);

   /* Release what init created. */
   cleanup_pipe_state(renderer);
   cleanup_shaders(renderer);
   cleanup_buffers(renderer);
}
1314
1315 void
1316 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1317 *renderer,
1318 struct pipe_texture *surface,
1319 struct pipe_texture *past,
1320 struct pipe_texture *future,
1321 unsigned num_macroblocks,
1322 struct pipe_mpeg12_macroblock
1323 *mpeg12_macroblocks,
1324 struct pipe_fence_handle **fence)
1325 {
1326 bool new_surface = false;
1327
1328 assert(renderer);
1329 assert(surface);
1330 assert(num_macroblocks);
1331 assert(mpeg12_macroblocks);
1332
1333 if (renderer->surface) {
1334 if (surface != renderer->surface) {
1335 if (renderer->num_macroblocks > 0) {
1336 xfer_buffers_unmap(renderer);
1337 flush(renderer);
1338 }
1339
1340 new_surface = true;
1341 }
1342
1343 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1344 assert(surface != renderer->surface || renderer->past == past);
1345 assert(surface != renderer->surface || renderer->future == future);
1346 }
1347 else
1348 new_surface = true;
1349
1350 if (new_surface) {
1351 renderer->surface = surface;
1352 renderer->past = past;
1353 renderer->future = future;
1354 renderer->fence = fence;
1355 renderer->surface_tex_inv_size.x = 1.0f / surface->width[0];
1356 renderer->surface_tex_inv_size.y = 1.0f / surface->height[0];
1357 }
1358
1359 while (num_macroblocks) {
1360 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1361 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1362 unsigned i;
1363
1364 for (i = 0; i < num_to_submit; ++i) {
1365 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1366 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1367 }
1368
1369 num_macroblocks -= num_to_submit;
1370
1371 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1372 xfer_buffers_unmap(renderer);
1373 flush(renderer);
1374 xfer_buffers_map(renderer);
1375 /* Next time we get this surface it may have new ref frames */
1376 renderer->surface = NULL;
1377 }
1378 }
1379 }