vl: WIP DRI2 support in the winsys.
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include <assert.h>
30 #include <pipe/p_context.h>
31 #include <util/u_inlines.h>
32 #include <util/u_format.h>
33 #include <util/u_math.h>
34 #include <util/u_memory.h>
35 #include <tgsi/tgsi_ureg.h>
36
/* Alignment argument passed to every pipe_buffer_create() call in this file */
#define DEFAULT_BUF_ALIGNMENT 1

/* Macroblock and block dimensions, in the same units as picture_width/height */
#define MACROBLOCK_WIDTH 16
#define MACROBLOCK_HEIGHT 16
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8

/*
 * Sentinel coordinate for "no zero block recorded". The expansion is
 * parenthesized so the unary minus always stays attached to the literal.
 */
#define ZERO_BLOCK_NIL (-1.0f)
/* True when the vertex `zb` still holds the ZERO_BLOCK_NIL sentinel in .x */
#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)

/* Factor the fragment shaders multiply the sampled Y/Cb/Cr texel by */
#define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
45
46 struct vertex_shader_consts
47 {
48 struct vertex4f denorm;
49 };
50
51 struct fragment_shader_consts
52 {
53 struct vertex4f multiplier;
54 struct vertex4f div;
55 };
56
57 struct vert_stream_0
58 {
59 struct vertex2f pos;
60 struct vertex2f luma_tc;
61 struct vertex2f cb_tc;
62 struct vertex2f cr_tc;
63 };
64
/*
 * Internal macroblock classification (macroblock type combined with motion
 * type, see get_macroblock_type()). The values are used as array indices
 * for the per-type counters and vertex buffer offsets, so they must stay
 * dense and start at zero.
 */
enum MACROBLOCK_TYPE
{
   MACROBLOCK_TYPE_INTRA = 0,
   MACROBLOCK_TYPE_FWD_FRAME_PRED,
   MACROBLOCK_TYPE_FWD_FIELD_PRED,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED,
   MACROBLOCK_TYPE_BI_FRAME_PRED,
   MACROBLOCK_TYPE_BI_FIELD_PRED,

   /* Number of types above; not a macroblock type itself */
   NUM_MACROBLOCK_TYPES
};
77
78 static bool
79 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
80 {
81 struct ureg_program *shader;
82 struct ureg_src vpos, vtex[3];
83 struct ureg_dst o_vpos, o_vtex[3];
84 unsigned i;
85
86 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
87 if (!shader)
88 return false;
89
90 vpos = ureg_DECL_vs_input(shader, 0);
91 for (i = 0; i < 3; ++i)
92 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
93 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
94 for (i = 0; i < 3; ++i)
95 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
96
97 /*
98 * o_vpos = vpos
99 * o_vtex[0..2] = vtex[0..2]
100 */
101 ureg_MOV(shader, o_vpos, vpos);
102 for (i = 0; i < 3; ++i)
103 ureg_MOV(shader, o_vtex[i], vtex[i]);
104
105 ureg_END(shader);
106
107 r->i_vs = ureg_create_shader_and_destroy(shader, r->pipe);
108 if (!r->i_vs)
109 return false;
110
111 return true;
112 }
113
114 static bool
115 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
116 {
117 struct ureg_program *shader;
118 struct ureg_src tc[3];
119 struct ureg_src sampler[3];
120 struct ureg_dst texel, temp;
121 struct ureg_dst fragment;
122 unsigned i;
123
124 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
125 if (!shader)
126 return false;
127
128 for (i = 0; i < 3; ++i) {
129 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
130 sampler[i] = ureg_DECL_sampler(shader, i);
131 }
132 texel = ureg_DECL_temporary(shader);
133 temp = ureg_DECL_temporary(shader);
134 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
135
136 /*
137 * texel.r = tex(tc[0], sampler[0])
138 * texel.g = tex(tc[1], sampler[1])
139 * texel.b = tex(tc[2], sampler[2])
140 * fragment = texel * scale
141 */
142 for (i = 0; i < 3; ++i) {
143 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
144 ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
145 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
146 }
147 ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
148
149 ureg_release_temporary(shader, texel);
150 ureg_release_temporary(shader, temp);
151 ureg_END(shader);
152
153 r->i_fs = ureg_create_shader_and_destroy(shader, r->pipe);
154 if (!r->i_fs)
155 return false;
156
157 return true;
158 }
159
160 static bool
161 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
162 {
163 struct ureg_program *shader;
164 struct ureg_src vpos, vtex[4];
165 struct ureg_dst o_vpos, o_vtex[4];
166 unsigned i;
167
168 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
169 if (!shader)
170 return false;
171
172 vpos = ureg_DECL_vs_input(shader, 0);
173 for (i = 0; i < 4; ++i)
174 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
175 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
176 for (i = 0; i < 4; ++i)
177 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
178
179 /*
180 * o_vpos = vpos
181 * o_vtex[0..2] = vtex[0..2]
182 * o_vtex[3] = vpos + vtex[3] // Apply motion vector
183 */
184 ureg_MOV(shader, o_vpos, vpos);
185 for (i = 0; i < 3; ++i)
186 ureg_MOV(shader, o_vtex[i], vtex[i]);
187 ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
188
189 ureg_END(shader);
190
191 r->p_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
192 if (!r->p_vs[0])
193 return false;
194
195 return true;
196 }
197
#if 0
/* Placeholder: vertex shader for field-predicted macroblocks (compiled out) */
static void
create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   /* Not implemented */
   assert(false);
}
#endif
205
206 static bool
207 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
208 {
209 struct ureg_program *shader;
210 struct ureg_src tc[4];
211 struct ureg_src sampler[4];
212 struct ureg_dst texel, ref;
213 struct ureg_dst fragment;
214 unsigned i;
215
216 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
217 if (!shader)
218 return false;
219
220 for (i = 0; i < 4; ++i) {
221 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
222 sampler[i] = ureg_DECL_sampler(shader, i);
223 }
224 texel = ureg_DECL_temporary(shader);
225 ref = ureg_DECL_temporary(shader);
226 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
227
228 /*
229 * texel.r = tex(tc[0], sampler[0])
230 * texel.g = tex(tc[1], sampler[1])
231 * texel.b = tex(tc[2], sampler[2])
232 * ref = tex(tc[3], sampler[3])
233 * fragment = texel * scale + ref
234 */
235 for (i = 0; i < 3; ++i) {
236 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
237 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
238 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
239 }
240 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
241 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
242
243 ureg_release_temporary(shader, texel);
244 ureg_release_temporary(shader, ref);
245 ureg_END(shader);
246
247 r->p_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
248 if (!r->p_fs[0])
249 return false;
250
251 return true;
252 }
253
#if 0
/* Placeholder: fragment shader for field-predicted macroblocks (compiled out) */
static void
create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   /* Not implemented */
   assert(false);
}
#endif
261
262 static bool
263 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
264 {
265 struct ureg_program *shader;
266 struct ureg_src vpos, vtex[5];
267 struct ureg_dst o_vpos, o_vtex[5];
268 unsigned i;
269
270 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
271 if (!shader)
272 return false;
273
274 vpos = ureg_DECL_vs_input(shader, 0);
275 for (i = 0; i < 4; ++i)
276 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
277 /* Skip input 5 */
278 vtex[4] = ureg_DECL_vs_input(shader, 6);
279 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
280 for (i = 0; i < 5; ++i)
281 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
282
283 /*
284 * o_vpos = vpos
285 * o_vtex[0..2] = vtex[0..2]
286 * o_vtex[3..4] = vpos + vtex[3..4] // Apply motion vector
287 */
288 ureg_MOV(shader, o_vpos, vpos);
289 for (i = 0; i < 3; ++i)
290 ureg_MOV(shader, o_vtex[i], vtex[i]);
291 for (i = 3; i < 5; ++i)
292 ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
293
294 ureg_END(shader);
295
296 r->b_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
297 if (!r->b_vs[0])
298 return false;
299
300 return true;
301 }
302
#if 0
/* Placeholder: vertex shader for bi-directional field prediction (compiled out) */
static void
create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   /* Not implemented */
   assert(false);
}
#endif
310
311 static bool
312 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
313 {
314 struct ureg_program *shader;
315 struct ureg_src tc[5];
316 struct ureg_src sampler[5];
317 struct ureg_dst texel, ref[2];
318 struct ureg_dst fragment;
319 unsigned i;
320
321 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
322 if (!shader)
323 return false;
324
325 for (i = 0; i < 5; ++i) {
326 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
327 sampler[i] = ureg_DECL_sampler(shader, i);
328 }
329 texel = ureg_DECL_temporary(shader);
330 ref[0] = ureg_DECL_temporary(shader);
331 ref[1] = ureg_DECL_temporary(shader);
332 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
333
334 /*
335 * texel.r = tex(tc[0], sampler[0])
336 * texel.g = tex(tc[1], sampler[1])
337 * texel.b = tex(tc[2], sampler[2])
338 * ref[0..1 = tex(tc[3..4], sampler[3..4])
339 * ref[0] = lerp(ref[0], ref[1], 0.5)
340 * fragment = texel * scale + ref[0]
341 */
342 for (i = 0; i < 3; ++i) {
343 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
344 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
345 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
346 }
347 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
348 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
349 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
350
351 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
352
353 ureg_release_temporary(shader, texel);
354 ureg_release_temporary(shader, ref[0]);
355 ureg_release_temporary(shader, ref[1]);
356 ureg_END(shader);
357
358 r->b_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
359 if (!r->b_fs[0])
360 return false;
361
362 return true;
363 }
364
#if 0
/* Placeholder: fragment shader for bi-directional field prediction (compiled out) */
static void
create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   /* Not implemented */
   assert(false);
}
#endif
372
373 static void
374 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
375 {
376 unsigned i;
377
378 assert(r);
379
380 for (i = 0; i < 3; ++i) {
381 r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
382 (
383 r->pipe->screen, r->textures.all[i],
384 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
385 r->textures.all[i]->width0, r->textures.all[i]->height0
386 );
387
388 r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
389 }
390 }
391
392 static void
393 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
394 {
395 unsigned i;
396
397 assert(r);
398
399 for (i = 0; i < 3; ++i) {
400 r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
401 r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
402 }
403 }
404
405 static bool
406 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
407 {
408 struct pipe_sampler_state sampler;
409 unsigned filters[5];
410 unsigned i;
411
412 assert(r);
413
414 r->viewport.scale[0] = r->pot_buffers ?
415 util_next_power_of_two(r->picture_width) : r->picture_width;
416 r->viewport.scale[1] = r->pot_buffers ?
417 util_next_power_of_two(r->picture_height) : r->picture_height;
418 r->viewport.scale[2] = 1;
419 r->viewport.scale[3] = 1;
420 r->viewport.translate[0] = 0;
421 r->viewport.translate[1] = 0;
422 r->viewport.translate[2] = 0;
423 r->viewport.translate[3] = 0;
424
425 r->fb_state.width = r->pot_buffers ?
426 util_next_power_of_two(r->picture_width) : r->picture_width;
427 r->fb_state.height = r->pot_buffers ?
428 util_next_power_of_two(r->picture_height) : r->picture_height;
429 r->fb_state.nr_cbufs = 1;
430 r->fb_state.zsbuf = NULL;
431
432 /* Luma filter */
433 filters[0] = PIPE_TEX_FILTER_NEAREST;
434 /* Chroma filters */
435 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
436 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
437 filters[1] = PIPE_TEX_FILTER_NEAREST;
438 filters[2] = PIPE_TEX_FILTER_NEAREST;
439 }
440 else {
441 filters[1] = PIPE_TEX_FILTER_LINEAR;
442 filters[2] = PIPE_TEX_FILTER_LINEAR;
443 }
444 /* Fwd, bkwd ref filters */
445 filters[3] = PIPE_TEX_FILTER_LINEAR;
446 filters[4] = PIPE_TEX_FILTER_LINEAR;
447
448 for (i = 0; i < 5; ++i) {
449 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
450 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
451 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
452 sampler.min_img_filter = filters[i];
453 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
454 sampler.mag_img_filter = filters[i];
455 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
456 sampler.compare_func = PIPE_FUNC_ALWAYS;
457 sampler.normalized_coords = 1;
458 /*sampler.shadow_ambient = ; */
459 /*sampler.lod_bias = ; */
460 sampler.min_lod = 0;
461 /*sampler.max_lod = ; */
462 /*sampler.border_color[i] = ; */
463 /*sampler.max_anisotropy = ; */
464 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
465 }
466
467 return true;
468 }
469
470 static void
471 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
472 {
473 unsigned i;
474
475 assert(r);
476
477 for (i = 0; i < 5; ++i)
478 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
479 }
480
481 static bool
482 init_shaders(struct vl_mpeg12_mc_renderer *r)
483 {
484 assert(r);
485
486 create_intra_vert_shader(r);
487 create_intra_frag_shader(r);
488 create_frame_pred_vert_shader(r);
489 create_frame_pred_frag_shader(r);
490 create_frame_bi_pred_vert_shader(r);
491 create_frame_bi_pred_frag_shader(r);
492
493 return true;
494 }
495
496 static void
497 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
498 {
499 assert(r);
500
501 r->pipe->delete_vs_state(r->pipe, r->i_vs);
502 r->pipe->delete_fs_state(r->pipe, r->i_fs);
503 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
504 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
505 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
506 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
507 }
508
509 static bool
510 init_buffers(struct vl_mpeg12_mc_renderer *r)
511 {
512 struct pipe_texture template;
513
514 const unsigned mbw =
515 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
516 const unsigned mbh =
517 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
518
519 unsigned i;
520
521 assert(r);
522
523 r->macroblocks_per_batch =
524 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
525 r->num_macroblocks = 0;
526 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
527
528 memset(&template, 0, sizeof(struct pipe_texture));
529 template.target = PIPE_TEXTURE_2D;
530 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
531 template.format = PIPE_FORMAT_R16_SNORM;
532 template.last_level = 0;
533 template.width0 = r->pot_buffers ?
534 util_next_power_of_two(r->picture_width) : r->picture_width;
535 template.height0 = r->pot_buffers ?
536 util_next_power_of_two(r->picture_height) : r->picture_height;
537 template.depth0 = 1;
538 template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
539
540 r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
541
542 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
543 template.width0 = r->pot_buffers ?
544 util_next_power_of_two(r->picture_width / 2) :
545 r->picture_width / 2;
546 template.height0 = r->pot_buffers ?
547 util_next_power_of_two(r->picture_height / 2) :
548 r->picture_height / 2;
549 }
550 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
551 template.height0 = r->pot_buffers ?
552 util_next_power_of_two(r->picture_height / 2) :
553 r->picture_height / 2;
554
555 r->textures.individual.cb =
556 r->pipe->screen->texture_create(r->pipe->screen, &template);
557 r->textures.individual.cr =
558 r->pipe->screen->texture_create(r->pipe->screen, &template);
559
560 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
561 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
562 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
563 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
564 (
565 r->pipe->screen,
566 DEFAULT_BUF_ALIGNMENT,
567 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
568 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
569 );
570
571 for (i = 1; i < 3; ++i) {
572 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
573 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
574 r->vertex_bufs.all[i].buffer_offset = 0;
575 r->vertex_bufs.all[i].buffer = pipe_buffer_create
576 (
577 r->pipe->screen,
578 DEFAULT_BUF_ALIGNMENT,
579 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
580 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
581 );
582 }
583
584 /* Position element */
585 r->vertex_elems[0].src_offset = 0;
586 r->vertex_elems[0].instance_divisor = 0;
587 r->vertex_elems[0].vertex_buffer_index = 0;
588 r->vertex_elems[0].nr_components = 2;
589 r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
590
591 /* Luma, texcoord element */
592 r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
593 r->vertex_elems[1].instance_divisor = 0;
594 r->vertex_elems[1].vertex_buffer_index = 0;
595 r->vertex_elems[1].nr_components = 2;
596 r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
597
598 /* Chroma Cr texcoord element */
599 r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
600 r->vertex_elems[2].instance_divisor = 0;
601 r->vertex_elems[2].vertex_buffer_index = 0;
602 r->vertex_elems[2].nr_components = 2;
603 r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
604
605 /* Chroma Cb texcoord element */
606 r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
607 r->vertex_elems[3].instance_divisor = 0;
608 r->vertex_elems[3].vertex_buffer_index = 0;
609 r->vertex_elems[3].nr_components = 2;
610 r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
611
612 /* First ref surface top field texcoord element */
613 r->vertex_elems[4].src_offset = 0;
614 r->vertex_elems[4].instance_divisor = 0;
615 r->vertex_elems[4].vertex_buffer_index = 1;
616 r->vertex_elems[4].nr_components = 2;
617 r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
618
619 /* First ref surface bottom field texcoord element */
620 r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
621 r->vertex_elems[5].instance_divisor = 0;
622 r->vertex_elems[5].vertex_buffer_index = 1;
623 r->vertex_elems[5].nr_components = 2;
624 r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
625
626 /* Second ref surface top field texcoord element */
627 r->vertex_elems[6].src_offset = 0;
628 r->vertex_elems[6].instance_divisor = 0;
629 r->vertex_elems[6].vertex_buffer_index = 2;
630 r->vertex_elems[6].nr_components = 2;
631 r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
632
633 /* Second ref surface bottom field texcoord element */
634 r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
635 r->vertex_elems[7].instance_divisor = 0;
636 r->vertex_elems[7].vertex_buffer_index = 2;
637 r->vertex_elems[7].nr_components = 2;
638 r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
639
640 r->vs_const_buf = pipe_buffer_create
641 (
642 r->pipe->screen,
643 DEFAULT_BUF_ALIGNMENT,
644 PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
645 sizeof(struct vertex_shader_consts)
646 );
647
648 return true;
649 }
650
651 static void
652 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
653 {
654 unsigned i;
655
656 assert(r);
657
658 pipe_buffer_reference(&r->vs_const_buf, NULL);
659
660 for (i = 0; i < 3; ++i)
661 pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
662
663 for (i = 0; i < 3; ++i)
664 pipe_texture_reference(&r->textures.all[i], NULL);
665
666 FREE(r->macroblock_buf);
667 }
668
669 static enum MACROBLOCK_TYPE
670 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
671 {
672 assert(mb);
673
674 switch (mb->mb_type) {
675 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
676 return MACROBLOCK_TYPE_INTRA;
677 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
678 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
679 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
680 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
681 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
682 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
683 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
684 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
685 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
686 default:
687 assert(0);
688 }
689
690 /* Unreachable */
691 return -1;
692 }
693
694 static void
695 gen_block_verts(struct vert_stream_0 *vb, unsigned cbp, unsigned mbx, unsigned mby,
696 const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
697 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
698 bool use_zeroblocks, struct vertex2f *zero_blocks)
699 {
700 struct vertex2f v;
701
702 assert(vb);
703 assert(unit && half && offset);
704 assert(zero_blocks || !use_zeroblocks);
705
706 /* Generate vertices for two triangles covering a block */
707 v.x = mbx * unit->x + offset->x;
708 v.y = mby * unit->y + offset->y;
709
710 vb[0].pos.x = v.x;
711 vb[0].pos.y = v.y;
712 vb[1].pos.x = v.x;
713 vb[1].pos.y = v.y + half->y;
714 vb[2].pos.x = v.x + half->x;
715 vb[2].pos.y = v.y;
716 vb[3].pos.x = v.x + half->x;
717 vb[3].pos.y = v.y;
718 vb[4].pos.x = v.x;
719 vb[4].pos.y = v.y + half->y;
720 vb[5].pos.x = v.x + half->x;
721 vb[5].pos.y = v.y + half->y;
722
723 /* Generate texcoords for the triangles, either pointing to the correct area on the luma/chroma texture
724 or if zero blocks are being used, to the zero block if the appropriate CBP bits aren't set (i.e. no data
725 for this channel is defined for this block) */
726
727 if (!use_zeroblocks || cbp & luma_mask) {
728 v.x = mbx * unit->x + offset->x;
729 v.y = mby * unit->y + offset->y;
730 }
731 else {
732 v.x = zero_blocks[0].x;
733 v.y = zero_blocks[0].y;
734 }
735
736 vb[0].luma_tc.x = v.x;
737 vb[0].luma_tc.y = v.y;
738 vb[1].luma_tc.x = v.x;
739 vb[1].luma_tc.y = v.y + half->y;
740 vb[2].luma_tc.x = v.x + half->x;
741 vb[2].luma_tc.y = v.y;
742 vb[3].luma_tc.x = v.x + half->x;
743 vb[3].luma_tc.y = v.y;
744 vb[4].luma_tc.x = v.x;
745 vb[4].luma_tc.y = v.y + half->y;
746 vb[5].luma_tc.x = v.x + half->x;
747 vb[5].luma_tc.y = v.y + half->y;
748
749 if (!use_zeroblocks || cbp & cb_mask) {
750 v.x = mbx * unit->x + offset->x;
751 v.y = mby * unit->y + offset->y;
752 }
753 else {
754 v.x = zero_blocks[1].x;
755 v.y = zero_blocks[1].y;
756 }
757
758 vb[0].cb_tc.x = v.x;
759 vb[0].cb_tc.y = v.y;
760 vb[1].cb_tc.x = v.x;
761 vb[1].cb_tc.y = v.y + half->y;
762 vb[2].cb_tc.x = v.x + half->x;
763 vb[2].cb_tc.y = v.y;
764 vb[3].cb_tc.x = v.x + half->x;
765 vb[3].cb_tc.y = v.y;
766 vb[4].cb_tc.x = v.x;
767 vb[4].cb_tc.y = v.y + half->y;
768 vb[5].cb_tc.x = v.x + half->x;
769 vb[5].cb_tc.y = v.y + half->y;
770
771 if (!use_zeroblocks || cbp & cr_mask) {
772 v.x = mbx * unit->x + offset->x;
773 v.y = mby * unit->y + offset->y;
774 }
775 else {
776 v.x = zero_blocks[2].x;
777 v.y = zero_blocks[2].y;
778 }
779
780 vb[0].cr_tc.x = v.x;
781 vb[0].cr_tc.y = v.y;
782 vb[1].cr_tc.x = v.x;
783 vb[1].cr_tc.y = v.y + half->y;
784 vb[2].cr_tc.x = v.x + half->x;
785 vb[2].cr_tc.y = v.y;
786 vb[3].cr_tc.x = v.x + half->x;
787 vb[3].cr_tc.y = v.y;
788 vb[4].cr_tc.x = v.x;
789 vb[4].cr_tc.y = v.y + half->y;
790 vb[5].cr_tc.x = v.x + half->x;
791 vb[5].cr_tc.y = v.y + half->y;
792 }
793
794 static void
795 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
796 struct pipe_mpeg12_macroblock *mb, unsigned pos,
797 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
798 {
799 struct vertex2f mo_vec[2];
800
801 unsigned i;
802
803 assert(r);
804 assert(mb);
805 assert(ycbcr_vb);
806 assert(pos < r->macroblocks_per_batch);
807
808 mo_vec[1].x = 0;
809 mo_vec[1].y = 0;
810
811 switch (mb->mb_type) {
812 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
813 {
814 struct vertex2f *vb;
815
816 assert(ref_vb && ref_vb[1]);
817
818 vb = ref_vb[1] + pos * 2 * 24;
819
820 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
821 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
822
823 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
824 for (i = 0; i < 24 * 2; i += 2) {
825 vb[i].x = mo_vec[0].x;
826 vb[i].y = mo_vec[0].y;
827 }
828 }
829 else {
830 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
831 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
832
833 for (i = 0; i < 24 * 2; i += 2) {
834 vb[i].x = mo_vec[0].x;
835 vb[i].y = mo_vec[0].y;
836 vb[i + 1].x = mo_vec[1].x;
837 vb[i + 1].y = mo_vec[1].y;
838 }
839 }
840
841 /* fall-through */
842 }
843 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
844 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
845 {
846 struct vertex2f *vb;
847
848 assert(ref_vb && ref_vb[0]);
849
850 vb = ref_vb[0] + pos * 2 * 24;
851
852 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
853 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
854 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
855
856 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
857 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
858 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
859 }
860 }
861 else {
862 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
863 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
864
865 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
866 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
867 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
868 }
869 }
870
871 if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
872 for (i = 0; i < 24 * 2; i += 2) {
873 vb[i].x = mo_vec[0].x;
874 vb[i].y = mo_vec[0].y;
875 }
876 }
877 else {
878 for (i = 0; i < 24 * 2; i += 2) {
879 vb[i].x = mo_vec[0].x;
880 vb[i].y = mo_vec[0].y;
881 vb[i + 1].x = mo_vec[1].x;
882 vb[i + 1].y = mo_vec[1].y;
883 }
884 }
885
886 /* fall-through */
887 }
888 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
889 {
890 const struct vertex2f unit =
891 {
892 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
893 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
894 };
895 const struct vertex2f half =
896 {
897 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
898 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
899 };
900 const struct vertex2f offsets[2][2] =
901 {
902 {
903 {0, 0}, {0, half.y}
904 },
905 {
906 {half.x, 0}, {half.x, half.y}
907 }
908 };
909 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
910
911 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
912
913 gen_block_verts(vb, mb->cbp, mb->mbx, mb->mby,
914 &unit, &half, &offsets[0][0],
915 32, 2, 1, use_zb, r->zero_block);
916
917 gen_block_verts(vb + 6, mb->cbp, mb->mbx, mb->mby,
918 &unit, &half, &offsets[1][0],
919 16, 2, 1, use_zb, r->zero_block);
920
921 gen_block_verts(vb + 12, mb->cbp, mb->mbx, mb->mby,
922 &unit, &half, &offsets[0][1],
923 8, 2, 1, use_zb, r->zero_block);
924
925 gen_block_verts(vb + 18, mb->cbp, mb->mbx, mb->mby,
926 &unit, &half, &offsets[1][1],
927 4, 2, 1, use_zb, r->zero_block);
928
929 break;
930 }
931 default:
932 assert(0);
933 }
934 }
935
936 static void
937 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
938 unsigned *num_macroblocks)
939 {
940 unsigned offset[NUM_MACROBLOCK_TYPES];
941 struct vert_stream_0 *ycbcr_vb;
942 struct vertex2f *ref_vb[2];
943 unsigned i;
944
945 assert(r);
946 assert(num_macroblocks);
947
948 for (i = 0; i < r->num_macroblocks; ++i) {
949 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
950 ++num_macroblocks[mb_type];
951 }
952
953 offset[0] = 0;
954
955 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
956 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
957
958 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
959 (
960 r->pipe->screen,
961 r->vertex_bufs.individual.ycbcr.buffer,
962 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
963 );
964
965 for (i = 0; i < 2; ++i)
966 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
967 (
968 r->pipe->screen,
969 r->vertex_bufs.individual.ref[i].buffer,
970 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
971 );
972
973 for (i = 0; i < r->num_macroblocks; ++i) {
974 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
975
976 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
977 ycbcr_vb, ref_vb);
978
979 ++offset[mb_type];
980 }
981
982 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer);
983 for (i = 0; i < 2; ++i)
984 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer);
985 }
986
987 static void
988 flush(struct vl_mpeg12_mc_renderer *r)
989 {
990 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
991 unsigned vb_start = 0;
992 struct vertex_shader_consts *vs_consts;
993 unsigned i;
994
995 assert(r);
996 assert(r->num_macroblocks == r->macroblocks_per_batch);
997
998 gen_macroblock_stream(r, num_macroblocks);
999
1000 r->fb_state.cbufs[0] = r->surface;
1001
1002 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1003 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1004
1005 vs_consts = pipe_buffer_map
1006 (
1007 r->pipe->screen, r->vs_const_buf,
1008 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1009 );
1010
1011 vs_consts->denorm.x = r->surface->width;
1012 vs_consts->denorm.y = r->surface->height;
1013
1014 pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf);
1015
1016 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1017 r->vs_const_buf);
1018
1019 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1020 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1021 r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
1022 r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
1023 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1024 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1025 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1026
1027 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1028 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1029 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1030 }
1031
1032 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1033 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1034 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1035 r->textures.individual.ref[0] = r->past->texture;
1036 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1037 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1038 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1039 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1040
1041 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1042 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1043 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1044 }
1045
1046 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1047 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1048 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1049 r->textures.individual.ref[0] = r->past->texture;
1050 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1051 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1052 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1053 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1054
1055 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1056 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1057 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1058 }
1059
1060 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1061 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1062 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1063 r->textures.individual.ref[0] = r->future->texture;
1064 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1065 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1066 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1067 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1068
1069 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1070 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1071 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1072 }
1073
1074 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
1075 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1076 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1077 r->textures.individual.ref[0] = r->future->texture;
1078 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1079 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1080 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1081 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1082
1083 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1084 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1085 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1086 }
1087
1088 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1089 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1090 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1091 r->textures.individual.ref[0] = r->past->texture;
1092 r->textures.individual.ref[1] = r->future->texture;
1093 r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
1094 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1095 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1096 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1097
1098 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1099 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1100 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1101 }
1102
1103 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1104 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1105 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1106 r->textures.individual.ref[0] = r->past->texture;
1107 r->textures.individual.ref[1] = r->future->texture;
1108 r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
1109 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1110 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1111 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1112
1113 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1114 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1115 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1116 }
1117
1118 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1119
1120 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1121 for (i = 0; i < 3; ++i)
1122 r->zero_block[i].x = ZERO_BLOCK_NIL;
1123
1124 r->num_macroblocks = 0;
1125 }
1126
1127 static void
1128 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1129 {
1130 unsigned y;
1131
1132 assert(src);
1133 assert(dst);
1134
1135 for (y = 0; y < BLOCK_HEIGHT; ++y)
1136 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1137 }
1138
1139 static void
1140 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1141 {
1142 unsigned y;
1143
1144 assert(src);
1145 assert(dst);
1146
1147 for (y = 0; y < BLOCK_HEIGHT; ++y)
1148 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1149 }
1150
1151 static void
1152 fill_zero_block(short *dst, unsigned dst_pitch)
1153 {
1154 unsigned y;
1155
1156 assert(dst);
1157
1158 for (y = 0; y < BLOCK_HEIGHT; ++y)
1159 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1160 }
1161
1162 static void
1163 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1164 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1165 {
1166 unsigned tex_pitch;
1167 short *texels;
1168 unsigned tb = 0, sb = 0;
1169 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1170 unsigned x, y;
1171
1172 assert(r);
1173 assert(blocks);
1174
1175 tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->texture->format);
1176 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1177
1178 for (y = 0; y < 2; ++y) {
1179 for (x = 0; x < 2; ++x, ++tb) {
1180 if ((cbp >> (5 - tb)) & 1) {
1181 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1182 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1183 texels + y * tex_pitch * BLOCK_WIDTH +
1184 x * BLOCK_WIDTH, tex_pitch);
1185 }
1186 else {
1187 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1188 texels + y * tex_pitch + x * BLOCK_WIDTH,
1189 tex_pitch);
1190 }
1191
1192 ++sb;
1193 }
1194 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1195 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1196 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1197 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1198 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1199 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1200 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1201 }
1202 }
1203 }
1204 }
1205 }
1206
1207 /* TODO: Implement 422, 444 */
1208 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1209
1210 mbpx /= 2;
1211 mbpy /= 2;
1212
1213 for (tb = 0; tb < 2; ++tb) {
1214 tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->texture->format);
1215 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1216
1217 if ((cbp >> (1 - tb)) & 1) {
1218 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1219 ++sb;
1220 }
1221 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1222 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1223 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1224 fill_zero_block(texels, tex_pitch);
1225 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1226 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1227 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1228 }
1229 }
1230 }
1231 }
1232 }
1233
1234 static void
1235 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1236 struct pipe_mpeg12_macroblock *mb)
1237 {
1238 assert(r);
1239 assert(mb);
1240 assert(mb->blocks);
1241 assert(r->num_macroblocks < r->macroblocks_per_batch);
1242
1243 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1244 sizeof(struct pipe_mpeg12_macroblock));
1245
1246 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1247
1248 ++r->num_macroblocks;
1249 }
1250
1251 bool
1252 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1253 struct pipe_context *pipe,
1254 unsigned picture_width,
1255 unsigned picture_height,
1256 enum pipe_video_chroma_format chroma_format,
1257 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1258 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1259 bool pot_buffers)
1260 {
1261 unsigned i;
1262
1263 assert(renderer);
1264 assert(pipe);
1265 /* TODO: Implement other policies */
1266 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1267 /* TODO: Implement this */
1268 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1269 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1270 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1271 assert(pot_buffers);
1272
1273 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1274
1275 renderer->pipe = pipe;
1276 renderer->picture_width = picture_width;
1277 renderer->picture_height = picture_height;
1278 renderer->chroma_format = chroma_format;
1279 renderer->bufmode = bufmode;
1280 renderer->eb_handling = eb_handling;
1281 renderer->pot_buffers = pot_buffers;
1282
1283 if (!init_pipe_state(renderer))
1284 return false;
1285 if (!init_shaders(renderer)) {
1286 cleanup_pipe_state(renderer);
1287 return false;
1288 }
1289 if (!init_buffers(renderer)) {
1290 cleanup_shaders(renderer);
1291 cleanup_pipe_state(renderer);
1292 return false;
1293 }
1294
1295 renderer->surface = NULL;
1296 renderer->past = NULL;
1297 renderer->future = NULL;
1298 for (i = 0; i < 3; ++i)
1299 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1300 renderer->num_macroblocks = 0;
1301
1302 xfer_buffers_map(renderer);
1303
1304 return true;
1305 }
1306
1307 void
1308 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1309 {
1310 assert(renderer);
1311
1312 xfer_buffers_unmap(renderer);
1313
1314 cleanup_pipe_state(renderer);
1315 cleanup_shaders(renderer);
1316 cleanup_buffers(renderer);
1317
1318 pipe_surface_reference(&renderer->surface, NULL);
1319 pipe_surface_reference(&renderer->past, NULL);
1320 pipe_surface_reference(&renderer->future, NULL);
1321 }
1322
1323 void
1324 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1325 *renderer,
1326 struct pipe_surface *surface,
1327 struct pipe_surface *past,
1328 struct pipe_surface *future,
1329 unsigned num_macroblocks,
1330 struct pipe_mpeg12_macroblock
1331 *mpeg12_macroblocks,
1332 struct pipe_fence_handle **fence)
1333 {
1334 bool new_surface = false;
1335
1336 assert(renderer);
1337 assert(surface);
1338 assert(num_macroblocks);
1339 assert(mpeg12_macroblocks);
1340
1341 if (renderer->surface) {
1342 if (surface != renderer->surface) {
1343 if (renderer->num_macroblocks > 0) {
1344 xfer_buffers_unmap(renderer);
1345 flush(renderer);
1346 }
1347
1348 new_surface = true;
1349 }
1350
1351 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1352 assert(surface != renderer->surface || renderer->past == past);
1353 assert(surface != renderer->surface || renderer->future == future);
1354 }
1355 else
1356 new_surface = true;
1357
1358 if (new_surface) {
1359 pipe_surface_reference(&renderer->surface, surface);
1360 pipe_surface_reference(&renderer->past, past);
1361 pipe_surface_reference(&renderer->future, future);
1362 renderer->fence = fence;
1363 renderer->surface_tex_inv_size.x = 1.0f / surface->width;
1364 renderer->surface_tex_inv_size.y = 1.0f / surface->height;
1365 }
1366
1367 while (num_macroblocks) {
1368 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1369 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1370 unsigned i;
1371
1372 for (i = 0; i < num_to_submit; ++i) {
1373 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1374 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1375 }
1376
1377 num_macroblocks -= num_to_submit;
1378
1379 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1380 xfer_buffers_unmap(renderer);
1381 flush(renderer);
1382 xfer_buffers_map(renderer);
1383 /* Next time we get this surface it may have new ref frames */
1384 pipe_surface_reference(&renderer->surface, NULL);
1385 pipe_surface_reference(&renderer->past, NULL);
1386 pipe_surface_reference(&renderer->future, NULL);
1387 }
1388 }
1389 }