Merge remote branch 'origin/master' into pipe-video
[mesa.git] src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include <assert.h>
30 #include <pipe/p_context.h>
31 #include <util/u_inlines.h>
32 #include <util/u_format.h>
33 #include <util/u_math.h>
34 #include <util/u_memory.h>
35 #include <tgsi/tgsi_ureg.h>
36
37 #define DEFAULT_BUF_ALIGNMENT 1
38 #define MACROBLOCK_WIDTH 16
39 #define MACROBLOCK_HEIGHT 16
40 #define BLOCK_WIDTH 8
41 #define BLOCK_HEIGHT 8
42 #define ZERO_BLOCK_NIL -1.0f
43 #define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
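/*
 * Residual blocks live in R16_SNORM textures, so a stored coefficient c samples back
 * as c / 32767. Multiplying by 32767 / 255 in the fragment shaders puts the residual
 * into the same normalized units as the (presumably 8-bit) reference surfaces; the
 * "16 to 9" in the name appears to refer to mapping the 16-bit SNORM range onto the
 * 9-bit signed range of MPEG-2 IDCT output.
 */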
44 #define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
45
46 struct vertex_shader_consts
47 {
48 struct vertex4f denorm;
49 };
50
51 struct fragment_shader_consts
52 {
53 struct vertex4f multiplier;
54 struct vertex4f div;
55 };
56
57 struct vert_stream_0
58 {
59 struct vertex2f pos;
60 struct vertex2f luma_tc;
61 struct vertex2f cb_tc;
62 struct vertex2f cr_tc;
63 };
64
65 enum MACROBLOCK_TYPE
66 {
67 MACROBLOCK_TYPE_INTRA,
68 MACROBLOCK_TYPE_FWD_FRAME_PRED,
69 MACROBLOCK_TYPE_FWD_FIELD_PRED,
70 MACROBLOCK_TYPE_BKWD_FRAME_PRED,
71 MACROBLOCK_TYPE_BKWD_FIELD_PRED,
72 MACROBLOCK_TYPE_BI_FRAME_PRED,
73 MACROBLOCK_TYPE_BI_FIELD_PRED,
74
75 NUM_MACROBLOCK_TYPES
76 };
77
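/*
 * Vertex shader for intra macroblocks: a simple pass-through that copies the position
 * and the three (Y, Cb, Cr) texcoords to the outputs. Intra blocks are reconstructed
 * from the residual alone, so no motion vectors are involved.
 */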
78 static bool
79 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
80 {
81 struct ureg_program *shader;
82 struct ureg_src vpos, vtex[3];
83 struct ureg_dst o_vpos, o_vtex[3];
84 unsigned i;
85
86 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
87 if (!shader)
88 return false;
89
90 vpos = ureg_DECL_vs_input(shader, 0);
91 for (i = 0; i < 3; ++i)
92 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
93 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
94 for (i = 0; i < 3; ++i)
95 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
96
97 /*
98 * o_vpos = vpos
99 * o_vtex[0..2] = vtex[0..2]
100 */
101 ureg_MOV(shader, o_vpos, vpos);
102 for (i = 0; i < 3; ++i)
103 ureg_MOV(shader, o_vtex[i], vtex[i]);
104
105 ureg_END(shader);
106
107 r->i_vs = ureg_create_shader_and_destroy(shader, r->pipe);
108 if (!r->i_vs)
109 return false;
110
111 return true;
112 }
113
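/*
 * Fragment shader for intra macroblocks: samples the Y, Cb and Cr block textures into
 * the r, g and b channels of one texel and scales the result by SCALE_FACTOR_16_TO_9.
 * The render target is assumed to hold one YCbCr channel per color component.
 */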
114 static bool
115 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
116 {
117 struct ureg_program *shader;
118 struct ureg_src tc[3];
119 struct ureg_src sampler[3];
120 struct ureg_dst texel, temp;
121 struct ureg_dst fragment;
122 unsigned i;
123
124 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
125 if (!shader)
126 return false;
127
128 for (i = 0; i < 3; ++i) {
129 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
130 sampler[i] = ureg_DECL_sampler(shader, i);
131 }
132 texel = ureg_DECL_temporary(shader);
133 temp = ureg_DECL_temporary(shader);
134 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
135
136 /*
137 * texel.r = tex(tc[0], sampler[0])
138 * texel.g = tex(tc[1], sampler[1])
139 * texel.b = tex(tc[2], sampler[2])
140 * fragment = texel * scale
141 */
142 for (i = 0; i < 3; ++i) {
143 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
144 ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
145 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
146 }
147 ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
148
149 ureg_release_temporary(shader, texel);
150 ureg_release_temporary(shader, temp);
151 ureg_END(shader);
152
153 r->i_fs = ureg_create_shader_and_destroy(shader, r->pipe);
154 if (!r->i_fs)
155 return false;
156
157 return true;
158 }
159
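/*
 * Vertex shader for frame-predicted macroblocks: same pass-through as the intra
 * shader, plus a fourth output texcoord computed as vpos + vtex[3], i.e. the vertex
 * position displaced by the motion vector, used to sample the reference frame.
 */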
160 static bool
161 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
162 {
163 struct ureg_program *shader;
164 struct ureg_src vpos, vtex[4];
165 struct ureg_dst o_vpos, o_vtex[4];
166 unsigned i;
167
168 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
169 if (!shader)
170 return false;
171
172 vpos = ureg_DECL_vs_input(shader, 0);
173 for (i = 0; i < 4; ++i)
174 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
175 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
176 for (i = 0; i < 4; ++i)
177 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
178
179 /*
180 * o_vpos = vpos
181 * o_vtex[0..2] = vtex[0..2]
182 * o_vtex[3] = vpos + vtex[3] // Apply motion vector
183 */
184 ureg_MOV(shader, o_vpos, vpos);
185 for (i = 0; i < 3; ++i)
186 ureg_MOV(shader, o_vtex[i], vtex[i]);
187 ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
188
189 ureg_END(shader);
190
191 r->p_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
192 if (!r->p_vs[0])
193 return false;
194
195 return true;
196 }
197
198 #if 0
199 static void
200 create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
201 {
202 assert(false);
203 }
204 #endif
205
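/*
 * Fragment shader for frame-predicted macroblocks: gathers the Y/Cb/Cr residual as in
 * the intra shader, samples the reference frame at the motion-compensated texcoord,
 * and computes fragment = residual * scale + reference.
 */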
206 static bool
207 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
208 {
209 struct ureg_program *shader;
210 struct ureg_src tc[4];
211 struct ureg_src sampler[4];
212 struct ureg_dst texel, ref;
213 struct ureg_dst fragment;
214 unsigned i;
215
216 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
217 if (!shader)
218 return false;
219
220 for (i = 0; i < 4; ++i) {
221 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
222 sampler[i] = ureg_DECL_sampler(shader, i);
223 }
224 texel = ureg_DECL_temporary(shader);
225 ref = ureg_DECL_temporary(shader);
226 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
227
228 /*
229 * texel.r = tex(tc[0], sampler[0])
230 * texel.g = tex(tc[1], sampler[1])
231 * texel.b = tex(tc[2], sampler[2])
232 * ref = tex(tc[3], sampler[3])
233 * fragment = texel * scale + ref
234 */
235 for (i = 0; i < 3; ++i) {
236 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
237 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
238 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
239 }
240 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
241 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
242
243 ureg_release_temporary(shader, texel);
244 ureg_release_temporary(shader, ref);
245 ureg_END(shader);
246
247 r->p_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
248 if (!r->p_fs[0])
249 return false;
250
251 return true;
252 }
253
254 #if 0
255 static void
256 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
257 {
258 assert(false);
259 }
260 #endif
261
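/*
 * Vertex shader for bidirectionally frame-predicted macroblocks: passes position and
 * the three texcoords through and adds the two reference motion vectors (inputs 4
 * and 6) to the position. Input 5, the first reference's bottom-field element, is
 * skipped since frame prediction only needs one vector per reference.
 */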
262 static bool
263 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
264 {
265 struct ureg_program *shader;
266 struct ureg_src vpos, vtex[5];
267 struct ureg_dst o_vpos, o_vtex[5];
268 unsigned i;
269
270 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
271 if (!shader)
272 return false;
273
274 vpos = ureg_DECL_vs_input(shader, 0);
275 for (i = 0; i < 4; ++i)
276 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
 277 /* Skip input 5: the first ref's bottom-field element is only needed for field prediction */
278 vtex[4] = ureg_DECL_vs_input(shader, 6);
279 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
280 for (i = 0; i < 5; ++i)
281 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
282
283 /*
284 * o_vpos = vpos
285 * o_vtex[0..2] = vtex[0..2]
286 * o_vtex[3..4] = vpos + vtex[3..4] // Apply motion vector
287 */
288 ureg_MOV(shader, o_vpos, vpos);
289 for (i = 0; i < 3; ++i)
290 ureg_MOV(shader, o_vtex[i], vtex[i]);
291 for (i = 3; i < 5; ++i)
292 ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
293
294 ureg_END(shader);
295
296 r->b_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
297 if (!r->b_vs[0])
298 return false;
299
300 return true;
301 }
302
303 #if 0
304 static void
305 create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
306 {
307 assert(false);
308 }
309 #endif
310
311 static bool
312 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
313 {
314 struct ureg_program *shader;
315 struct ureg_src tc[5];
316 struct ureg_src sampler[5];
317 struct ureg_dst texel, ref[2];
318 struct ureg_dst fragment;
319 unsigned i;
320
321 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
322 if (!shader)
323 return false;
324
325 for (i = 0; i < 5; ++i) {
326 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
327 sampler[i] = ureg_DECL_sampler(shader, i);
328 }
329 texel = ureg_DECL_temporary(shader);
330 ref[0] = ureg_DECL_temporary(shader);
331 ref[1] = ureg_DECL_temporary(shader);
332 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
333
334 /*
335 * texel.r = tex(tc[0], sampler[0])
336 * texel.g = tex(tc[1], sampler[1])
337 * texel.b = tex(tc[2], sampler[2])
 338 * ref[0..1] = tex(tc[3..4], sampler[3..4])
339 * ref[0] = lerp(ref[0], ref[1], 0.5)
340 * fragment = texel * scale + ref[0]
341 */
342 for (i = 0; i < 3; ++i) {
343 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
344 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
345 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
346 }
347 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
348 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
349 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
350
351 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
352
353 ureg_release_temporary(shader, texel);
354 ureg_release_temporary(shader, ref[0]);
355 ureg_release_temporary(shader, ref[1]);
356 ureg_END(shader);
357
358 r->b_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
359 if (!r->b_fs[0])
360 return false;
361
362 return true;
363 }
364
365 #if 0
366 static void
367 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
368 {
369 assert(false);
370 }
371 #endif
372
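/*
 * Maps the three block textures (Y, Cb, Cr) for CPU writes and records the mapped
 * pointers. grab_blocks() copies IDCT coefficients into them while a batch is being
 * collected; xfer_buffers_unmap() releases the transfers before drawing.
 */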
373 static void
374 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
375 {
376 unsigned i;
377
378 assert(r);
379
380 for (i = 0; i < 3; ++i) {
381 r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
382 (
383 r->pipe->screen, r->textures.all[i],
384 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
385 r->textures.all[i]->width0, r->textures.all[i]->height0
386 );
387
388 r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
389 }
390 }
391
392 static void
393 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
394 {
395 unsigned i;
396
397 assert(r);
398
399 for (i = 0; i < 3; ++i) {
400 r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
401 r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
402 }
403 }
404
405 static bool
406 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
407 {
408 struct pipe_sampler_state sampler;
409 unsigned filters[5];
410 unsigned i;
411
412 assert(r);
413
414 r->viewport.scale[0] = r->pot_buffers ?
415 util_next_power_of_two(r->picture_width) : r->picture_width;
416 r->viewport.scale[1] = r->pot_buffers ?
417 util_next_power_of_two(r->picture_height) : r->picture_height;
418 r->viewport.scale[2] = 1;
419 r->viewport.scale[3] = 1;
420 r->viewport.translate[0] = 0;
421 r->viewport.translate[1] = 0;
422 r->viewport.translate[2] = 0;
423 r->viewport.translate[3] = 0;
424
425 r->fb_state.width = r->pot_buffers ?
426 util_next_power_of_two(r->picture_width) : r->picture_width;
427 r->fb_state.height = r->pot_buffers ?
428 util_next_power_of_two(r->picture_height) : r->picture_height;
429 r->fb_state.nr_cbufs = 1;
430 r->fb_state.zsbuf = NULL;
431
432 /* Luma filter */
433 filters[0] = PIPE_TEX_FILTER_NEAREST;
434 /* Chroma filters */
435 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
436 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
437 filters[1] = PIPE_TEX_FILTER_NEAREST;
438 filters[2] = PIPE_TEX_FILTER_NEAREST;
439 }
440 else {
441 filters[1] = PIPE_TEX_FILTER_LINEAR;
442 filters[2] = PIPE_TEX_FILTER_LINEAR;
443 }
444 /* Fwd, bkwd ref filters */
445 filters[3] = PIPE_TEX_FILTER_LINEAR;
446 filters[4] = PIPE_TEX_FILTER_LINEAR;
447
448 for (i = 0; i < 5; ++i) {
449 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
450 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
451 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
452 sampler.min_img_filter = filters[i];
453 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
454 sampler.mag_img_filter = filters[i];
455 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
456 sampler.compare_func = PIPE_FUNC_ALWAYS;
457 sampler.normalized_coords = 1;
458 /*sampler.shadow_ambient = ; */
459 /*sampler.lod_bias = ; */
460 sampler.min_lod = 0;
461 /*sampler.max_lod = ; */
462 /*sampler.border_color[i] = ; */
463 /*sampler.max_anisotropy = ; */
464 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
465 }
466
467 return true;
468 }
469
470 static void
471 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
472 {
473 unsigned i;
474
475 assert(r);
476
477 for (i = 0; i < 5; ++i)
478 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
479 }
480
481 static bool
482 init_shaders(struct vl_mpeg12_mc_renderer *r)
483 {
484 assert(r);
485
 486 return create_intra_vert_shader(r) &&
 487        create_intra_frag_shader(r) &&
 488        create_frame_pred_vert_shader(r) &&
 489        create_frame_pred_frag_shader(r) &&
 490        create_frame_bi_pred_vert_shader(r) &&
 491        create_frame_bi_pred_frag_shader(r);
494 }
495
496 static void
497 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
498 {
499 assert(r);
500
501 r->pipe->delete_vs_state(r->pipe, r->i_vs);
502 r->pipe->delete_fs_state(r->pipe, r->i_fs);
503 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
504 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
505 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
506 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
507 }
508
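/*
 * Allocates the per-batch resources: the macroblock staging array, the Y/Cb/Cr block
 * textures (power-of-two sized when pot_buffers is set, with chroma scaled down
 * according to the chroma format), one vertex buffer for stream 0 (position plus
 * three texcoords per vertex), two vertex buffers holding per-reference motion
 * vectors, the vertex element layout, and the vertex shader constant buffer.
 */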
509 static bool
510 init_buffers(struct vl_mpeg12_mc_renderer *r)
511 {
512 struct pipe_texture template;
513
514 const unsigned mbw =
515 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
516 const unsigned mbh =
517 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
518
519 unsigned i;
520
521 assert(r);
522
523 r->macroblocks_per_batch =
524 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
525 r->num_macroblocks = 0;
526 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
527
528 memset(&template, 0, sizeof(struct pipe_texture));
529 template.target = PIPE_TEXTURE_2D;
 530 /* TODO: Accommodate HW that can't do this, and handle cases where this isn't precise enough */
531 template.format = PIPE_FORMAT_R16_SNORM;
532 template.last_level = 0;
533 template.width0 = r->pot_buffers ?
534 util_next_power_of_two(r->picture_width) : r->picture_width;
535 template.height0 = r->pot_buffers ?
536 util_next_power_of_two(r->picture_height) : r->picture_height;
537 template.depth0 = 1;
538 template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
539
540 r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
541
542 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
543 template.width0 = r->pot_buffers ?
544 util_next_power_of_two(r->picture_width / 2) :
545 r->picture_width / 2;
546 template.height0 = r->pot_buffers ?
547 util_next_power_of_two(r->picture_height / 2) :
548 r->picture_height / 2;
549 }
550 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
551 template.height0 = r->pot_buffers ?
552 util_next_power_of_two(r->picture_height / 2) :
553 r->picture_height / 2;
554
555 r->textures.individual.cb =
556 r->pipe->screen->texture_create(r->pipe->screen, &template);
557 r->textures.individual.cr =
558 r->pipe->screen->texture_create(r->pipe->screen, &template);
559
560 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
561 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
562 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
563 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
564 (
565 r->pipe->screen,
566 DEFAULT_BUF_ALIGNMENT,
567 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
568 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
569 );
570
571 for (i = 1; i < 3; ++i) {
572 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
573 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
574 r->vertex_bufs.all[i].buffer_offset = 0;
575 r->vertex_bufs.all[i].buffer = pipe_buffer_create
576 (
577 r->pipe->screen,
578 DEFAULT_BUF_ALIGNMENT,
579 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
580 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
581 );
582 }
583
584 /* Position element */
585 r->vertex_elems[0].src_offset = 0;
586 r->vertex_elems[0].instance_divisor = 0;
587 r->vertex_elems[0].vertex_buffer_index = 0;
588 r->vertex_elems[0].nr_components = 2;
589 r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
590
591 /* Luma, texcoord element */
592 r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
593 r->vertex_elems[1].instance_divisor = 0;
594 r->vertex_elems[1].vertex_buffer_index = 0;
595 r->vertex_elems[1].nr_components = 2;
596 r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
597
 598 /* Chroma Cb texcoord element */
599 r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
600 r->vertex_elems[2].instance_divisor = 0;
601 r->vertex_elems[2].vertex_buffer_index = 0;
602 r->vertex_elems[2].nr_components = 2;
603 r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
604
 605 /* Chroma Cr texcoord element */
606 r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
607 r->vertex_elems[3].instance_divisor = 0;
608 r->vertex_elems[3].vertex_buffer_index = 0;
609 r->vertex_elems[3].nr_components = 2;
610 r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
611
612 /* First ref surface top field texcoord element */
613 r->vertex_elems[4].src_offset = 0;
614 r->vertex_elems[4].instance_divisor = 0;
615 r->vertex_elems[4].vertex_buffer_index = 1;
616 r->vertex_elems[4].nr_components = 2;
617 r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
618
619 /* First ref surface bottom field texcoord element */
620 r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
621 r->vertex_elems[5].instance_divisor = 0;
622 r->vertex_elems[5].vertex_buffer_index = 1;
623 r->vertex_elems[5].nr_components = 2;
624 r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
625
626 /* Second ref surface top field texcoord element */
627 r->vertex_elems[6].src_offset = 0;
628 r->vertex_elems[6].instance_divisor = 0;
629 r->vertex_elems[6].vertex_buffer_index = 2;
630 r->vertex_elems[6].nr_components = 2;
631 r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
632
633 /* Second ref surface bottom field texcoord element */
634 r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
635 r->vertex_elems[7].instance_divisor = 0;
636 r->vertex_elems[7].vertex_buffer_index = 2;
637 r->vertex_elems[7].nr_components = 2;
638 r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
639
640 r->vs_const_buf = pipe_buffer_create
641 (
642 r->pipe->screen,
643 DEFAULT_BUF_ALIGNMENT,
644 PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
645 sizeof(struct vertex_shader_consts)
646 );
647
648 return true;
649 }
650
651 static void
652 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
653 {
654 unsigned i;
655
656 assert(r);
657
658 pipe_buffer_reference(&r->vs_const_buf, NULL);
659
660 for (i = 0; i < 3; ++i)
661 pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
662
663 for (i = 0; i < 3; ++i)
664 pipe_texture_reference(&r->textures.all[i], NULL);
665
666 FREE(r->macroblock_buf);
667 }
668
669 static enum MACROBLOCK_TYPE
670 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
671 {
672 assert(mb);
673
674 switch (mb->mb_type) {
675 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
676 return MACROBLOCK_TYPE_INTRA;
677 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
678 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
679 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
680 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
681 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
682 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
683 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
684 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
685 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
686 default:
687 assert(0);
688 }
689
690 /* Unreachable */
691 return -1;
692 }
693
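/*
 * Emits six vertices (two triangles) covering one 8x8 region of the macroblock at
 * (mbx, mby). For each channel the texcoords point at the block's own location,
 * unless zero blocks are in use and the channel's CBP bit is clear, in which case
 * they point at the shared zero block so the shader samples a zero residual.
 */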
694 static void
695 gen_block_verts(struct vert_stream_0 *vb, unsigned cbp, unsigned mbx, unsigned mby,
696 const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
697 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
698 bool use_zeroblocks, struct vertex2f *zero_blocks)
699 {
700 struct vertex2f v;
701
702 assert(vb);
703 assert(unit && half && offset);
704 assert(zero_blocks || !use_zeroblocks);
705
706 /* Generate vertices for two triangles covering a block */
707 v.x = mbx * unit->x + offset->x;
708 v.y = mby * unit->y + offset->y;
709
710 vb[0].pos.x = v.x;
711 vb[0].pos.y = v.y;
712 vb[1].pos.x = v.x;
713 vb[1].pos.y = v.y + half->y;
714 vb[2].pos.x = v.x + half->x;
715 vb[2].pos.y = v.y;
716 vb[3].pos.x = v.x + half->x;
717 vb[3].pos.y = v.y;
718 vb[4].pos.x = v.x;
719 vb[4].pos.y = v.y + half->y;
720 vb[5].pos.x = v.x + half->x;
721 vb[5].pos.y = v.y + half->y;
722
 723 /* Generate texcoords for the triangles. Each channel's texcoords either point at the block's own area
 724    of the luma/chroma texture, or, when zero blocks are in use and the corresponding CBP bit is not set
 725    (i.e. no coefficient data exists for that channel in this block), at the shared zero block. */
726
727 if (!use_zeroblocks || cbp & luma_mask) {
728 v.x = mbx * unit->x + offset->x;
729 v.y = mby * unit->y + offset->y;
730 }
731 else {
732 v.x = zero_blocks[0].x;
733 v.y = zero_blocks[0].y;
734 }
735
736 vb[0].luma_tc.x = v.x;
737 vb[0].luma_tc.y = v.y;
738 vb[1].luma_tc.x = v.x;
739 vb[1].luma_tc.y = v.y + half->y;
740 vb[2].luma_tc.x = v.x + half->x;
741 vb[2].luma_tc.y = v.y;
742 vb[3].luma_tc.x = v.x + half->x;
743 vb[3].luma_tc.y = v.y;
744 vb[4].luma_tc.x = v.x;
745 vb[4].luma_tc.y = v.y + half->y;
746 vb[5].luma_tc.x = v.x + half->x;
747 vb[5].luma_tc.y = v.y + half->y;
748
749 if (!use_zeroblocks || cbp & cb_mask) {
750 v.x = mbx * unit->x + offset->x;
751 v.y = mby * unit->y + offset->y;
752 }
753 else {
754 v.x = zero_blocks[1].x;
755 v.y = zero_blocks[1].y;
756 }
757
758 vb[0].cb_tc.x = v.x;
759 vb[0].cb_tc.y = v.y;
760 vb[1].cb_tc.x = v.x;
761 vb[1].cb_tc.y = v.y + half->y;
762 vb[2].cb_tc.x = v.x + half->x;
763 vb[2].cb_tc.y = v.y;
764 vb[3].cb_tc.x = v.x + half->x;
765 vb[3].cb_tc.y = v.y;
766 vb[4].cb_tc.x = v.x;
767 vb[4].cb_tc.y = v.y + half->y;
768 vb[5].cb_tc.x = v.x + half->x;
769 vb[5].cb_tc.y = v.y + half->y;
770
771 if (!use_zeroblocks || cbp & cr_mask) {
772 v.x = mbx * unit->x + offset->x;
773 v.y = mby * unit->y + offset->y;
774 }
775 else {
776 v.x = zero_blocks[2].x;
777 v.y = zero_blocks[2].y;
778 }
779
780 vb[0].cr_tc.x = v.x;
781 vb[0].cr_tc.y = v.y;
782 vb[1].cr_tc.x = v.x;
783 vb[1].cr_tc.y = v.y + half->y;
784 vb[2].cr_tc.x = v.x + half->x;
785 vb[2].cr_tc.y = v.y;
786 vb[3].cr_tc.x = v.x + half->x;
787 vb[3].cr_tc.y = v.y;
788 vb[4].cr_tc.x = v.x;
789 vb[4].cr_tc.y = v.y + half->y;
790 vb[5].cr_tc.x = v.x + half->x;
791 vb[5].cr_tc.y = v.y + half->y;
792 }
793
794 static void
795 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
796 struct pipe_mpeg12_macroblock *mb, unsigned pos,
797 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
798 {
799 struct vertex2f mo_vec[2];
800
801 unsigned i;
802
803 assert(r);
804 assert(mb);
805 assert(ycbcr_vb);
806 assert(pos < r->macroblocks_per_batch);
807
808 mo_vec[1].x = 0;
809 mo_vec[1].y = 0;
810
811 switch (mb->mb_type) {
812 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
813 {
814 struct vertex2f *vb;
815
816 assert(ref_vb && ref_vb[1]);
817
818 vb = ref_vb[1] + pos * 2 * 24;
819
820 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
821 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
822
823 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
824 for (i = 0; i < 24 * 2; i += 2) {
825 vb[i].x = mo_vec[0].x;
826 vb[i].y = mo_vec[0].y;
827 }
828 }
829 else {
830 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
831 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
832
833 for (i = 0; i < 24 * 2; i += 2) {
834 vb[i].x = mo_vec[0].x;
835 vb[i].y = mo_vec[0].y;
836 vb[i + 1].x = mo_vec[1].x;
837 vb[i + 1].y = mo_vec[1].y;
838 }
839 }
840
841 /* fall-through */
842 }
843 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
844 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
845 {
846 struct vertex2f *vb;
847
848 assert(ref_vb && ref_vb[0]);
849
850 vb = ref_vb[0] + pos * 2 * 24;
851
852 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
853 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
854 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
855
856 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
857 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
858 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
859 }
860 }
861 else {
862 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
863 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
864
865 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
866 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
867 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
868 }
869 }
870
 871 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
872 for (i = 0; i < 24 * 2; i += 2) {
873 vb[i].x = mo_vec[0].x;
874 vb[i].y = mo_vec[0].y;
875 }
876 }
877 else {
878 for (i = 0; i < 24 * 2; i += 2) {
879 vb[i].x = mo_vec[0].x;
880 vb[i].y = mo_vec[0].y;
881 vb[i + 1].x = mo_vec[1].x;
882 vb[i + 1].y = mo_vec[1].y;
883 }
884 }
885
886 /* fall-through */
887 }
888 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
889 {
890 const struct vertex2f unit =
891 {
892 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
893 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
894 };
895 const struct vertex2f half =
896 {
897 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
898 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
899 };
900 const struct vertex2f offsets[2][2] =
901 {
902 {
903 {0, 0}, {0, half.y}
904 },
905 {
906 {half.x, 0}, {half.x, half.y}
907 }
908 };
909 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
910
911 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
912
913 gen_block_verts(vb, mb->cbp, mb->mbx, mb->mby,
914 &unit, &half, &offsets[0][0],
915 32, 2, 1, use_zb, r->zero_block);
916
917 gen_block_verts(vb + 6, mb->cbp, mb->mbx, mb->mby,
918 &unit, &half, &offsets[1][0],
919 16, 2, 1, use_zb, r->zero_block);
920
921 gen_block_verts(vb + 12, mb->cbp, mb->mbx, mb->mby,
922 &unit, &half, &offsets[0][1],
923 8, 2, 1, use_zb, r->zero_block);
924
925 gen_block_verts(vb + 18, mb->cbp, mb->mbx, mb->mby,
926 &unit, &half, &offsets[1][1],
927 4, 2, 1, use_zb, r->zero_block);
928
929 break;
930 }
931 default:
932 assert(0);
933 }
934 }
935
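/*
 * Groups the batch's vertices by macroblock type: counts how many macroblocks of
 * each type are queued, derives a starting slot per type, then writes each
 * macroblock's vertices into the mapped vertex buffers at its type's slot, so that
 * flush() can draw every type as one contiguous range.
 */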
936 static void
937 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
938 unsigned *num_macroblocks)
939 {
940 unsigned offset[NUM_MACROBLOCK_TYPES];
941 struct vert_stream_0 *ycbcr_vb;
942 struct vertex2f *ref_vb[2];
943 unsigned i;
944
945 assert(r);
946 assert(num_macroblocks);
947
948 for (i = 0; i < r->num_macroblocks; ++i) {
949 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
950 ++num_macroblocks[mb_type];
951 }
952
953 offset[0] = 0;
954
955 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
956 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
957
958 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
959 (
960 r->pipe->screen,
961 r->vertex_bufs.individual.ycbcr.buffer,
962 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
963 );
964
965 for (i = 0; i < 2; ++i)
966 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
967 (
968 r->pipe->screen,
969 r->vertex_bufs.individual.ref[i].buffer,
970 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
971 );
972
973 for (i = 0; i < r->num_macroblocks; ++i) {
974 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
975
976 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
977 ycbcr_vb, ref_vb);
978
979 ++offset[mb_type];
980 }
981
982 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer);
983 for (i = 0; i < 2; ++i)
984 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer);
985 }
986
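/*
 * Renders one full batch: generates the type-grouped vertex data, binds the target
 * surface and viewport, uploads the denormalization constants, then issues one draw
 * per macroblock type that is present (the field-prediction draws are stubbed out
 * with "if (false ...)"). Finally the zero blocks and the batch counter are reset.
 */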
987 static void
988 flush(struct vl_mpeg12_mc_renderer *r)
989 {
990 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
991 unsigned vb_start = 0;
992 struct vertex_shader_consts *vs_consts;
993 unsigned i;
994
995 assert(r);
996 assert(r->num_macroblocks == r->macroblocks_per_batch);
997
998 gen_macroblock_stream(r, num_macroblocks);
999
1000 r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface
1001 (
1002 r->pipe->screen, r->surface,
1003 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
1004 );
1005
1006 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1007 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1008
1009 vs_consts = pipe_buffer_map
1010 (
1011 r->pipe->screen, r->vs_const_buf,
1012 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1013 );
1014
1015 vs_consts->denorm.x = r->surface->width0;
1016 vs_consts->denorm.y = r->surface->height0;
1017
1018 pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf);
1019
1020 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1021 r->vs_const_buf);
1022
1023 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1024 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1025 r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
1026 r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
1027 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1028 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1029 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1030
1031 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1032 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1033 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1034 }
1035
1036 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1037 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1038 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1039 r->textures.individual.ref[0] = r->past;
1040 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1041 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1042 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1043 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1044
1045 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1046 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1047 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1048 }
1049
1050 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1051 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1052 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1053 r->textures.individual.ref[0] = r->past;
1054 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1055 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1056 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1057 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1058
1059 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1060 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1061 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1062 }
1063
1064 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1065 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1066 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1067 r->textures.individual.ref[0] = r->future;
1068 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1069 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1070 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1071 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1072
1073 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1074 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1075 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1076 }
1077
1078 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
1079 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1080 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1081 r->textures.individual.ref[0] = r->future;
1082 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1083 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1084 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1085 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1086
1087 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1088 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1089 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1090 }
1091
1092 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1093 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1094 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1095 r->textures.individual.ref[0] = r->past;
1096 r->textures.individual.ref[1] = r->future;
1097 r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
1098 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1099 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1100 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1101
1102 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1103 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1104 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1105 }
1106
1107 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1108 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1109 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1110 r->textures.individual.ref[0] = r->past;
1111 r->textures.individual.ref[1] = r->future;
1112 r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
1113 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1114 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1115 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1116
1117 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1118 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1119 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1120 }
1121
1122 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1123 pipe_surface_reference(&r->fb_state.cbufs[0], NULL);
1124
1125 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1126 for (i = 0; i < 3; ++i)
1127 r->zero_block[i].x = ZERO_BLOCK_NIL;
1128
1129 r->num_macroblocks = 0;
1130 }
1131
1132 static void
1133 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1134 {
1135 unsigned y;
1136
1137 assert(src);
1138 assert(dst);
1139
1140 for (y = 0; y < BLOCK_HEIGHT; ++y)
1141 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1142 }
1143
1144 static void
1145 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1146 {
1147 unsigned y;
1148
1149 assert(src);
1150 assert(dst);
1151
1152 for (y = 0; y < BLOCK_HEIGHT; ++y)
1153 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1154 }
1155
1156 static void
1157 fill_zero_block(short *dst, unsigned dst_pitch)
1158 {
1159 unsigned y;
1160
1161 assert(dst);
1162
1163 for (y = 0; y < BLOCK_HEIGHT; ++y)
1164 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1165 }
1166
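/*
 * Copies one macroblock's coded blocks from the client buffer into the mapped
 * Y/Cb/Cr textures at the macroblock's position. Blocks whose CBP bit is set are
 * copied (frame- or field-interleaved for luma, frame only for chroma); empty blocks
 * are either zero-filled, or in XFER_ONE mode a single shared zero block per channel
 * is written once and its position remembered. Only 4:2:0 chroma is handled so far.
 */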
1167 static void
1168 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1169 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1170 {
1171 unsigned tex_pitch;
1172 short *texels;
1173 unsigned tb = 0, sb = 0;
1174 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1175 unsigned x, y;
1176
1177 assert(r);
1178 assert(blocks);
1179
1180 tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->texture->format);
1181 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1182
1183 for (y = 0; y < 2; ++y) {
1184 for (x = 0; x < 2; ++x, ++tb) {
1185 if ((cbp >> (5 - tb)) & 1) {
1186 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1187 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1188 texels + y * tex_pitch * BLOCK_WIDTH +
1189 x * BLOCK_WIDTH, tex_pitch);
1190 }
1191 else {
1192 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1193 texels + y * tex_pitch + x * BLOCK_WIDTH,
1194 tex_pitch);
1195 }
1196
1197 ++sb;
1198 }
1199 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1200 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1201 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1202 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1203 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1204 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1205 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1206 }
1207 }
1208 }
1209 }
1210 }
1211
1212 /* TODO: Implement 422, 444 */
1213 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1214
1215 mbpx /= 2;
1216 mbpy /= 2;
1217
1218 for (tb = 0; tb < 2; ++tb) {
1219 tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->texture->format);
1220 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1221
1222 if ((cbp >> (1 - tb)) & 1) {
1223 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1224 ++sb;
1225 }
1226 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1227 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1228 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1229 fill_zero_block(texels, tex_pitch);
1230 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1231 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1232 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1233 }
1234 }
1235 }
1236 }
1237 }
1238
1239 static void
1240 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1241 struct pipe_mpeg12_macroblock *mb)
1242 {
1243 void *blocks;
1244
1245 assert(r);
1246 assert(mb);
1247 assert(r->num_macroblocks < r->macroblocks_per_batch);
1248
1249 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1250 sizeof(struct pipe_mpeg12_macroblock));
1251
1252 blocks = pipe_buffer_map(r->pipe->screen, mb->blocks,
1253 PIPE_BUFFER_USAGE_CPU_READ);
1254 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, blocks);
1255 pipe_buffer_unmap(r->pipe->screen, mb->blocks);
1256
1257 ++r->num_macroblocks;
1258 }
1259
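/*
 * Initializes the renderer. Only per-picture batching, empty-block transfer (ALL or
 * ONE) and power-of-two buffers are supported at this point, as the asserts below
 * indicate. On success the block textures are left mapped, ready for the first batch.
 */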
1260 bool
1261 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1262 struct pipe_context *pipe,
1263 unsigned picture_width,
1264 unsigned picture_height,
1265 enum pipe_video_chroma_format chroma_format,
1266 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1267 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1268 bool pot_buffers)
1269 {
1270 unsigned i;
1271
1272 assert(renderer);
1273 assert(pipe);
1274 /* TODO: Implement other policies */
1275 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1276 /* TODO: Implement this */
1277 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1278 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1279 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1280 assert(pot_buffers);
1281
1282 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1283
1284 renderer->pipe = pipe;
1285 renderer->picture_width = picture_width;
1286 renderer->picture_height = picture_height;
1287 renderer->chroma_format = chroma_format;
1288 renderer->bufmode = bufmode;
1289 renderer->eb_handling = eb_handling;
1290 renderer->pot_buffers = pot_buffers;
1291
1292 if (!init_pipe_state(renderer))
1293 return false;
1294 if (!init_shaders(renderer)) {
1295 cleanup_pipe_state(renderer);
1296 return false;
1297 }
1298 if (!init_buffers(renderer)) {
1299 cleanup_shaders(renderer);
1300 cleanup_pipe_state(renderer);
1301 return false;
1302 }
1303
1304 renderer->surface = NULL;
1305 renderer->past = NULL;
1306 renderer->future = NULL;
1307 for (i = 0; i < 3; ++i)
1308 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1309 renderer->num_macroblocks = 0;
1310
1311 xfer_buffers_map(renderer);
1312
1313 return true;
1314 }
1315
1316 void
1317 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1318 {
1319 assert(renderer);
1320
1321 xfer_buffers_unmap(renderer);
1322
1323 cleanup_pipe_state(renderer);
1324 cleanup_shaders(renderer);
1325 cleanup_buffers(renderer);
1326 }
1327
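/*
 * Entry point: queues macroblocks for rendering into 'surface'. A pending batch is
 * flushed when the target surface changes; otherwise macroblocks (and their
 * coefficient blocks) are accumulated and a flush is issued each time a full batch
 * of macroblocks_per_batch has been collected.
 */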
1328 void
1329 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1330 *renderer,
1331 struct pipe_texture *surface,
1332 struct pipe_texture *past,
1333 struct pipe_texture *future,
1334 unsigned num_macroblocks,
1335 struct pipe_mpeg12_macroblock
1336 *mpeg12_macroblocks,
1337 struct pipe_fence_handle **fence)
1338 {
1339 bool new_surface = false;
1340
1341 assert(renderer);
1342 assert(surface);
1343 assert(num_macroblocks);
1344 assert(mpeg12_macroblocks);
1345
1346 if (renderer->surface) {
1347 if (surface != renderer->surface) {
 1348 if (renderer->num_macroblocks > 0) {
 1349 xfer_buffers_unmap(renderer);
 1350 flush(renderer);
 /* Re-map the block textures so the macroblocks grabbed below have somewhere to go,
    mirroring the batch-full path further down. */
 xfer_buffers_map(renderer);
 1351 }
1352
1353 new_surface = true;
1354 }
1355
 1356 /* If the surface we're rendering to hasn't changed, the ref frames shouldn't change. */
1357 assert(surface != renderer->surface || renderer->past == past);
1358 assert(surface != renderer->surface || renderer->future == future);
1359 }
1360 else
1361 new_surface = true;
1362
1363 if (new_surface) {
1364 renderer->surface = surface;
1365 renderer->past = past;
1366 renderer->future = future;
1367 renderer->fence = fence;
1368 renderer->surface_tex_inv_size.x = 1.0f / surface->width0;
1369 renderer->surface_tex_inv_size.y = 1.0f / surface->height0;
1370 }
1371
1372 while (num_macroblocks) {
1373 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1374 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1375 unsigned i;
1376
1377 for (i = 0; i < num_to_submit; ++i) {
1378 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1379 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1380 }
1381
1382 num_macroblocks -= num_to_submit;
1383
1384 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1385 xfer_buffers_unmap(renderer);
1386 flush(renderer);
1387 xfer_buffers_map(renderer);
1388 /* Next time we get this surface it may have new ref frames */
1389 renderer->surface = NULL;
1390 }
1391 }
1392 }