[mesa.git] / src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include <assert.h>
30 #include <pipe/p_context.h>
31 #include <util/u_inlines.h>
32 #include <util/u_format.h>
33 #include <util/u_math.h>
34 #include <util/u_memory.h>
35 #include <util/u_sampler.h>
36 #include <tgsi/tgsi_ureg.h>
37
38 #define DEFAULT_BUF_ALIGNMENT 1
39 #define MACROBLOCK_WIDTH 16
40 #define MACROBLOCK_HEIGHT 16
41 #define BLOCK_WIDTH 8
42 #define BLOCK_HEIGHT 8
43 #define ZERO_BLOCK_NIL -1.0f
44 #define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
45 #define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
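/* Note on the scale factor (inferred from the R16_SNORM block textures and the
 * fragment shaders below): a residual value c copied into an R16_SNORM texel
 * samples back as c / 32767, so multiplying the sample by 32767 / 255 yields
 * c / 255, i.e. the residual re-expressed in the normalized 8-bit units of the
 * render target. For example, c = 255 gives (255 / 32767) * (32767 / 255) = 1.0. */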
46
47 struct vertex_shader_consts
48 {
49 struct vertex4f denorm;
50 };
51
52 struct fragment_shader_consts
53 {
54 struct vertex4f multiplier;
55 struct vertex4f div;
56 };
57
58 struct vert_stream_0
59 {
60 struct vertex2f pos;
61 struct vertex2f luma_tc;
62 struct vertex2f cb_tc;
63 struct vertex2f cr_tc;
64 };
65
66 enum MACROBLOCK_TYPE
67 {
68 MACROBLOCK_TYPE_INTRA,
69 MACROBLOCK_TYPE_FWD_FRAME_PRED,
70 MACROBLOCK_TYPE_FWD_FIELD_PRED,
71 MACROBLOCK_TYPE_BKWD_FRAME_PRED,
72 MACROBLOCK_TYPE_BKWD_FIELD_PRED,
73 MACROBLOCK_TYPE_BI_FRAME_PRED,
74 MACROBLOCK_TYPE_BI_FIELD_PRED,
75
76 NUM_MACROBLOCK_TYPES
77 };
78
79 static bool
80 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
81 {
82 struct ureg_program *shader;
83 struct ureg_src vpos, vtex[3];
84 struct ureg_dst o_vpos, o_vtex[3];
85 unsigned i;
86
87 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
88 if (!shader)
89 return false;
90
91 vpos = ureg_DECL_vs_input(shader, 0);
92 for (i = 0; i < 3; ++i)
93 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
94 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
95 for (i = 0; i < 3; ++i)
96 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
97
98 /*
99 * o_vpos = vpos
100 * o_vtex[0..2] = vtex[0..2]
101 */
102 ureg_MOV(shader, o_vpos, vpos);
103 for (i = 0; i < 3; ++i)
104 ureg_MOV(shader, o_vtex[i], vtex[i]);
105
106 ureg_END(shader);
107
108 r->i_vs = ureg_create_shader_and_destroy(shader, r->pipe);
109 if (!r->i_vs)
110 return false;
111
112 return true;
113 }
114
115 static bool
116 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
117 {
118 struct ureg_program *shader;
119 struct ureg_src tc[3];
120 struct ureg_src sampler[3];
121 struct ureg_dst texel, temp;
122 struct ureg_dst fragment;
123 unsigned i;
124
125 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
126 if (!shader)
127 return false;
128
129 for (i = 0; i < 3; ++i) {
130 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
131 sampler[i] = ureg_DECL_sampler(shader, i);
132 }
133 texel = ureg_DECL_temporary(shader);
134 temp = ureg_DECL_temporary(shader);
135 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
136
137 /*
138 * texel.r = tex(tc[0], sampler[0])
139 * texel.g = tex(tc[1], sampler[1])
140 * texel.b = tex(tc[2], sampler[2])
141 * fragment = texel * scale
142 */
143 for (i = 0; i < 3; ++i) {
144 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
145 ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
146 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
147 }
148 ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
149
150 ureg_release_temporary(shader, texel);
151 ureg_release_temporary(shader, temp);
152 ureg_END(shader);
153
154 r->i_fs = ureg_create_shader_and_destroy(shader, r->pipe);
155 if (!r->i_fs)
156 return false;
157
158 return true;
159 }
160
161 static bool
162 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
163 {
164 struct ureg_program *shader;
165 struct ureg_src vpos, vtex[4];
166 struct ureg_dst o_vpos, o_vtex[4];
167 unsigned i;
168
169 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
170 if (!shader)
171 return false;
172
173 vpos = ureg_DECL_vs_input(shader, 0);
174 for (i = 0; i < 4; ++i)
175 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
176 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
177 for (i = 0; i < 4; ++i)
178 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
179
180 /*
181 * o_vpos = vpos
182 * o_vtex[0..2] = vtex[0..2]
183 * o_vtex[3] = vpos + vtex[3] // Apply motion vector
184 */
185 ureg_MOV(shader, o_vpos, vpos);
186 for (i = 0; i < 3; ++i)
187 ureg_MOV(shader, o_vtex[i], vtex[i]);
188 ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
189
190 ureg_END(shader);
191
192 r->p_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
193 if (!r->p_vs[0])
194 return false;
195
196 return true;
197 }
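/* Explanatory note (based on gen_macroblock_verts() below): vtex[3] carries the
 * macroblock's motion vector already converted to texture coordinates, i.e.
 * pmv * 0.5f * surface_tex_inv_size, so o_vtex[3] = vpos + vtex[3] addresses the
 * predicted area in the reference picture. */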
198
199 #if 0
200 static void
201 create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
202 {
203 assert(false);
204 }
205 #endif
206
207 static bool
208 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
209 {
210 struct ureg_program *shader;
211 struct ureg_src tc[4];
212 struct ureg_src sampler[4];
213 struct ureg_dst texel, ref;
214 struct ureg_dst fragment;
215 unsigned i;
216
217 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
218 if (!shader)
219 return false;
220
221 for (i = 0; i < 4; ++i) {
222 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
223 sampler[i] = ureg_DECL_sampler(shader, i);
224 }
225 texel = ureg_DECL_temporary(shader);
226 ref = ureg_DECL_temporary(shader);
227 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
228
229 /*
230 * texel.r = tex(tc[0], sampler[0])
231 * texel.g = tex(tc[1], sampler[1])
232 * texel.b = tex(tc[2], sampler[2])
233 * ref = tex(tc[3], sampler[3])
234 * fragment = texel * scale + ref
235 */
236 for (i = 0; i < 3; ++i) {
237 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
238 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
239 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
240 }
241 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
242 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
243
244 ureg_release_temporary(shader, texel);
245 ureg_release_temporary(shader, ref);
246 ureg_END(shader);
247
248 r->p_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
249 if (!r->p_fs[0])
250 return false;
251
252 return true;
253 }
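/* Note: the MAD in the P-prediction fragment shader above computes
 * fragment = residual * SCALE_FACTOR_16_TO_9 + reference sample, i.e. standard
 * motion compensation: prediction plus decoded residual. */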
254
255 #if 0
256 static void
257 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
258 {
259 assert(false);
260 }
261 #endif
262
263 static bool
264 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
265 {
266 struct ureg_program *shader;
267 struct ureg_src vpos, vtex[5];
268 struct ureg_dst o_vpos, o_vtex[5];
269 unsigned i;
270
271 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
272 if (!shader)
273 return false;
274
275 vpos = ureg_DECL_vs_input(shader, 0);
276 for (i = 0; i < 4; ++i)
277 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
278 /* Skip input 5 */
279 vtex[4] = ureg_DECL_vs_input(shader, 6);
280 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
281 for (i = 0; i < 5; ++i)
282 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
283
284 /*
285 * o_vpos = vpos
286 * o_vtex[0..2] = vtex[0..2]
287 * o_vtex[3..4] = vpos + vtex[3..4] // Apply motion vector
288 */
289 ureg_MOV(shader, o_vpos, vpos);
290 for (i = 0; i < 3; ++i)
291 ureg_MOV(shader, o_vtex[i], vtex[i]);
292 for (i = 3; i < 5; ++i)
293 ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
294
295 ureg_END(shader);
296
297 r->b_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
298 if (!r->b_vs[0])
299 return false;
300
301 return true;
302 }
303
304 #if 0
305 static void
306 create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
307 {
308 assert(false);
309 }
310 #endif
311
312 static bool
313 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
314 {
315 struct ureg_program *shader;
316 struct ureg_src tc[5];
317 struct ureg_src sampler[5];
318 struct ureg_dst texel, ref[2];
319 struct ureg_dst fragment;
320 unsigned i;
321
322 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
323 if (!shader)
324 return false;
325
326 for (i = 0; i < 5; ++i) {
327 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
328 sampler[i] = ureg_DECL_sampler(shader, i);
329 }
330 texel = ureg_DECL_temporary(shader);
331 ref[0] = ureg_DECL_temporary(shader);
332 ref[1] = ureg_DECL_temporary(shader);
333 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
334
335 /*
336 * texel.r = tex(tc[0], sampler[0])
337 * texel.g = tex(tc[1], sampler[1])
338 * texel.b = tex(tc[2], sampler[2])
339 * ref[0..1] = tex(tc[3..4], sampler[3..4])
340 * ref[0] = lerp(ref[0], ref[1], 0.5)
341 * fragment = texel * scale + ref[0]
342 */
343 for (i = 0; i < 3; ++i) {
344 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
345 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
346 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
347 }
348 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
349 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
350 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
351
352 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
353
354 ureg_release_temporary(shader, texel);
355 ureg_release_temporary(shader, ref[0]);
356 ureg_release_temporary(shader, ref[1]);
357 ureg_END(shader);
358
359 r->b_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
360 if (!r->b_fs[0])
361 return false;
362
363 return true;
364 }
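/* Note: for B-type macroblocks the shader above averages the forward and backward
 * references with LRP(0.5, ref[0], ref[1]) and then adds the scaled residual, i.e.
 * recon = 0.5 * (fwd + bwd) + residual * SCALE_FACTOR_16_TO_9. */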
365
366 #if 0
367 static void
368 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
369 {
370 assert(false);
371 }
372 #endif
373
374 static void
375 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
376 {
377 unsigned i;
378
379 assert(r);
380
381 for (i = 0; i < 3; ++i) {
382 struct pipe_box rect =
383 {
384 0, 0, 0,
385 r->textures.all[i]->width0,
386 r->textures.all[i]->height0,
387 0
388 };
389
390 r->tex_transfer[i] = r->pipe->get_transfer
391 (
392 r->pipe, r->textures.all[i],
393 u_subresource(0, 0),
394 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
395 &rect
396 );
397
398 r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]);
399 }
400 }
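/* The Y/Cb/Cr block textures stay mapped between flushes: grab_blocks() below
 * copies each macroblock's short block data straight into these mappings, and the
 * textures are unmapped again just before flush() is called. */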
401
402 static void
403 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
404 {
405 unsigned i;
406
407 assert(r);
408
409 for (i = 0; i < 3; ++i) {
410 r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]);
411 r->pipe->transfer_destroy(r->pipe, r->tex_transfer[i]);
412 }
413 }
414
415 static bool
416 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
417 {
418 struct pipe_sampler_state sampler;
419 unsigned filters[5];
420 unsigned i;
421
422 assert(r);
423
424 r->viewport.scale[0] = r->pot_buffers ?
425 util_next_power_of_two(r->picture_width) : r->picture_width;
426 r->viewport.scale[1] = r->pot_buffers ?
427 util_next_power_of_two(r->picture_height) : r->picture_height;
428 r->viewport.scale[2] = 1;
429 r->viewport.scale[3] = 1;
430 r->viewport.translate[0] = 0;
431 r->viewport.translate[1] = 0;
432 r->viewport.translate[2] = 0;
433 r->viewport.translate[3] = 0;
434
435 r->fb_state.width = r->pot_buffers ?
436 util_next_power_of_two(r->picture_width) : r->picture_width;
437 r->fb_state.height = r->pot_buffers ?
438 util_next_power_of_two(r->picture_height) : r->picture_height;
439 r->fb_state.nr_cbufs = 1;
440 r->fb_state.zsbuf = NULL;
441
442 /* Luma filter */
443 filters[0] = PIPE_TEX_FILTER_NEAREST;
444 /* Chroma filters */
445 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
446 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
447 filters[1] = PIPE_TEX_FILTER_NEAREST;
448 filters[2] = PIPE_TEX_FILTER_NEAREST;
449 }
450 else {
451 filters[1] = PIPE_TEX_FILTER_LINEAR;
452 filters[2] = PIPE_TEX_FILTER_LINEAR;
453 }
454 /* Fwd, bkwd ref filters */
455 filters[3] = PIPE_TEX_FILTER_LINEAR;
456 filters[4] = PIPE_TEX_FILTER_LINEAR;
457
458 for (i = 0; i < 5; ++i) {
459 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
460 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
461 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
462 sampler.min_img_filter = filters[i];
463 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
464 sampler.mag_img_filter = filters[i];
465 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
466 sampler.compare_func = PIPE_FUNC_ALWAYS;
467 sampler.normalized_coords = 1;
468 /*sampler.shadow_ambient = ; */
469 /*sampler.lod_bias = ; */
470 sampler.min_lod = 0;
471 /*sampler.max_lod = ; */
472 /*sampler.border_color[i] = ; */
473 /*sampler.max_anisotropy = ; */
474 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
475 }
476
477 return true;
478 }
479
480 static void
481 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
482 {
483 unsigned i;
484
485 assert(r);
486
487 for (i = 0; i < 5; ++i)
488 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
489 }
490
491 static bool
492 init_shaders(struct vl_mpeg12_mc_renderer *r)
493 {
494 assert(r);
495
496 create_intra_vert_shader(r);
497 create_intra_frag_shader(r);
498 create_frame_pred_vert_shader(r);
499 create_frame_pred_frag_shader(r);
500 create_frame_bi_pred_vert_shader(r);
501 create_frame_bi_pred_frag_shader(r);
502
503 return true;
504 }
505
506 static void
507 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
508 {
509 assert(r);
510
511 r->pipe->delete_vs_state(r->pipe, r->i_vs);
512 r->pipe->delete_fs_state(r->pipe, r->i_fs);
513 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
514 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
515 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
516 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
517 }
518
519 static bool
520 init_buffers(struct vl_mpeg12_mc_renderer *r)
521 {
522 struct pipe_resource template;
523 struct pipe_vertex_element vertex_elems[8];
524 struct pipe_sampler_view sampler_view;
525
526 const unsigned mbw =
527 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
528 const unsigned mbh =
529 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
530
531 unsigned i;
532
533 assert(r);
534
535 r->macroblocks_per_batch =
536 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
537 r->num_macroblocks = 0;
538 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
539
540 memset(&template, 0, sizeof(struct pipe_resource));
541 template.target = PIPE_TEXTURE_2D;
542 /* TODO: Accommodate HW that can't do this, and cases where this isn't precise enough */
543 template.format = PIPE_FORMAT_R16_SNORM;
544 template.last_level = 0;
545 template.width0 = r->pot_buffers ?
546 util_next_power_of_two(r->picture_width) : r->picture_width;
547 template.height0 = r->pot_buffers ?
548 util_next_power_of_two(r->picture_height) : r->picture_height;
549 template.depth0 = 1;
550 template.usage = PIPE_USAGE_DYNAMIC;
551 template.bind = PIPE_BIND_SAMPLER_VIEW;
552 template.flags = 0;
553
554 r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
555
556 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
557 template.width0 = r->pot_buffers ?
558 util_next_power_of_two(r->picture_width / 2) :
559 r->picture_width / 2;
560 template.height0 = r->pot_buffers ?
561 util_next_power_of_two(r->picture_height / 2) :
562 r->picture_height / 2;
563 }
564 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
565 template.height0 = r->pot_buffers ?
566 util_next_power_of_two(r->picture_height / 2) :
567 r->picture_height / 2;
568
569 r->textures.individual.cb =
570 r->pipe->screen->resource_create(r->pipe->screen, &template);
571 r->textures.individual.cr =
572 r->pipe->screen->resource_create(r->pipe->screen, &template);
573
574 for (i = 0; i < 3; ++i) {
575 u_sampler_view_default_template(&sampler_view,
576 r->textures.all[i],
577 r->textures.all[i]->format);
578 r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
579 }
580
581 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
582 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
583 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
584 /* XXX: Create with usage DYNAMIC or STREAM */
585 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
586 (
587 r->pipe->screen,
588 PIPE_BIND_VERTEX_BUFFER,
589 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
590 );
591
592 for (i = 1; i < 3; ++i) {
593 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
594 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
595 r->vertex_bufs.all[i].buffer_offset = 0;
596 /* XXX: Create with usage DYNAMIC or STREAM */
597 r->vertex_bufs.all[i].buffer = pipe_buffer_create
598 (
599 r->pipe->screen,
600 PIPE_BIND_VERTEX_BUFFER,
601 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
602 );
603 }
604
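/* Vertex stream layout (as set up above and described by the elements below):
 * stream 0 interleaves struct vert_stream_0 (position plus luma/Cb/Cr texcoords,
 * stride 4 * vertex2f), while streams 1 and 2 each hold two vertex2f motion
 * vectors per vertex (top/bottom field) for the first and second reference
 * surfaces respectively (stride 2 * vertex2f). */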
605 /* Position element */
606 vertex_elems[0].src_offset = 0;
607 vertex_elems[0].instance_divisor = 0;
608 vertex_elems[0].vertex_buffer_index = 0;
609 vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
610
611 /* Luma texcoord element */
612 vertex_elems[1].src_offset = sizeof(struct vertex2f);
613 vertex_elems[1].instance_divisor = 0;
614 vertex_elems[1].vertex_buffer_index = 0;
615 vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
616
617 /* Chroma Cb texcoord element */
618 vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
619 vertex_elems[2].instance_divisor = 0;
620 vertex_elems[2].vertex_buffer_index = 0;
621 vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
622
623 /* Chroma Cr texcoord element */
624 vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
625 vertex_elems[3].instance_divisor = 0;
626 vertex_elems[3].vertex_buffer_index = 0;
627 vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
628
629 /* First ref surface top field texcoord element */
630 vertex_elems[4].src_offset = 0;
631 vertex_elems[4].instance_divisor = 0;
632 vertex_elems[4].vertex_buffer_index = 1;
633 vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
634
635 /* First ref surface bottom field texcoord element */
636 vertex_elems[5].src_offset = sizeof(struct vertex2f);
637 vertex_elems[5].instance_divisor = 0;
638 vertex_elems[5].vertex_buffer_index = 1;
639 vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
640
641 /* Second ref surface top field texcoord element */
642 vertex_elems[6].src_offset = 0;
643 vertex_elems[6].instance_divisor = 0;
644 vertex_elems[6].vertex_buffer_index = 2;
645 vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
646
647 /* Second ref surface bottom field texcoord element */
648 vertex_elems[7].src_offset = sizeof(struct vertex2f);
649 vertex_elems[7].instance_divisor = 0;
650 vertex_elems[7].vertex_buffer_index = 2;
651 vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
652
653 r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
654 r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
655 r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
656
657 r->vs_const_buf = pipe_buffer_create
658 (
659 r->pipe->screen,
660 PIPE_BIND_CONSTANT_BUFFER,
661 sizeof(struct vertex_shader_consts)
662 );
663
664 return true;
665 }
666
667 static void
668 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
669 {
670 unsigned i;
671
672 assert(r);
673
674 pipe_resource_reference(&r->vs_const_buf, NULL);
675
676 for (i = 0; i < 3; ++i) {
677 r->pipe->sampler_view_destroy(r->pipe, r->sampler_views.all[i]);
678 r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state.all[i]);
679 pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
680 pipe_resource_reference(&r->textures.all[i], NULL);
681 }
682
683 FREE(r->macroblock_buf);
684 }
685
686 static enum MACROBLOCK_TYPE
687 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
688 {
689 assert(mb);
690
691 switch (mb->mb_type) {
692 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
693 return MACROBLOCK_TYPE_INTRA;
694 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
695 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
696 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
697 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
698 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
699 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
700 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
701 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
702 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
703 default:
704 assert(0);
705 }
706
707 /* Unreachable */
708 return -1;
709 }
710
711 static void
712 gen_block_verts(struct vert_stream_0 *vb, unsigned cbp, unsigned mbx, unsigned mby,
713 const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
714 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
715 bool use_zeroblocks, struct vertex2f *zero_blocks)
716 {
717 struct vertex2f v;
718
719 assert(vb);
720 assert(unit && half && offset);
721 assert(zero_blocks || !use_zeroblocks);
722
723 /* Generate vertices for two triangles covering a block */
724 v.x = mbx * unit->x + offset->x;
725 v.y = mby * unit->y + offset->y;
726
727 vb[0].pos.x = v.x;
728 vb[0].pos.y = v.y;
729 vb[1].pos.x = v.x;
730 vb[1].pos.y = v.y + half->y;
731 vb[2].pos.x = v.x + half->x;
732 vb[2].pos.y = v.y;
733 vb[3].pos.x = v.x + half->x;
734 vb[3].pos.y = v.y;
735 vb[4].pos.x = v.x;
736 vb[4].pos.y = v.y + half->y;
737 vb[5].pos.x = v.x + half->x;
738 vb[5].pos.y = v.y + half->y;
739
740 /* Generate texcoords for the triangles. Each set either points to the block's own area of the luma/chroma
741 texture, or, when zero blocks are in use and the corresponding CBP bit isn't set (i.e. no data is coded
742 for this channel in this block), to the shared zero block */
743
744 if (!use_zeroblocks || cbp & luma_mask) {
745 v.x = mbx * unit->x + offset->x;
746 v.y = mby * unit->y + offset->y;
747 }
748 else {
749 v.x = zero_blocks[0].x;
750 v.y = zero_blocks[0].y;
751 }
752
753 vb[0].luma_tc.x = v.x;
754 vb[0].luma_tc.y = v.y;
755 vb[1].luma_tc.x = v.x;
756 vb[1].luma_tc.y = v.y + half->y;
757 vb[2].luma_tc.x = v.x + half->x;
758 vb[2].luma_tc.y = v.y;
759 vb[3].luma_tc.x = v.x + half->x;
760 vb[3].luma_tc.y = v.y;
761 vb[4].luma_tc.x = v.x;
762 vb[4].luma_tc.y = v.y + half->y;
763 vb[5].luma_tc.x = v.x + half->x;
764 vb[5].luma_tc.y = v.y + half->y;
765
766 if (!use_zeroblocks || cbp & cb_mask) {
767 v.x = mbx * unit->x + offset->x;
768 v.y = mby * unit->y + offset->y;
769 }
770 else {
771 v.x = zero_blocks[1].x;
772 v.y = zero_blocks[1].y;
773 }
774
775 vb[0].cb_tc.x = v.x;
776 vb[0].cb_tc.y = v.y;
777 vb[1].cb_tc.x = v.x;
778 vb[1].cb_tc.y = v.y + half->y;
779 vb[2].cb_tc.x = v.x + half->x;
780 vb[2].cb_tc.y = v.y;
781 vb[3].cb_tc.x = v.x + half->x;
782 vb[3].cb_tc.y = v.y;
783 vb[4].cb_tc.x = v.x;
784 vb[4].cb_tc.y = v.y + half->y;
785 vb[5].cb_tc.x = v.x + half->x;
786 vb[5].cb_tc.y = v.y + half->y;
787
788 if (!use_zeroblocks || cbp & cr_mask) {
789 v.x = mbx * unit->x + offset->x;
790 v.y = mby * unit->y + offset->y;
791 }
792 else {
793 v.x = zero_blocks[2].x;
794 v.y = zero_blocks[2].y;
795 }
796
797 vb[0].cr_tc.x = v.x;
798 vb[0].cr_tc.y = v.y;
799 vb[1].cr_tc.x = v.x;
800 vb[1].cr_tc.y = v.y + half->y;
801 vb[2].cr_tc.x = v.x + half->x;
802 vb[2].cr_tc.y = v.y;
803 vb[3].cr_tc.x = v.x + half->x;
804 vb[3].cr_tc.y = v.y;
805 vb[4].cr_tc.x = v.x;
806 vb[4].cr_tc.y = v.y + half->y;
807 vb[5].cr_tc.x = v.x + half->x;
808 vb[5].cr_tc.y = v.y + half->y;
809 }
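/* Each macroblock expands to 24 vertices: 4 luma-block quadrants x 2 triangles x 3
 * vertices, which is where the recurring "* 24" factors in the vertex buffer sizes
 * and draw calls come from. */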
810
811 static void
812 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
813 struct pipe_mpeg12_macroblock *mb, unsigned pos,
814 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
815 {
816 struct vertex2f mo_vec[2];
817
818 unsigned i;
819
820 assert(r);
821 assert(mb);
822 assert(ycbcr_vb);
823 assert(pos < r->macroblocks_per_batch);
824
825 mo_vec[1].x = 0;
826 mo_vec[1].y = 0;
827
828 switch (mb->mb_type) {
829 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
830 {
831 struct vertex2f *vb;
832
833 assert(ref_vb && ref_vb[1]);
834
835 vb = ref_vb[1] + pos * 2 * 24;
836
837 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
838 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
839
840 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
841 for (i = 0; i < 24 * 2; i += 2) {
842 vb[i].x = mo_vec[0].x;
843 vb[i].y = mo_vec[0].y;
844 }
845 }
846 else {
847 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
848 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
849
850 for (i = 0; i < 24 * 2; i += 2) {
851 vb[i].x = mo_vec[0].x;
852 vb[i].y = mo_vec[0].y;
853 vb[i + 1].x = mo_vec[1].x;
854 vb[i + 1].y = mo_vec[1].y;
855 }
856 }
857
858 /* fall-through */
859 }
860 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
861 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
862 {
863 struct vertex2f *vb;
864
865 assert(ref_vb && ref_vb[0]);
866
867 vb = ref_vb[0] + pos * 2 * 24;
868
869 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
870 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
871 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
872
873 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
874 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
875 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
876 }
877 }
878 else {
879 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
880 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
881
882 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
883 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
884 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
885 }
886 }
887
888 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
889 for (i = 0; i < 24 * 2; i += 2) {
890 vb[i].x = mo_vec[0].x;
891 vb[i].y = mo_vec[0].y;
892 }
893 }
894 else {
895 for (i = 0; i < 24 * 2; i += 2) {
896 vb[i].x = mo_vec[0].x;
897 vb[i].y = mo_vec[0].y;
898 vb[i + 1].x = mo_vec[1].x;
899 vb[i + 1].y = mo_vec[1].y;
900 }
901 }
902
903 /* fall-through */
904 }
905 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
906 {
907 const struct vertex2f unit =
908 {
909 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
910 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
911 };
912 const struct vertex2f half =
913 {
914 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
915 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
916 };
917 const struct vertex2f offsets[2][2] =
918 {
919 {
920 {0, 0}, {0, half.y}
921 },
922 {
923 {half.x, 0}, {half.x, half.y}
924 }
925 };
926 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
927
928 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
929
930 gen_block_verts(vb, mb->cbp, mb->mbx, mb->mby,
931 &unit, &half, &offsets[0][0],
932 32, 2, 1, use_zb, r->zero_block);
933
934 gen_block_verts(vb + 6, mb->cbp, mb->mbx, mb->mby,
935 &unit, &half, &offsets[1][0],
936 16, 2, 1, use_zb, r->zero_block);
937
938 gen_block_verts(vb + 12, mb->cbp, mb->mbx, mb->mby,
939 &unit, &half, &offsets[0][1],
940 8, 2, 1, use_zb, r->zero_block);
941
942 gen_block_verts(vb + 18, mb->cbp, mb->mbx, mb->mby,
943 &unit, &half, &offsets[1][1],
944 4, 2, 1, use_zb, r->zero_block);
945
946 break;
947 }
948 default:
949 assert(0);
950 }
951 }
952
953 static void
954 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
955 unsigned *num_macroblocks)
956 {
957 unsigned offset[NUM_MACROBLOCK_TYPES];
958 struct vert_stream_0 *ycbcr_vb;
959 struct vertex2f *ref_vb[2];
960 struct pipe_transfer *buf_transfer[3];
961 unsigned i;
962
963 assert(r);
964 assert(num_macroblocks);
965
966 for (i = 0; i < r->num_macroblocks; ++i) {
967 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
968 ++num_macroblocks[mb_type];
969 }
970
971 offset[0] = 0;
972
973 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
974 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
975
976 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
977 (
978 r->pipe,
979 r->vertex_bufs.individual.ycbcr.buffer,
980 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
981 &buf_transfer[0]
982 );
983
984 for (i = 0; i < 2; ++i)
985 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
986 (
987 r->pipe,
988 r->vertex_bufs.individual.ref[i].buffer,
989 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
990 &buf_transfer[i + 1]
991 );
992
993 for (i = 0; i < r->num_macroblocks; ++i) {
994 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
995
996 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
997 ycbcr_vb, ref_vb);
998
999 ++offset[mb_type];
1000 }
1001
1002 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
1003 for (i = 0; i < 2; ++i)
1004 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
1005 }
1006
1007 static void
1008 flush(struct vl_mpeg12_mc_renderer *r)
1009 {
1010 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1011 unsigned vb_start = 0;
1012 struct vertex_shader_consts *vs_consts;
1013 struct pipe_transfer *buf_transfer;
1014 unsigned i;
1015
1016 assert(r);
1017 assert(r->num_macroblocks == r->macroblocks_per_batch);
1018
1019 gen_macroblock_stream(r, num_macroblocks);
1020
1021 r->fb_state.cbufs[0] = r->surface;
1022
1023 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1024 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1025
1026 vs_consts = pipe_buffer_map
1027 (
1028 r->pipe, r->vs_const_buf,
1029 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1030 &buf_transfer
1031 );
1032
1033 vs_consts->denorm.x = r->surface->width;
1034 vs_consts->denorm.y = r->surface->height;
1035
1036 pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
1037
1038 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1039 r->vs_const_buf);
1040
1041 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1042 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1043 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
1044 r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
1045 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1046 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1047 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1048
1049 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1050 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1051 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1052 }
1053
1054 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0*/) {
1055 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1056 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1057 r->textures.individual.ref[0] = r->past->texture;
1058 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1059 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1060 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1061 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1062
1063 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1064 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1065 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1066 }
1067
1068 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1069 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1070 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1071 r->textures.individual.ref[0] = r->past->texture;
1072 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1073 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1074 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1075 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1076
1077 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1078 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1079 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1080 }
1081
1082 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0*/) {
1083 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1084 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1085 r->textures.individual.ref[0] = r->future->texture;
1086 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1087 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1088 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1089 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1090
1091 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1092 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1093 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1094 }
1095
1096 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0*/ ) {
1097 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1098 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1099 r->textures.individual.ref[0] = r->future->texture;
1100 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1101 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1102 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1103 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1104
1105 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1106 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1107 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1108 }
1109
1110 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0*/) {
1111 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1112 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1113 r->textures.individual.ref[0] = r->past->texture;
1114 r->textures.individual.ref[1] = r->future->texture;
1115 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1116 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1117 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1118 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1119
1120 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1121 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1122 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1123 }
1124
1125 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1126 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1127 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1128 r->textures.individual.ref[0] = r->past->texture;
1129 r->textures.individual.ref[1] = r->future->texture;
1130 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1131 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1132 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1133 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1134
1135 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1136 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1137 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1138 }
1139
1140 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1141
1142 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1143 for (i = 0; i < 3; ++i)
1144 r->zero_block[i].x = ZERO_BLOCK_NIL;
1145
1146 r->num_macroblocks = 0;
1147 }
1148
1149 static void
1150 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1151 {
1152 unsigned y;
1153
1154 assert(src);
1155 assert(dst);
1156
1157 for (y = 0; y < BLOCK_HEIGHT; ++y)
1158 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1159 }
1160
1161 static void
1162 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1163 {
1164 unsigned y;
1165
1166 assert(src);
1167 assert(dst);
1168
1169 for (y = 0; y < BLOCK_HEIGHT; ++y)
1170 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1171 }
1172
1173 static void
1174 fill_zero_block(short *dst, unsigned dst_pitch)
1175 {
1176 unsigned y;
1177
1178 assert(dst);
1179
1180 for (y = 0; y < BLOCK_HEIGHT; ++y)
1181 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1182 }
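/* Empty-block handling used by grab_blocks() below (see VL_MPEG12_MC_RENDERER_EMPTY_BLOCK):
 * XFER_ALL writes a zero block into the texture for every block whose CBP bit is clear,
 * XFER_ONE writes a single shared zero block per channel and lets gen_block_verts() point
 * the texcoords of empty blocks at it, and XFER_NONE skips empty blocks entirely
 * (currently asserted against in vl_mpeg12_mc_renderer_init()). */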
1183
1184 static void
1185 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1186 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1187 {
1188 unsigned tex_pitch;
1189 short *texels;
1190 unsigned tb = 0, sb = 0;
1191 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1192 unsigned x, y;
1193
1194 assert(r);
1195 assert(blocks);
1196
1197 tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->resource->format);
1198 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1199
1200 for (y = 0; y < 2; ++y) {
1201 for (x = 0; x < 2; ++x, ++tb) {
1202 if ((cbp >> (5 - tb)) & 1) {
1203 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1204 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1205 texels + y * tex_pitch * BLOCK_WIDTH +
1206 x * BLOCK_WIDTH, tex_pitch);
1207 }
1208 else {
1209 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1210 texels + y * tex_pitch + x * BLOCK_WIDTH,
1211 tex_pitch);
1212 }
1213
1214 ++sb;
1215 }
1216 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1217 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1218 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1219 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1220 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1221 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1222 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1223 }
1224 }
1225 }
1226 }
1227 }
1228
1229 /* TODO: Implement 422, 444 */
1230 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1231
1232 mbpx /= 2;
1233 mbpy /= 2;
1234
1235 for (tb = 0; tb < 2; ++tb) {
1236 tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->resource->format);
1237 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1238
1239 if ((cbp >> (1 - tb)) & 1) {
1240 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1241 ++sb;
1242 }
1243 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1244 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1245 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1246 fill_zero_block(texels, tex_pitch);
1247 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1248 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1249 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1250 }
1251 }
1252 }
1253 }
1254 }
1255
1256 static void
1257 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1258 struct pipe_mpeg12_macroblock *mb)
1259 {
1260 assert(r);
1261 assert(mb);
1262 assert(mb->blocks);
1263 assert(r->num_macroblocks < r->macroblocks_per_batch);
1264
1265 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1266 sizeof(struct pipe_mpeg12_macroblock));
1267
1268 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1269
1270 ++r->num_macroblocks;
1271 }
1272
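/* Illustrative usage sketch (not part of this file; the caller-side variable names
 * are hypothetical). A state tracker would typically drive the renderer like this:
 *
 *   struct vl_mpeg12_mc_renderer mc;
 *   struct pipe_fence_handle *fence = NULL;
 *
 *   vl_mpeg12_mc_renderer_init(&mc, pipe, width, height,
 *                              PIPE_VIDEO_CHROMA_FORMAT_420,
 *                              VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
 *                              VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
 *                              true);
 *
 *   vl_mpeg12_mc_renderer_render_macroblocks(&mc, dst_surface, past_surface,
 *                                            future_surface, num_mbs, mbs, &fence);
 *
 *   vl_mpeg12_mc_renderer_cleanup(&mc);
 */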
1273 bool
1274 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1275 struct pipe_context *pipe,
1276 unsigned picture_width,
1277 unsigned picture_height,
1278 enum pipe_video_chroma_format chroma_format,
1279 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1280 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1281 bool pot_buffers)
1282 {
1283 unsigned i;
1284
1285 assert(renderer);
1286 assert(pipe);
1287 /* TODO: Implement other policies */
1288 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1289 /* TODO: Implement this */
1290 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1291 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1292 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1293 assert(pot_buffers);
1294
1295 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1296
1297 renderer->pipe = pipe;
1298 renderer->picture_width = picture_width;
1299 renderer->picture_height = picture_height;
1300 renderer->chroma_format = chroma_format;
1301 renderer->bufmode = bufmode;
1302 renderer->eb_handling = eb_handling;
1303 renderer->pot_buffers = pot_buffers;
1304
1305 if (!init_pipe_state(renderer))
1306 return false;
1307 if (!init_shaders(renderer)) {
1308 cleanup_pipe_state(renderer);
1309 return false;
1310 }
1311 if (!init_buffers(renderer)) {
1312 cleanup_shaders(renderer);
1313 cleanup_pipe_state(renderer);
1314 return false;
1315 }
1316
1317 renderer->surface = NULL;
1318 renderer->past = NULL;
1319 renderer->future = NULL;
1320 for (i = 0; i < 3; ++i)
1321 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1322 renderer->num_macroblocks = 0;
1323
1324 xfer_buffers_map(renderer);
1325
1326 return true;
1327 }
1328
1329 void
1330 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1331 {
1332 assert(renderer);
1333
1334 xfer_buffers_unmap(renderer);
1335
1336 cleanup_pipe_state(renderer);
1337 cleanup_shaders(renderer);
1338 cleanup_buffers(renderer);
1339
1340 pipe_surface_reference(&renderer->surface, NULL);
1341 pipe_surface_reference(&renderer->past, NULL);
1342 pipe_surface_reference(&renderer->future, NULL);
1343 }
1344
1345 void
1346 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1347 *renderer,
1348 struct pipe_surface *surface,
1349 struct pipe_surface *past,
1350 struct pipe_surface *future,
1351 unsigned num_macroblocks,
1352 struct pipe_mpeg12_macroblock
1353 *mpeg12_macroblocks,
1354 struct pipe_fence_handle **fence)
1355 {
1356 bool new_surface = false;
1357
1358 assert(renderer);
1359 assert(surface);
1360 assert(num_macroblocks);
1361 assert(mpeg12_macroblocks);
1362
1363 if (renderer->surface) {
1364 if (surface != renderer->surface) {
1365 if (renderer->num_macroblocks > 0) {
1366 xfer_buffers_unmap(renderer);
1367 flush(renderer);
1368 }
1369
1370 new_surface = true;
1371 }
1372
1373 /* If the surface we're rendering hasn't changed, the ref frames shouldn't change. */
1374 assert(surface != renderer->surface || renderer->past == past);
1375 assert(surface != renderer->surface || renderer->future == future);
1376 }
1377 else
1378 new_surface = true;
1379
1380 if (new_surface) {
1381 pipe_surface_reference(&renderer->surface, surface);
1382 pipe_surface_reference(&renderer->past, past);
1383 pipe_surface_reference(&renderer->future, future);
1384 renderer->fence = fence;
1385 renderer->surface_tex_inv_size.x = 1.0f / surface->width;
1386 renderer->surface_tex_inv_size.y = 1.0f / surface->height;
1387 }
1388
1389 while (num_macroblocks) {
1390 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1391 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1392 unsigned i;
1393
1394 for (i = 0; i < num_to_submit; ++i) {
1395 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1396 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1397 }
1398
1399 num_macroblocks -= num_to_submit;
1400
1401 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1402 xfer_buffers_unmap(renderer);
1403 flush(renderer);
1404 xfer_buffers_map(renderer);
1405 /* Next time we get this surface it may have new ref frames */
1406 pipe_surface_reference(&renderer->surface, NULL);
1407 pipe_surface_reference(&renderer->past, NULL);
1408 pipe_surface_reference(&renderer->future, NULL);
1409 }
1410 }
1411 }