Merge branch 'master' into pipe-video
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include <assert.h>
30 #include <pipe/p_context.h>
31 #include <pipe/p_inlines.h>
32 #include <util/u_math.h>
33 #include <util/u_memory.h>
34 #include <tgsi/tgsi_ureg.h>
35
/* Alignment handed to pipe_buffer_create() for the buffers allocated in this file */
#define DEFAULT_BUF_ALIGNMENT 1

/* Macroblock dimensions, in the same units as picture_width/picture_height */
#define MACROBLOCK_WIDTH 16
#define MACROBLOCK_HEIGHT 16

/* Dimensions of a single block */
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8

/* A zero-block coordinate with a negative x component is treated as "nil" */
#define ZERO_BLOCK_NIL (-1.0f)
#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)

/* Factor the fragment shaders multiply sampled texels by */
#define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
44
45 struct vertex_shader_consts
46 {
47 struct vertex4f denorm;
48 };
49
50 struct fragment_shader_consts
51 {
52 struct vertex4f multiplier;
53 struct vertex4f div;
54 };
55
56 struct vert_stream_0
57 {
58 struct vertex2f pos;
59 struct vertex2f luma_tc;
60 struct vertex2f cb_tc;
61 struct vertex2f cr_tc;
62 };
63
/*
 * Renderer-internal macroblock classification (prediction direction combined
 * with frame/field motion type). The values index per-type arrays, so they
 * must stay contiguous and start at zero.
 */
enum MACROBLOCK_TYPE
{
   MACROBLOCK_TYPE_INTRA = 0,
   MACROBLOCK_TYPE_FWD_FRAME_PRED,
   MACROBLOCK_TYPE_FWD_FIELD_PRED,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED,
   MACROBLOCK_TYPE_BI_FRAME_PRED,
   MACROBLOCK_TYPE_BI_FIELD_PRED,

   /* Number of types above; keep last */
   NUM_MACROBLOCK_TYPES
};
76
77 static bool
78 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
79 {
80 struct ureg_program *shader;
81 struct ureg_src vpos, vtex[3];
82 struct ureg_dst o_vpos, o_vtex[3];
83 unsigned i;
84
85 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
86 if (!shader)
87 return false;
88
89 vpos = ureg_DECL_vs_input(shader, 0);
90 for (i = 0; i < 3; ++i)
91 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
92 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
93 for (i = 0; i < 3; ++i)
94 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
95
96 /*
97 * o_vpos = vpos
98 * o_vtex[0..2] = vtex[0..2]
99 */
100 ureg_MOV(shader, o_vpos, vpos);
101 for (i = 0; i < 3; ++i)
102 ureg_MOV(shader, o_vtex[i], vtex[i]);
103
104 ureg_END(shader);
105
106 r->i_vs = ureg_create_shader_and_destroy(shader, r->pipe);
107 if (!r->i_vs)
108 return false;
109
110 return true;
111 }
112
113 static bool
114 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
115 {
116 struct ureg_program *shader;
117 struct ureg_src tc[3];
118 struct ureg_src sampler[3];
119 struct ureg_dst texel, temp;
120 struct ureg_dst fragment;
121 unsigned i;
122
123 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
124 if (!shader)
125 return false;
126
127 for (i = 0; i < 3; ++i) {
128 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
129 sampler[i] = ureg_DECL_sampler(shader, i);
130 }
131 texel = ureg_DECL_temporary(shader);
132 temp = ureg_DECL_temporary(shader);
133 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
134
135 /*
136 * texel.r = tex(tc[0], sampler[0])
137 * texel.g = tex(tc[1], sampler[1])
138 * texel.b = tex(tc[2], sampler[2])
139 * fragment = texel * scale
140 */
141 for (i = 0; i < 3; ++i) {
142 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
143 ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
144 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
145 }
146 ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
147
148 ureg_release_temporary(shader, texel);
149 ureg_release_temporary(shader, temp);
150 ureg_END(shader);
151
152 r->i_fs = ureg_create_shader_and_destroy(shader, r->pipe);
153 if (!r->i_fs)
154 return false;
155
156 return true;
157 }
158
159 static bool
160 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
161 {
162 struct ureg_program *shader;
163 struct ureg_src vpos, vtex[4];
164 struct ureg_dst o_vpos, o_vtex[4];
165 unsigned i;
166
167 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
168 if (!shader)
169 return false;
170
171 vpos = ureg_DECL_vs_input(shader, 0);
172 for (i = 0; i < 4; ++i)
173 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
174 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
175 for (i = 0; i < 4; ++i)
176 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
177
178 /*
179 * o_vpos = vpos
180 * o_vtex[0..2] = vtex[0..2]
181 * o_vtex[3] = vpos + vtex[3] // Apply motion vector
182 */
183 ureg_MOV(shader, o_vpos, vpos);
184 for (i = 0; i < 3; ++i)
185 ureg_MOV(shader, o_vtex[i], vtex[i]);
186 ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
187
188 ureg_END(shader);
189
190 r->p_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
191 if (!r->p_vs[0])
192 return false;
193
194 return true;
195 }
196
/* Not implemented: field-predicted vertex shader. Asserts if ever called. */
static void
create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void)r;

   assert(false);
}
202
203 static bool
204 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
205 {
206 struct ureg_program *shader;
207 struct ureg_src tc[4];
208 struct ureg_src sampler[4];
209 struct ureg_dst texel, ref;
210 struct ureg_dst fragment;
211 unsigned i;
212
213 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
214 if (!shader)
215 return false;
216
217 for (i = 0; i < 4; ++i) {
218 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
219 sampler[i] = ureg_DECL_sampler(shader, i);
220 }
221 texel = ureg_DECL_temporary(shader);
222 ref = ureg_DECL_temporary(shader);
223 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
224
225 /*
226 * texel.r = tex(tc[0], sampler[0])
227 * texel.g = tex(tc[1], sampler[1])
228 * texel.b = tex(tc[2], sampler[2])
229 * ref = tex(tc[3], sampler[3])
230 * fragment = texel * scale + ref
231 */
232 for (i = 0; i < 3; ++i) {
233 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
234 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
235 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
236 }
237 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
238 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
239
240 ureg_release_temporary(shader, texel);
241 ureg_release_temporary(shader, ref);
242 ureg_END(shader);
243
244 r->p_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
245 if (!r->p_fs[0])
246 return false;
247
248 return true;
249 }
250
/* Not implemented: field-predicted fragment shader. Asserts if ever called. */
static void
create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void)r;

   assert(false);
}
256
257 static bool
258 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
259 {
260 struct ureg_program *shader;
261 struct ureg_src vpos, vtex[5];
262 struct ureg_dst o_vpos, o_vtex[5];
263 unsigned i;
264
265 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
266 if (!shader)
267 return false;
268
269 vpos = ureg_DECL_vs_input(shader, 0);
270 for (i = 0; i < 4; ++i)
271 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
272 /* Skip input 5 */
273 vtex[4] = ureg_DECL_vs_input(shader, 6);
274 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
275 for (i = 0; i < 5; ++i)
276 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
277
278 /*
279 * o_vpos = vpos
280 * o_vtex[0..2] = vtex[0..2]
281 * o_vtex[3..4] = vpos + vtex[3..4] // Apply motion vector
282 */
283 ureg_MOV(shader, o_vpos, vpos);
284 for (i = 0; i < 3; ++i)
285 ureg_MOV(shader, o_vtex[i], vtex[i]);
286 for (i = 3; i < 5; ++i)
287 ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
288
289 ureg_END(shader);
290
291 r->b_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
292 if (!r->b_vs[0])
293 return false;
294
295 return true;
296 }
297
/* Not implemented: bi-directional field-predicted vertex shader. Asserts if ever called. */
static void
create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void)r;

   assert(false);
}
303
304 static bool
305 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
306 {
307 struct ureg_program *shader;
308 struct ureg_src tc[5];
309 struct ureg_src sampler[5];
310 struct ureg_dst texel, ref[2];
311 struct ureg_dst fragment;
312 unsigned i;
313
314 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
315 if (!shader)
316 return false;
317
318 for (i = 0; i < 5; ++i) {
319 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
320 sampler[i] = ureg_DECL_sampler(shader, i);
321 }
322 texel = ureg_DECL_temporary(shader);
323 ref[0] = ureg_DECL_temporary(shader);
324 ref[1] = ureg_DECL_temporary(shader);
325 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
326
327 /*
328 * texel.r = tex(tc[0], sampler[0])
329 * texel.g = tex(tc[1], sampler[1])
330 * texel.b = tex(tc[2], sampler[2])
331 * ref[0..1 = tex(tc[3..4], sampler[3..4])
332 * ref[0] = lerp(ref[0], ref[1], 0.5)
333 * fragment = texel * scale + ref[0]
334 */
335 for (i = 0; i < 3; ++i) {
336 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
337 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
338 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
339 }
340 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
341 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
342 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
343
344 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
345
346 ureg_release_temporary(shader, texel);
347 ureg_release_temporary(shader, ref[0]);
348 ureg_release_temporary(shader, ref[1]);
349 ureg_END(shader);
350
351 r->b_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
352 if (!r->b_fs[0])
353 return false;
354
355 return true;
356 }
357
/* Not implemented: bi-directional field-predicted fragment shader. Asserts if ever called. */
static void
create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void)r;

   assert(false);
}
363
364 static void
365 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
366 {
367 unsigned i;
368
369 assert(r);
370
371 for (i = 0; i < 3; ++i) {
372 r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
373 (
374 r->pipe->screen, r->textures.all[i],
375 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
376 r->textures.all[i]->width0, r->textures.all[i]->height0
377 );
378
379 r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
380 }
381 }
382
383 static void
384 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
385 {
386 unsigned i;
387
388 assert(r);
389
390 for (i = 0; i < 3; ++i) {
391 r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
392 r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
393 }
394 }
395
396 static bool
397 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
398 {
399 struct pipe_sampler_state sampler;
400 unsigned filters[5];
401 unsigned i;
402
403 assert(r);
404
405 r->viewport.scale[0] = r->pot_buffers ?
406 util_next_power_of_two(r->picture_width) : r->picture_width;
407 r->viewport.scale[1] = r->pot_buffers ?
408 util_next_power_of_two(r->picture_height) : r->picture_height;
409 r->viewport.scale[2] = 1;
410 r->viewport.scale[3] = 1;
411 r->viewport.translate[0] = 0;
412 r->viewport.translate[1] = 0;
413 r->viewport.translate[2] = 0;
414 r->viewport.translate[3] = 0;
415
416 r->scissor.maxx = r->pot_buffers ?
417 util_next_power_of_two(r->picture_width) : r->picture_width;
418 r->scissor.maxy = r->pot_buffers ?
419 util_next_power_of_two(r->picture_height) : r->picture_height;
420
421 r->fb_state.width = r->pot_buffers ?
422 util_next_power_of_two(r->picture_width) : r->picture_width;
423 r->fb_state.height = r->pot_buffers ?
424 util_next_power_of_two(r->picture_height) : r->picture_height;
425 r->fb_state.nr_cbufs = 1;
426 r->fb_state.zsbuf = NULL;
427
428 /* Luma filter */
429 filters[0] = PIPE_TEX_FILTER_NEAREST;
430 /* Chroma filters */
431 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
432 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
433 filters[1] = PIPE_TEX_FILTER_NEAREST;
434 filters[2] = PIPE_TEX_FILTER_NEAREST;
435 }
436 else {
437 filters[1] = PIPE_TEX_FILTER_LINEAR;
438 filters[2] = PIPE_TEX_FILTER_LINEAR;
439 }
440 /* Fwd, bkwd ref filters */
441 filters[3] = PIPE_TEX_FILTER_LINEAR;
442 filters[4] = PIPE_TEX_FILTER_LINEAR;
443
444 for (i = 0; i < 5; ++i) {
445 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
446 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
447 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
448 sampler.min_img_filter = filters[i];
449 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
450 sampler.mag_img_filter = filters[i];
451 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
452 sampler.compare_func = PIPE_FUNC_ALWAYS;
453 sampler.normalized_coords = 1;
454 /*sampler.prefilter = ; */
455 /*sampler.shadow_ambient = ; */
456 /*sampler.lod_bias = ; */
457 sampler.min_lod = 0;
458 /*sampler.max_lod = ; */
459 /*sampler.border_color[i] = ; */
460 /*sampler.max_anisotropy = ; */
461 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
462 }
463
464 return true;
465 }
466
467 static void
468 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
469 {
470 unsigned i;
471
472 assert(r);
473
474 for (i = 0; i < 5; ++i)
475 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
476 }
477
478 static bool
479 init_shaders(struct vl_mpeg12_mc_renderer *r)
480 {
481 assert(r);
482
483 create_intra_vert_shader(r);
484 create_intra_frag_shader(r);
485 create_frame_pred_vert_shader(r);
486 create_frame_pred_frag_shader(r);
487 create_frame_bi_pred_vert_shader(r);
488 create_frame_bi_pred_frag_shader(r);
489
490 return true;
491 }
492
493 static void
494 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
495 {
496 assert(r);
497
498 r->pipe->delete_vs_state(r->pipe, r->i_vs);
499 r->pipe->delete_fs_state(r->pipe, r->i_fs);
500 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
501 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
502 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
503 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
504 }
505
506 static bool
507 init_buffers(struct vl_mpeg12_mc_renderer *r)
508 {
509 struct pipe_texture template;
510
511 const unsigned mbw =
512 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
513 const unsigned mbh =
514 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
515
516 unsigned i;
517
518 assert(r);
519
520 r->macroblocks_per_batch =
521 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
522 r->num_macroblocks = 0;
523 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
524
525 memset(&template, 0, sizeof(struct pipe_texture));
526 template.target = PIPE_TEXTURE_2D;
527 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
528 template.format = PIPE_FORMAT_R16_SNORM;
529 template.last_level = 0;
530 template.width0 = r->pot_buffers ?
531 util_next_power_of_two(r->picture_width) : r->picture_width;
532 template.height0 = r->pot_buffers ?
533 util_next_power_of_two(r->picture_height) : r->picture_height;
534 template.depth0 = 1;
535 template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
536
537 r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
538
539 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
540 template.width0 = r->pot_buffers ?
541 util_next_power_of_two(r->picture_width / 2) :
542 r->picture_width / 2;
543 template.height0 = r->pot_buffers ?
544 util_next_power_of_two(r->picture_height / 2) :
545 r->picture_height / 2;
546 }
547 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
548 template.height0 = r->pot_buffers ?
549 util_next_power_of_two(r->picture_height / 2) :
550 r->picture_height / 2;
551
552 r->textures.individual.cb =
553 r->pipe->screen->texture_create(r->pipe->screen, &template);
554 r->textures.individual.cr =
555 r->pipe->screen->texture_create(r->pipe->screen, &template);
556
557 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
558 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
559 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
560 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
561 (
562 r->pipe->screen,
563 DEFAULT_BUF_ALIGNMENT,
564 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
565 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
566 );
567
568 for (i = 1; i < 3; ++i) {
569 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
570 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
571 r->vertex_bufs.all[i].buffer_offset = 0;
572 r->vertex_bufs.all[i].buffer = pipe_buffer_create
573 (
574 r->pipe->screen,
575 DEFAULT_BUF_ALIGNMENT,
576 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
577 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
578 );
579 }
580
581 /* Position element */
582 r->vertex_elems[0].src_offset = 0;
583 r->vertex_elems[0].vertex_buffer_index = 0;
584 r->vertex_elems[0].nr_components = 2;
585 r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
586
587 /* Luma, texcoord element */
588 r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
589 r->vertex_elems[1].vertex_buffer_index = 0;
590 r->vertex_elems[1].nr_components = 2;
591 r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
592
593 /* Chroma Cr texcoord element */
594 r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
595 r->vertex_elems[2].vertex_buffer_index = 0;
596 r->vertex_elems[2].nr_components = 2;
597 r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
598
599 /* Chroma Cb texcoord element */
600 r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
601 r->vertex_elems[3].vertex_buffer_index = 0;
602 r->vertex_elems[3].nr_components = 2;
603 r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
604
605 /* First ref surface top field texcoord element */
606 r->vertex_elems[4].src_offset = 0;
607 r->vertex_elems[4].vertex_buffer_index = 1;
608 r->vertex_elems[4].nr_components = 2;
609 r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
610
611 /* First ref surface bottom field texcoord element */
612 r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
613 r->vertex_elems[5].vertex_buffer_index = 1;
614 r->vertex_elems[5].nr_components = 2;
615 r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
616
617 /* Second ref surface top field texcoord element */
618 r->vertex_elems[6].src_offset = 0;
619 r->vertex_elems[6].vertex_buffer_index = 2;
620 r->vertex_elems[6].nr_components = 2;
621 r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
622
623 /* Second ref surface bottom field texcoord element */
624 r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
625 r->vertex_elems[7].vertex_buffer_index = 2;
626 r->vertex_elems[7].nr_components = 2;
627 r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
628
629 r->vs_const_buf.buffer = pipe_buffer_create
630 (
631 r->pipe->screen,
632 DEFAULT_BUF_ALIGNMENT,
633 PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
634 sizeof(struct vertex_shader_consts)
635 );
636
637 return true;
638 }
639
640 static void
641 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
642 {
643 unsigned i;
644
645 assert(r);
646
647 pipe_buffer_reference(&r->vs_const_buf.buffer, NULL);
648
649 for (i = 0; i < 3; ++i)
650 pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
651
652 for (i = 0; i < 3; ++i)
653 pipe_texture_reference(&r->textures.all[i], NULL);
654
655 FREE(r->macroblock_buf);
656 }
657
658 static enum MACROBLOCK_TYPE
659 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
660 {
661 assert(mb);
662
663 switch (mb->mb_type) {
664 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
665 return MACROBLOCK_TYPE_INTRA;
666 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
667 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
668 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
669 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
670 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
671 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
672 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
673 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
674 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
675 default:
676 assert(0);
677 }
678
679 /* Unreachable */
680 return -1;
681 }
682
683 static void
684 gen_block_verts(struct vert_stream_0 *vb, unsigned cbp, unsigned mbx, unsigned mby,
685 const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
686 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
687 bool use_zeroblocks, struct vertex2f *zero_blocks)
688 {
689 struct vertex2f v;
690
691 assert(vb);
692 assert(unit && half && offset);
693 assert(zero_blocks || !use_zeroblocks);
694
695 /* Generate vertices for two triangles covering a block */
696 v.x = mbx * unit->x + offset->x;
697 v.y = mby * unit->y + offset->y;
698
699 vb[0].pos.x = v.x;
700 vb[0].pos.y = v.y;
701 vb[1].pos.x = v.x;
702 vb[1].pos.y = v.y + half->y;
703 vb[2].pos.x = v.x + half->x;
704 vb[2].pos.y = v.y;
705 vb[3].pos.x = v.x + half->x;
706 vb[3].pos.y = v.y;
707 vb[4].pos.x = v.x;
708 vb[4].pos.y = v.y + half->y;
709 vb[5].pos.x = v.x + half->x;
710 vb[5].pos.y = v.y + half->y;
711
712 /* Generate texcoords for the triangles, either pointing to the correct area on the luma/chroma texture
713 or if zero blocks are being used, to the zero block if the appropriate CBP bits aren't set (i.e. no data
714 for this channel is defined for this block) */
715
716 if (!use_zeroblocks || cbp & luma_mask) {
717 v.x = mbx * unit->x + offset->x;
718 v.y = mby * unit->y + offset->y;
719 }
720 else {
721 v.x = zero_blocks[0].x;
722 v.y = zero_blocks[0].y;
723 }
724
725 vb[0].luma_tc.x = v.x;
726 vb[0].luma_tc.y = v.y;
727 vb[1].luma_tc.x = v.x;
728 vb[1].luma_tc.y = v.y + half->y;
729 vb[2].luma_tc.x = v.x + half->x;
730 vb[2].luma_tc.y = v.y;
731 vb[3].luma_tc.x = v.x + half->x;
732 vb[3].luma_tc.y = v.y;
733 vb[4].luma_tc.x = v.x;
734 vb[4].luma_tc.y = v.y + half->y;
735 vb[5].luma_tc.x = v.x + half->x;
736 vb[5].luma_tc.y = v.y + half->y;
737
738 if (!use_zeroblocks || cbp & cb_mask) {
739 v.x = mbx * unit->x + offset->x;
740 v.y = mby * unit->y + offset->y;
741 }
742 else {
743 v.x = zero_blocks[1].x;
744 v.y = zero_blocks[1].y;
745 }
746
747 vb[0].cb_tc.x = v.x;
748 vb[0].cb_tc.y = v.y;
749 vb[1].cb_tc.x = v.x;
750 vb[1].cb_tc.y = v.y + half->y;
751 vb[2].cb_tc.x = v.x + half->x;
752 vb[2].cb_tc.y = v.y;
753 vb[3].cb_tc.x = v.x + half->x;
754 vb[3].cb_tc.y = v.y;
755 vb[4].cb_tc.x = v.x;
756 vb[4].cb_tc.y = v.y + half->y;
757 vb[5].cb_tc.x = v.x + half->x;
758 vb[5].cb_tc.y = v.y + half->y;
759
760 if (!use_zeroblocks || cbp & cr_mask) {
761 v.x = mbx * unit->x + offset->x;
762 v.y = mby * unit->y + offset->y;
763 }
764 else {
765 v.x = zero_blocks[2].x;
766 v.y = zero_blocks[2].y;
767 }
768
769 vb[0].cr_tc.x = v.x;
770 vb[0].cr_tc.y = v.y;
771 vb[1].cr_tc.x = v.x;
772 vb[1].cr_tc.y = v.y + half->y;
773 vb[2].cr_tc.x = v.x + half->x;
774 vb[2].cr_tc.y = v.y;
775 vb[3].cr_tc.x = v.x + half->x;
776 vb[3].cr_tc.y = v.y;
777 vb[4].cr_tc.x = v.x;
778 vb[4].cr_tc.y = v.y + half->y;
779 vb[5].cr_tc.x = v.x + half->x;
780 vb[5].cr_tc.y = v.y + half->y;
781 }
782
783 static void
784 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
785 struct pipe_mpeg12_macroblock *mb, unsigned pos,
786 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
787 {
788 struct vertex2f mo_vec[2];
789
790 unsigned i;
791
792 assert(r);
793 assert(mb);
794 assert(ycbcr_vb);
795 assert(pos < r->macroblocks_per_batch);
796
797 switch (mb->mb_type) {
798 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
799 {
800 struct vertex2f *vb;
801
802 assert(ref_vb && ref_vb[1]);
803
804 vb = ref_vb[1] + pos * 2 * 24;
805
806 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
807 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
808
809 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
810 for (i = 0; i < 24 * 2; i += 2) {
811 vb[i].x = mo_vec[0].x;
812 vb[i].y = mo_vec[0].y;
813 }
814 }
815 else {
816 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
817 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
818
819 for (i = 0; i < 24 * 2; i += 2) {
820 vb[i].x = mo_vec[0].x;
821 vb[i].y = mo_vec[0].y;
822 vb[i + 1].x = mo_vec[1].x;
823 vb[i + 1].y = mo_vec[1].y;
824 }
825 }
826
827 /* fall-through */
828 }
829 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
830 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
831 {
832 struct vertex2f *vb;
833
834 assert(ref_vb && ref_vb[0]);
835
836 vb = ref_vb[0] + pos * 2 * 24;
837
838 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
839 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
840 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
841
842 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
843 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
844 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
845 }
846 }
847 else {
848 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
849 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
850
851 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
852 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
853 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
854 }
855 }
856
857 if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
858 for (i = 0; i < 24 * 2; i += 2) {
859 vb[i].x = mo_vec[0].x;
860 vb[i].y = mo_vec[0].y;
861 }
862 }
863 else {
864 for (i = 0; i < 24 * 2; i += 2) {
865 vb[i].x = mo_vec[0].x;
866 vb[i].y = mo_vec[0].y;
867 vb[i + 1].x = mo_vec[1].x;
868 vb[i + 1].y = mo_vec[1].y;
869 }
870 }
871
872 /* fall-through */
873 }
874 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
875 {
876 const struct vertex2f unit =
877 {
878 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
879 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
880 };
881 const struct vertex2f half =
882 {
883 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
884 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
885 };
886 const struct vertex2f offsets[2][2] =
887 {
888 {
889 {0, 0}, {0, half.y}
890 },
891 {
892 {half.x, 0}, {half.x, half.y}
893 }
894 };
895 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
896
897 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
898
899 gen_block_verts(vb, mb->cbp, mb->mbx, mb->mby,
900 &unit, &half, &offsets[0][0],
901 32, 2, 1, use_zb, r->zero_block);
902
903 gen_block_verts(vb + 6, mb->cbp, mb->mbx, mb->mby,
904 &unit, &half, &offsets[1][0],
905 16, 2, 1, use_zb, r->zero_block);
906
907 gen_block_verts(vb + 12, mb->cbp, mb->mbx, mb->mby,
908 &unit, &half, &offsets[0][1],
909 8, 2, 1, use_zb, r->zero_block);
910
911 gen_block_verts(vb + 18, mb->cbp, mb->mbx, mb->mby,
912 &unit, &half, &offsets[1][1],
913 4, 2, 1, use_zb, r->zero_block);
914
915 break;
916 }
917 default:
918 assert(0);
919 }
920 }
921
922 static void
923 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
924 unsigned *num_macroblocks)
925 {
926 unsigned offset[NUM_MACROBLOCK_TYPES];
927 struct vert_stream_0 *ycbcr_vb;
928 struct vertex2f *ref_vb[2];
929 unsigned i;
930
931 assert(r);
932 assert(num_macroblocks);
933
934 for (i = 0; i < r->num_macroblocks; ++i) {
935 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
936 ++num_macroblocks[mb_type];
937 }
938
939 offset[0] = 0;
940
941 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
942 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
943
944 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
945 (
946 r->pipe->screen,
947 r->vertex_bufs.individual.ycbcr.buffer,
948 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
949 );
950
951 for (i = 0; i < 2; ++i)
952 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
953 (
954 r->pipe->screen,
955 r->vertex_bufs.individual.ref[i].buffer,
956 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
957 );
958
959 for (i = 0; i < r->num_macroblocks; ++i) {
960 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
961
962 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
963 ycbcr_vb, ref_vb);
964
965 ++offset[mb_type];
966 }
967
968 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer);
969 for (i = 0; i < 2; ++i)
970 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer);
971 }
972
973 static void
974 flush(struct vl_mpeg12_mc_renderer *r)
975 {
976 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
977 unsigned vb_start = 0;
978 struct vertex_shader_consts *vs_consts;
979 unsigned i;
980
981 assert(r);
982 assert(r->num_macroblocks == r->macroblocks_per_batch);
983
984 gen_macroblock_stream(r, num_macroblocks);
985
986 r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface
987 (
988 r->pipe->screen, r->surface,
989 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
990 );
991
992 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
993 r->pipe->set_viewport_state(r->pipe, &r->viewport);
994 r->pipe->set_scissor_state(r->pipe, &r->scissor);
995
996 vs_consts = pipe_buffer_map
997 (
998 r->pipe->screen, r->vs_const_buf.buffer,
999 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1000 );
1001
1002 vs_consts->denorm.x = r->surface->width0;
1003 vs_consts->denorm.y = r->surface->height0;
1004
1005 pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
1006
1007 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1008 &r->vs_const_buf);
1009
1010 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1011 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1012 r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
1013 r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
1014 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1015 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1016 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1017
1018 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1019 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1020 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1021 }
1022
1023 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1024 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1025 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1026 r->textures.individual.ref[0] = r->past;
1027 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1028 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1029 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1030 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1031
1032 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1033 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1034 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1035 }
1036
1037 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1038 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1039 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1040 r->textures.individual.ref[0] = r->past;
1041 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1042 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1043 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1044 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1045
1046 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1047 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1048 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1049 }
1050
1051 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1052 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1053 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1054 r->textures.individual.ref[0] = r->future;
1055 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1056 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1057 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1058 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1059
1060 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1061 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1062 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1063 }
1064
1065 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
1066 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1067 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1068 r->textures.individual.ref[0] = r->future;
1069 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1070 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1071 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1072 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1073
1074 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1075 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1076 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1077 }
1078
1079 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1080 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1081 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1082 r->textures.individual.ref[0] = r->past;
1083 r->textures.individual.ref[1] = r->future;
1084 r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
1085 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1086 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1087 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1088
1089 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1090 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1091 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1092 }
1093
1094 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1095 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1096 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1097 r->textures.individual.ref[0] = r->past;
1098 r->textures.individual.ref[1] = r->future;
1099 r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
1100 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1101 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1102 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1103
1104 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1105 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1106 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1107 }
1108
1109 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1110 pipe_surface_reference(&r->fb_state.cbufs[0], NULL);
1111
1112 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1113 for (i = 0; i < 3; ++i)
1114 r->zero_block[i].x = ZERO_BLOCK_NIL;
1115
1116 r->num_macroblocks = 0;
1117 }
1118
1119 static void
1120 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1121 {
1122 unsigned y;
1123
1124 assert(src);
1125 assert(dst);
1126
1127 for (y = 0; y < BLOCK_HEIGHT; ++y)
1128 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1129 }
1130
1131 static void
1132 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1133 {
1134 unsigned y;
1135
1136 assert(src);
1137 assert(dst);
1138
1139 for (y = 0; y < BLOCK_HEIGHT; ++y)
1140 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1141 }
1142
1143 static void
1144 fill_zero_block(short *dst, unsigned dst_pitch)
1145 {
1146 unsigned y;
1147
1148 assert(dst);
1149
1150 for (y = 0; y < BLOCK_HEIGHT; ++y)
1151 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1152 }
1153
1154 static void
1155 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1156 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1157 {
1158 unsigned tex_pitch;
1159 short *texels;
1160 unsigned tb = 0, sb = 0;
1161 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1162 unsigned x, y;
1163
1164 assert(r);
1165 assert(blocks);
1166
1167 tex_pitch = r->tex_transfer[0]->stride / pf_get_blocksize(r->tex_transfer[0]->texture->format);
1168 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1169
1170 for (y = 0; y < 2; ++y) {
1171 for (x = 0; x < 2; ++x, ++tb) {
1172 if ((cbp >> (5 - tb)) & 1) {
1173 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1174 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1175 texels + y * tex_pitch * BLOCK_WIDTH +
1176 x * BLOCK_WIDTH, tex_pitch);
1177 }
1178 else {
1179 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1180 texels + y * tex_pitch + x * BLOCK_WIDTH,
1181 tex_pitch);
1182 }
1183
1184 ++sb;
1185 }
1186 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1187 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1188 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1189 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1190 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1191 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1192 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1193 }
1194 }
1195 }
1196 }
1197 }
1198
1199 /* TODO: Implement 422, 444 */
1200 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1201
1202 mbpx /= 2;
1203 mbpy /= 2;
1204
1205 for (tb = 0; tb < 2; ++tb) {
1206 tex_pitch = r->tex_transfer[tb + 1]->stride / pf_get_blocksize(r->tex_transfer[tb + 1]->texture->format);
1207 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1208
1209 if ((cbp >> (1 - tb)) & 1) {
1210 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1211 ++sb;
1212 }
1213 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1214 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1215 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1216 fill_zero_block(texels, tex_pitch);
1217 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1218 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1219 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1220 }
1221 }
1222 }
1223 }
1224 }
1225
1226 static void
1227 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1228 struct pipe_mpeg12_macroblock *mb)
1229 {
1230 void *blocks;
1231
1232 assert(r);
1233 assert(mb);
1234 assert(r->num_macroblocks < r->macroblocks_per_batch);
1235
1236 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1237 sizeof(struct pipe_mpeg12_macroblock));
1238
1239 blocks = pipe_buffer_map(r->pipe->screen, mb->blocks,
1240 PIPE_BUFFER_USAGE_CPU_READ);
1241 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, blocks);
1242 pipe_buffer_unmap(r->pipe->screen, mb->blocks);
1243
1244 ++r->num_macroblocks;
1245 }
1246
1247 bool
1248 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1249 struct pipe_context *pipe,
1250 unsigned picture_width,
1251 unsigned picture_height,
1252 enum pipe_video_chroma_format chroma_format,
1253 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1254 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1255 bool pot_buffers)
1256 {
1257 unsigned i;
1258
1259 assert(renderer);
1260 assert(pipe);
1261 /* TODO: Implement other policies */
1262 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1263 /* TODO: Implement this */
1264 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1265 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1266 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1267 assert(pot_buffers);
1268
1269 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1270
1271 renderer->pipe = pipe;
1272 renderer->picture_width = picture_width;
1273 renderer->picture_height = picture_height;
1274 renderer->chroma_format = chroma_format;
1275 renderer->bufmode = bufmode;
1276 renderer->eb_handling = eb_handling;
1277 renderer->pot_buffers = pot_buffers;
1278
1279 if (!init_pipe_state(renderer))
1280 return false;
1281 if (!init_shaders(renderer)) {
1282 cleanup_pipe_state(renderer);
1283 return false;
1284 }
1285 if (!init_buffers(renderer)) {
1286 cleanup_shaders(renderer);
1287 cleanup_pipe_state(renderer);
1288 return false;
1289 }
1290
1291 renderer->surface = NULL;
1292 renderer->past = NULL;
1293 renderer->future = NULL;
1294 for (i = 0; i < 3; ++i)
1295 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1296 renderer->num_macroblocks = 0;
1297
1298 xfer_buffers_map(renderer);
1299
1300 return true;
1301 }
1302
/**
 * Tear down a renderer set up by vl_mpeg12_mc_renderer_init().
 *
 * The transfer buffers are unmapped first, then the pipe state, shaders and
 * buffers are cleaned up, in that order.
 *
 * renderer: renderer to tear down, must not be NULL
 */
void
vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
{
   assert(renderer);

   /* Drop the CPU mappings before the cleanup helpers run. */
   xfer_buffers_unmap(renderer);

   cleanup_pipe_state(renderer);
   cleanup_shaders(renderer);
   cleanup_buffers(renderer);
}
1314
1315 void
1316 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1317 *renderer,
1318 struct pipe_texture *surface,
1319 struct pipe_texture *past,
1320 struct pipe_texture *future,
1321 unsigned num_macroblocks,
1322 struct pipe_mpeg12_macroblock
1323 *mpeg12_macroblocks,
1324 struct pipe_fence_handle **fence)
1325 {
1326 bool new_surface = false;
1327
1328 assert(renderer);
1329 assert(surface);
1330 assert(num_macroblocks);
1331 assert(mpeg12_macroblocks);
1332
1333 if (renderer->surface) {
1334 if (surface != renderer->surface) {
1335 if (renderer->num_macroblocks > 0) {
1336 xfer_buffers_unmap(renderer);
1337 flush(renderer);
1338 }
1339
1340 new_surface = true;
1341 }
1342
1343 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1344 assert(surface != renderer->surface || renderer->past == past);
1345 assert(surface != renderer->surface || renderer->future == future);
1346 }
1347 else
1348 new_surface = true;
1349
1350 if (new_surface) {
1351 renderer->surface = surface;
1352 renderer->past = past;
1353 renderer->future = future;
1354 renderer->fence = fence;
1355 renderer->surface_tex_inv_size.x = 1.0f / surface->width0;
1356 renderer->surface_tex_inv_size.y = 1.0f / surface->height0;
1357 }
1358
1359 while (num_macroblocks) {
1360 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1361 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1362 unsigned i;
1363
1364 for (i = 0; i < num_to_submit; ++i) {
1365 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1366 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1367 }
1368
1369 num_macroblocks -= num_to_submit;
1370
1371 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1372 xfer_buffers_unmap(renderer);
1373 flush(renderer);
1374 xfer_buffers_map(renderer);
1375 /* Next time we get this surface it may have new ref frames */
1376 renderer->surface = NULL;
1377 }
1378 }
1379 }