Workaround for skipping vertex elements for r600g
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include "util/u_draw.h"
30 #include <assert.h>
31 #include <pipe/p_context.h>
32 #include <util/u_inlines.h>
33 #include <util/u_format.h>
34 #include <util/u_math.h>
35 #include <util/u_memory.h>
36 #include <util/u_keymap.h>
37 #include <util/u_sampler.h>
38 #include <tgsi/tgsi_ureg.h>
39
/* NOTE(review): not referenced in the visible part of this file. */
#define DEFAULT_BUF_ALIGNMENT 1
/* Macroblock dimensions; picture sizes are rounded up to multiples of these. */
#define MACROBLOCK_WIDTH 16
#define MACROBLOCK_HEIGHT 16
/* NOTE(review): block dimensions; not referenced in the visible part of this file. */
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8
/*
 * Sentinel coordinate for "no zero block"; ZERO_BLOCK_IS_NIL() recognises it
 * by its negative x component.  Parenthesised so that the expansion stays a
 * single operand in any expression (e.g. -ZERO_BLOCK_NIL).
 */
#define ZERO_BLOCK_NIL (-1.0f)
#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
/* Multiplier the fragment shaders apply to the sampled texel values. */
#define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
48
49 struct vertex_shader_consts
50 {
51 struct vertex4f denorm;
52 };
53
54 struct fragment_shader_consts
55 {
56 struct vertex4f multiplier;
57 struct vertex4f div;
58 };
59
60 struct vert_stream_0
61 {
62 struct vertex2f pos;
63 struct vertex2f luma_tc;
64 struct vertex2f cb_tc;
65 struct vertex2f cr_tc;
66 };
67
/*
 * Internal macroblock classification (prediction direction x motion type).
 * The values are used as array indices, so they must stay dense and start
 * at zero; NUM_MACROBLOCK_TYPES is the number of real entries.
 */
enum MACROBLOCK_TYPE
{
   MACROBLOCK_TYPE_INTRA = 0,
   MACROBLOCK_TYPE_FWD_FRAME_PRED = 1,
   MACROBLOCK_TYPE_FWD_FIELD_PRED = 2,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED = 3,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED = 4,
   MACROBLOCK_TYPE_BI_FRAME_PRED = 5,
   MACROBLOCK_TYPE_BI_FIELD_PRED = 6,

   NUM_MACROBLOCK_TYPES
};
80
81 static bool
82 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
83 {
84 struct ureg_program *shader;
85 struct ureg_src vpos, vtex[3];
86 struct ureg_dst o_vpos, o_vtex[3];
87 unsigned i;
88
89 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
90 if (!shader)
91 return false;
92
93 vpos = ureg_DECL_vs_input(shader, 0);
94 for (i = 0; i < 3; ++i)
95 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
96 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
97 for (i = 0; i < 3; ++i)
98 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
99
100 /*
101 * o_vpos = vpos
102 * o_vtex[0..2] = vtex[0..2]
103 */
104 ureg_MOV(shader, o_vpos, vpos);
105 for (i = 0; i < 3; ++i)
106 ureg_MOV(shader, o_vtex[i], vtex[i]);
107
108 ureg_END(shader);
109
110 r->i_vs = ureg_create_shader_and_destroy(shader, r->pipe);
111 if (!r->i_vs)
112 return false;
113
114 return true;
115 }
116
117 static bool
118 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
119 {
120 struct ureg_program *shader;
121 struct ureg_src tc[3];
122 struct ureg_src sampler[3];
123 struct ureg_dst texel, temp;
124 struct ureg_dst fragment;
125 unsigned i;
126
127 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
128 if (!shader)
129 return false;
130
131 for (i = 0; i < 3; ++i) {
132 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
133 sampler[i] = ureg_DECL_sampler(shader, i);
134 }
135 texel = ureg_DECL_temporary(shader);
136 temp = ureg_DECL_temporary(shader);
137 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
138
139 /*
140 * texel.r = tex(tc[0], sampler[0])
141 * texel.g = tex(tc[1], sampler[1])
142 * texel.b = tex(tc[2], sampler[2])
143 * fragment = texel * scale
144 */
145 for (i = 0; i < 3; ++i) {
146 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
147 ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
148 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
149 }
150 ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
151
152 ureg_release_temporary(shader, texel);
153 ureg_release_temporary(shader, temp);
154 ureg_END(shader);
155
156 r->i_fs = ureg_create_shader_and_destroy(shader, r->pipe);
157 if (!r->i_fs)
158 return false;
159
160 return true;
161 }
162
163 static bool
164 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
165 {
166 struct ureg_program *shader;
167 struct ureg_src vpos, vtex[4];
168 struct ureg_dst o_vpos, o_vtex[4];
169 unsigned i;
170
171 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
172 if (!shader)
173 return false;
174
175 vpos = ureg_DECL_vs_input(shader, 0);
176 for (i = 0; i < 4; ++i)
177 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
178 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
179 for (i = 0; i < 4; ++i)
180 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
181
182 /*
183 * o_vpos = vpos
184 * o_vtex[0..2] = vtex[0..2]
185 * o_vtex[3] = vpos + vtex[3] // Apply motion vector
186 */
187 ureg_MOV(shader, o_vpos, vpos);
188 for (i = 0; i < 3; ++i)
189 ureg_MOV(shader, o_vtex[i], vtex[i]);
190 ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
191
192 ureg_END(shader);
193
194 r->p_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
195 if (!r->p_vs[0])
196 return false;
197
198 return true;
199 }
200
/* Disabled placeholder: the field-prediction vertex shader is not implemented. */
#if 0
static void
create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   assert(false);
}
#endif
208
209 static bool
210 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
211 {
212 struct ureg_program *shader;
213 struct ureg_src tc[4];
214 struct ureg_src sampler[4];
215 struct ureg_dst texel, ref;
216 struct ureg_dst fragment;
217 unsigned i;
218
219 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
220 if (!shader)
221 return false;
222
223 for (i = 0; i < 4; ++i) {
224 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
225 sampler[i] = ureg_DECL_sampler(shader, i);
226 }
227 texel = ureg_DECL_temporary(shader);
228 ref = ureg_DECL_temporary(shader);
229 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
230
231 /*
232 * texel.r = tex(tc[0], sampler[0])
233 * texel.g = tex(tc[1], sampler[1])
234 * texel.b = tex(tc[2], sampler[2])
235 * ref = tex(tc[3], sampler[3])
236 * fragment = texel * scale + ref
237 */
238 for (i = 0; i < 3; ++i) {
239 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
240 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
241 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
242 }
243 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
244 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
245
246 ureg_release_temporary(shader, texel);
247 ureg_release_temporary(shader, ref);
248 ureg_END(shader);
249
250 r->p_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
251 if (!r->p_fs[0])
252 return false;
253
254 return true;
255 }
256
/* Disabled placeholder: the field-prediction fragment shader is not implemented. */
#if 0
static void
create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   assert(false);
}
#endif
264
265 static bool
266 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
267 {
268 struct ureg_program *shader;
269 struct ureg_src vpos, vtex[5];
270 struct ureg_dst o_vpos, o_vtex[5];
271 unsigned i;
272
273 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
274 if (!shader)
275 return false;
276
277 vpos = ureg_DECL_vs_input(shader, 0);
278 for (i = 0; i < 4; ++i)
279 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
280 /* Skip input 5 */
281 ureg_DECL_vs_input(shader, 5);
282 vtex[4] = ureg_DECL_vs_input(shader, 6);
283 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
284 for (i = 0; i < 5; ++i)
285 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
286
287 /*
288 * o_vpos = vpos
289 * o_vtex[0..2] = vtex[0..2]
290 * o_vtex[3..4] = vpos + vtex[3..4] // Apply motion vector
291 */
292 ureg_MOV(shader, o_vpos, vpos);
293 for (i = 0; i < 3; ++i)
294 ureg_MOV(shader, o_vtex[i], vtex[i]);
295 for (i = 3; i < 5; ++i)
296 ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
297
298 ureg_END(shader);
299
300 r->b_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
301 if (!r->b_vs[0])
302 return false;
303
304 return true;
305 }
306
/* Disabled placeholder: the bidirectional field-prediction vertex shader is not implemented. */
#if 0
static void
create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   assert(false);
}
#endif
314
315 static bool
316 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
317 {
318 struct ureg_program *shader;
319 struct ureg_src tc[5];
320 struct ureg_src sampler[5];
321 struct ureg_dst texel, ref[2];
322 struct ureg_dst fragment;
323 unsigned i;
324
325 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
326 if (!shader)
327 return false;
328
329 for (i = 0; i < 5; ++i) {
330 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
331 sampler[i] = ureg_DECL_sampler(shader, i);
332 }
333 texel = ureg_DECL_temporary(shader);
334 ref[0] = ureg_DECL_temporary(shader);
335 ref[1] = ureg_DECL_temporary(shader);
336 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
337
338 /*
339 * texel.r = tex(tc[0], sampler[0])
340 * texel.g = tex(tc[1], sampler[1])
341 * texel.b = tex(tc[2], sampler[2])
342 * ref[0..1 = tex(tc[3..4], sampler[3..4])
343 * ref[0] = lerp(ref[0], ref[1], 0.5)
344 * fragment = texel * scale + ref[0]
345 */
346 for (i = 0; i < 3; ++i) {
347 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
348 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
349 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
350 }
351 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
352 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
353 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
354
355 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
356
357 ureg_release_temporary(shader, texel);
358 ureg_release_temporary(shader, ref[0]);
359 ureg_release_temporary(shader, ref[1]);
360 ureg_END(shader);
361
362 r->b_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
363 if (!r->b_fs[0])
364 return false;
365
366 return true;
367 }
368
/* Disabled placeholder: the bidirectional field-prediction fragment shader is not implemented. */
#if 0
static void
create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   assert(false);
}
#endif
376
377 static void
378 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
379 {
380 unsigned i;
381
382 assert(r);
383
384 for (i = 0; i < 3; ++i) {
385 struct pipe_box rect =
386 {
387 0, 0, 0,
388 r->textures.all[i]->width0,
389 r->textures.all[i]->height0,
390 1
391 };
392
393 r->tex_transfer[i] = r->pipe->get_transfer
394 (
395 r->pipe, r->textures.all[i],
396 u_subresource(0, 0),
397 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
398 &rect
399 );
400
401 r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]);
402 }
403 }
404
405 static void
406 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
407 {
408 unsigned i;
409
410 assert(r);
411
412 for (i = 0; i < 3; ++i) {
413 r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]);
414 r->pipe->transfer_destroy(r->pipe, r->tex_transfer[i]);
415 }
416 }
417
418 static bool
419 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
420 {
421 struct pipe_sampler_state sampler;
422 unsigned filters[5];
423 unsigned i;
424
425 assert(r);
426
427 r->viewport.scale[0] = r->pot_buffers ?
428 util_next_power_of_two(r->picture_width) : r->picture_width;
429 r->viewport.scale[1] = r->pot_buffers ?
430 util_next_power_of_two(r->picture_height) : r->picture_height;
431 r->viewport.scale[2] = 1;
432 r->viewport.scale[3] = 1;
433 r->viewport.translate[0] = 0;
434 r->viewport.translate[1] = 0;
435 r->viewport.translate[2] = 0;
436 r->viewport.translate[3] = 0;
437
438 r->fb_state.width = r->pot_buffers ?
439 util_next_power_of_two(r->picture_width) : r->picture_width;
440 r->fb_state.height = r->pot_buffers ?
441 util_next_power_of_two(r->picture_height) : r->picture_height;
442 r->fb_state.nr_cbufs = 1;
443 r->fb_state.zsbuf = NULL;
444
445 /* Luma filter */
446 filters[0] = PIPE_TEX_FILTER_NEAREST;
447 /* Chroma filters */
448 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
449 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
450 filters[1] = PIPE_TEX_FILTER_NEAREST;
451 filters[2] = PIPE_TEX_FILTER_NEAREST;
452 }
453 else {
454 filters[1] = PIPE_TEX_FILTER_LINEAR;
455 filters[2] = PIPE_TEX_FILTER_LINEAR;
456 }
457 /* Fwd, bkwd ref filters */
458 filters[3] = PIPE_TEX_FILTER_LINEAR;
459 filters[4] = PIPE_TEX_FILTER_LINEAR;
460
461 for (i = 0; i < 5; ++i) {
462 memset(&sampler, 0, sizeof(sampler));
463 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
464 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
465 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
466 sampler.min_img_filter = filters[i];
467 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
468 sampler.mag_img_filter = filters[i];
469 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
470 sampler.compare_func = PIPE_FUNC_ALWAYS;
471 sampler.normalized_coords = 1;
472 /*sampler.shadow_ambient = ; */
473 /*sampler.lod_bias = ; */
474 sampler.min_lod = 0;
475 /*sampler.max_lod = ; */
476 /*sampler.border_color[i] = ; */
477 /*sampler.max_anisotropy = ; */
478 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
479 }
480
481 return true;
482 }
483
484 static void
485 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
486 {
487 unsigned i;
488
489 assert(r);
490
491 for (i = 0; i < 5; ++i)
492 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
493 }
494
495 static bool
496 init_shaders(struct vl_mpeg12_mc_renderer *r)
497 {
498 assert(r);
499
500 create_intra_vert_shader(r);
501 create_intra_frag_shader(r);
502 create_frame_pred_vert_shader(r);
503 create_frame_pred_frag_shader(r);
504 create_frame_bi_pred_vert_shader(r);
505 create_frame_bi_pred_frag_shader(r);
506
507 return true;
508 }
509
510 static void
511 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
512 {
513 assert(r);
514
515 r->pipe->delete_vs_state(r->pipe, r->i_vs);
516 r->pipe->delete_fs_state(r->pipe, r->i_fs);
517 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
518 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
519 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
520 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
521 }
522
523 static bool
524 init_buffers(struct vl_mpeg12_mc_renderer *r)
525 {
526 struct pipe_resource template;
527 struct pipe_vertex_element vertex_elems[8];
528 struct pipe_sampler_view sampler_view;
529
530 const unsigned mbw =
531 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
532 const unsigned mbh =
533 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
534
535 unsigned i;
536
537 assert(r);
538
539 r->macroblocks_per_batch =
540 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
541 r->num_macroblocks = 0;
542 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
543
544 memset(&template, 0, sizeof(struct pipe_resource));
545 template.target = PIPE_TEXTURE_2D;
546 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
547 template.format = PIPE_FORMAT_R16_SNORM;
548 template.last_level = 0;
549 template.width0 = r->pot_buffers ?
550 util_next_power_of_two(r->picture_width) : r->picture_width;
551 template.height0 = r->pot_buffers ?
552 util_next_power_of_two(r->picture_height) : r->picture_height;
553 template.depth0 = 1;
554 template.usage = PIPE_USAGE_DYNAMIC;
555 template.bind = PIPE_BIND_SAMPLER_VIEW;
556 template.flags = 0;
557
558 r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
559
560 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
561 template.width0 = r->pot_buffers ?
562 util_next_power_of_two(r->picture_width / 2) :
563 r->picture_width / 2;
564 template.height0 = r->pot_buffers ?
565 util_next_power_of_two(r->picture_height / 2) :
566 r->picture_height / 2;
567 }
568 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
569 template.height0 = r->pot_buffers ?
570 util_next_power_of_two(r->picture_height / 2) :
571 r->picture_height / 2;
572
573 r->textures.individual.cb =
574 r->pipe->screen->resource_create(r->pipe->screen, &template);
575 r->textures.individual.cr =
576 r->pipe->screen->resource_create(r->pipe->screen, &template);
577
578 for (i = 0; i < 3; ++i) {
579 u_sampler_view_default_template(&sampler_view,
580 r->textures.all[i],
581 r->textures.all[i]->format);
582 r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
583 }
584
585 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
586 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
587 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
588 /* XXX: Create with usage DYNAMIC or STREAM */
589 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
590 (
591 r->pipe->screen,
592 PIPE_BIND_VERTEX_BUFFER,
593 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
594 );
595
596 for (i = 1; i < 3; ++i) {
597 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
598 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
599 r->vertex_bufs.all[i].buffer_offset = 0;
600 /* XXX: Create with usage DYNAMIC or STREAM */
601 r->vertex_bufs.all[i].buffer = pipe_buffer_create
602 (
603 r->pipe->screen,
604 PIPE_BIND_VERTEX_BUFFER,
605 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
606 );
607 }
608
609 memset(&vertex_elems, 0, sizeof(vertex_elems));
610
611 /* Position element */
612 vertex_elems[0].src_offset = 0;
613 vertex_elems[0].instance_divisor = 0;
614 vertex_elems[0].vertex_buffer_index = 0;
615 vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
616
617 /* Luma, texcoord element */
618 vertex_elems[1].src_offset = sizeof(struct vertex2f);
619 vertex_elems[1].instance_divisor = 0;
620 vertex_elems[1].vertex_buffer_index = 0;
621 vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
622
623 /* Chroma Cr texcoord element */
624 vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
625 vertex_elems[2].instance_divisor = 0;
626 vertex_elems[2].vertex_buffer_index = 0;
627 vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
628
629 /* Chroma Cb texcoord element */
630 vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
631 vertex_elems[3].instance_divisor = 0;
632 vertex_elems[3].vertex_buffer_index = 0;
633 vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
634
635 /* First ref surface top field texcoord element */
636 vertex_elems[4].src_offset = 0;
637 vertex_elems[4].instance_divisor = 0;
638 vertex_elems[4].vertex_buffer_index = 1;
639 vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
640
641 /* First ref surface bottom field texcoord element */
642 vertex_elems[5].src_offset = sizeof(struct vertex2f);
643 vertex_elems[5].instance_divisor = 0;
644 vertex_elems[5].vertex_buffer_index = 1;
645 vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
646
647 /* Second ref surface top field texcoord element */
648 vertex_elems[6].src_offset = 0;
649 vertex_elems[6].instance_divisor = 0;
650 vertex_elems[6].vertex_buffer_index = 2;
651 vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
652
653 /* Second ref surface bottom field texcoord element */
654 vertex_elems[7].src_offset = sizeof(struct vertex2f);
655 vertex_elems[7].instance_divisor = 0;
656 vertex_elems[7].vertex_buffer_index = 2;
657 vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
658
659 r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
660 r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
661 r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
662
663 r->vs_const_buf = pipe_buffer_create
664 (
665 r->pipe->screen,
666 PIPE_BIND_CONSTANT_BUFFER,
667 sizeof(struct vertex_shader_consts)
668 );
669
670 return true;
671 }
672
673 static void
674 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
675 {
676 unsigned i;
677
678 assert(r);
679
680 pipe_resource_reference(&r->vs_const_buf, NULL);
681
682 for (i = 0; i < 3; ++i) {
683 pipe_sampler_view_reference(&r->sampler_views.all[i], NULL);
684 r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state.all[i]);
685 pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
686 pipe_resource_reference(&r->textures.all[i], NULL);
687 }
688
689 FREE(r->macroblock_buf);
690 }
691
692 static enum MACROBLOCK_TYPE
693 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
694 {
695 assert(mb);
696
697 switch (mb->mb_type) {
698 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
699 return MACROBLOCK_TYPE_INTRA;
700 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
701 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
702 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
703 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
704 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
705 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
706 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
707 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
708 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
709 default:
710 assert(0);
711 }
712
713 /* Unreachable */
714 return -1;
715 }
716
717 static void
718 gen_block_verts(struct vert_stream_0 *vb, unsigned cbp, unsigned mbx, unsigned mby,
719 const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
720 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
721 bool use_zeroblocks, struct vertex2f *zero_blocks)
722 {
723 struct vertex2f v;
724
725 assert(vb);
726 assert(unit && half && offset);
727 assert(zero_blocks || !use_zeroblocks);
728
729 /* Generate vertices for two triangles covering a block */
730 v.x = mbx * unit->x + offset->x;
731 v.y = mby * unit->y + offset->y;
732
733 vb[0].pos.x = v.x;
734 vb[0].pos.y = v.y;
735 vb[1].pos.x = v.x;
736 vb[1].pos.y = v.y + half->y;
737 vb[2].pos.x = v.x + half->x;
738 vb[2].pos.y = v.y;
739 vb[3].pos.x = v.x + half->x;
740 vb[3].pos.y = v.y;
741 vb[4].pos.x = v.x;
742 vb[4].pos.y = v.y + half->y;
743 vb[5].pos.x = v.x + half->x;
744 vb[5].pos.y = v.y + half->y;
745
746 /* Generate texcoords for the triangles, either pointing to the correct area on the luma/chroma texture
747 or if zero blocks are being used, to the zero block if the appropriate CBP bits aren't set (i.e. no data
748 for this channel is defined for this block) */
749
750 if (!use_zeroblocks || cbp & luma_mask) {
751 v.x = mbx * unit->x + offset->x;
752 v.y = mby * unit->y + offset->y;
753 }
754 else {
755 v.x = zero_blocks[0].x;
756 v.y = zero_blocks[0].y;
757 }
758
759 vb[0].luma_tc.x = v.x;
760 vb[0].luma_tc.y = v.y;
761 vb[1].luma_tc.x = v.x;
762 vb[1].luma_tc.y = v.y + half->y;
763 vb[2].luma_tc.x = v.x + half->x;
764 vb[2].luma_tc.y = v.y;
765 vb[3].luma_tc.x = v.x + half->x;
766 vb[3].luma_tc.y = v.y;
767 vb[4].luma_tc.x = v.x;
768 vb[4].luma_tc.y = v.y + half->y;
769 vb[5].luma_tc.x = v.x + half->x;
770 vb[5].luma_tc.y = v.y + half->y;
771
772 if (!use_zeroblocks || cbp & cb_mask) {
773 v.x = mbx * unit->x + offset->x;
774 v.y = mby * unit->y + offset->y;
775 }
776 else {
777 v.x = zero_blocks[1].x;
778 v.y = zero_blocks[1].y;
779 }
780
781 vb[0].cb_tc.x = v.x;
782 vb[0].cb_tc.y = v.y;
783 vb[1].cb_tc.x = v.x;
784 vb[1].cb_tc.y = v.y + half->y;
785 vb[2].cb_tc.x = v.x + half->x;
786 vb[2].cb_tc.y = v.y;
787 vb[3].cb_tc.x = v.x + half->x;
788 vb[3].cb_tc.y = v.y;
789 vb[4].cb_tc.x = v.x;
790 vb[4].cb_tc.y = v.y + half->y;
791 vb[5].cb_tc.x = v.x + half->x;
792 vb[5].cb_tc.y = v.y + half->y;
793
794 if (!use_zeroblocks || cbp & cr_mask) {
795 v.x = mbx * unit->x + offset->x;
796 v.y = mby * unit->y + offset->y;
797 }
798 else {
799 v.x = zero_blocks[2].x;
800 v.y = zero_blocks[2].y;
801 }
802
803 vb[0].cr_tc.x = v.x;
804 vb[0].cr_tc.y = v.y;
805 vb[1].cr_tc.x = v.x;
806 vb[1].cr_tc.y = v.y + half->y;
807 vb[2].cr_tc.x = v.x + half->x;
808 vb[2].cr_tc.y = v.y;
809 vb[3].cr_tc.x = v.x + half->x;
810 vb[3].cr_tc.y = v.y;
811 vb[4].cr_tc.x = v.x;
812 vb[4].cr_tc.y = v.y + half->y;
813 vb[5].cr_tc.x = v.x + half->x;
814 vb[5].cr_tc.y = v.y + half->y;
815 }
816
817 static void
818 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
819 struct pipe_mpeg12_macroblock *mb, unsigned pos,
820 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
821 {
822 struct vertex2f mo_vec[2];
823
824 unsigned i;
825
826 assert(r);
827 assert(mb);
828 assert(ycbcr_vb);
829 assert(pos < r->macroblocks_per_batch);
830
831 mo_vec[1].x = 0;
832 mo_vec[1].y = 0;
833
834 switch (mb->mb_type) {
835 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
836 {
837 struct vertex2f *vb;
838
839 assert(ref_vb && ref_vb[1]);
840
841 vb = ref_vb[1] + pos * 2 * 24;
842
843 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
844 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
845
846 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
847 for (i = 0; i < 24 * 2; i += 2) {
848 vb[i].x = mo_vec[0].x;
849 vb[i].y = mo_vec[0].y;
850 }
851 }
852 else {
853 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
854 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
855
856 for (i = 0; i < 24 * 2; i += 2) {
857 vb[i].x = mo_vec[0].x;
858 vb[i].y = mo_vec[0].y;
859 vb[i + 1].x = mo_vec[1].x;
860 vb[i + 1].y = mo_vec[1].y;
861 }
862 }
863
864 /* fall-through */
865 }
866 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
867 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
868 {
869 struct vertex2f *vb;
870
871 assert(ref_vb && ref_vb[0]);
872
873 vb = ref_vb[0] + pos * 2 * 24;
874
875 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
876 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
877 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
878
879 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
880 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
881 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
882 }
883 }
884 else {
885 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
886 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
887
888 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
889 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
890 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
891 }
892 }
893
894 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
895 for (i = 0; i < 24 * 2; i += 2) {
896 vb[i].x = mo_vec[0].x;
897 vb[i].y = mo_vec[0].y;
898 }
899 }
900 else {
901 for (i = 0; i < 24 * 2; i += 2) {
902 vb[i].x = mo_vec[0].x;
903 vb[i].y = mo_vec[0].y;
904 vb[i + 1].x = mo_vec[1].x;
905 vb[i + 1].y = mo_vec[1].y;
906 }
907 }
908
909 /* fall-through */
910 }
911 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
912 {
913 const struct vertex2f unit =
914 {
915 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
916 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
917 };
918 const struct vertex2f half =
919 {
920 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
921 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
922 };
923 const struct vertex2f offsets[2][2] =
924 {
925 {
926 {0, 0}, {0, half.y}
927 },
928 {
929 {half.x, 0}, {half.x, half.y}
930 }
931 };
932 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
933
934 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
935
936 gen_block_verts(vb, mb->cbp, mb->mbx, mb->mby,
937 &unit, &half, &offsets[0][0],
938 32, 2, 1, use_zb, r->zero_block);
939
940 gen_block_verts(vb + 6, mb->cbp, mb->mbx, mb->mby,
941 &unit, &half, &offsets[1][0],
942 16, 2, 1, use_zb, r->zero_block);
943
944 gen_block_verts(vb + 12, mb->cbp, mb->mbx, mb->mby,
945 &unit, &half, &offsets[0][1],
946 8, 2, 1, use_zb, r->zero_block);
947
948 gen_block_verts(vb + 18, mb->cbp, mb->mbx, mb->mby,
949 &unit, &half, &offsets[1][1],
950 4, 2, 1, use_zb, r->zero_block);
951
952 break;
953 }
954 default:
955 assert(0);
956 }
957 }
958
959 static void
960 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
961 unsigned *num_macroblocks)
962 {
963 unsigned offset[NUM_MACROBLOCK_TYPES];
964 struct vert_stream_0 *ycbcr_vb;
965 struct vertex2f *ref_vb[2];
966 struct pipe_transfer *buf_transfer[3];
967 unsigned i;
968
969 assert(r);
970 assert(num_macroblocks);
971
972 for (i = 0; i < r->num_macroblocks; ++i) {
973 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
974 ++num_macroblocks[mb_type];
975 }
976
977 offset[0] = 0;
978
979 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
980 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
981
982 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
983 (
984 r->pipe,
985 r->vertex_bufs.individual.ycbcr.buffer,
986 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
987 &buf_transfer[0]
988 );
989
990 for (i = 0; i < 2; ++i)
991 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
992 (
993 r->pipe,
994 r->vertex_bufs.individual.ref[i].buffer,
995 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
996 &buf_transfer[i + 1]
997 );
998
999 for (i = 0; i < r->num_macroblocks; ++i) {
1000 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1001
1002 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
1003 ycbcr_vb, ref_vb);
1004
1005 ++offset[mb_type];
1006 }
1007
1008 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
1009 for (i = 0; i < 2; ++i)
1010 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
1011 }
1012
1013 static struct pipe_sampler_view
1014 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
1015 {
1016 struct pipe_sampler_view *sampler_view;
1017 assert(r);
1018 assert(surface);
1019
1020 sampler_view = (struct pipe_sampler_view*)util_keymap_lookup(r->texview_map, &surface);
1021 if (!sampler_view) {
1022 struct pipe_sampler_view templat;
1023 boolean added_to_map;
1024
1025 u_sampler_view_default_template(&templat, surface->texture,
1026 surface->texture->format);
1027 sampler_view = r->pipe->create_sampler_view(r->pipe, surface->texture,
1028 &templat);
1029 if (!sampler_view)
1030 return NULL;
1031
1032 added_to_map = util_keymap_insert(r->texview_map, &surface,
1033 sampler_view, r->pipe);
1034 assert(added_to_map);
1035 }
1036
1037 return sampler_view;
1038 }
1039
1040 static void
1041 flush(struct vl_mpeg12_mc_renderer *r)
1042 {
1043 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1044 unsigned vb_start = 0;
1045 struct vertex_shader_consts *vs_consts;
1046 struct pipe_transfer *buf_transfer;
1047 unsigned i;
1048
1049 assert(r);
1050 assert(r->num_macroblocks == r->macroblocks_per_batch);
1051
1052 gen_macroblock_stream(r, num_macroblocks);
1053
1054 r->fb_state.cbufs[0] = r->surface;
1055
1056 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1057 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1058
1059 vs_consts = pipe_buffer_map
1060 (
1061 r->pipe, r->vs_const_buf,
1062 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1063 &buf_transfer
1064 );
1065
1066 vs_consts->denorm.x = r->surface->width;
1067 vs_consts->denorm.y = r->surface->height;
1068
1069 pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
1070
1071 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1072 r->vs_const_buf);
1073
1074 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1075 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1076 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
1077 r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
1078 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1079 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1080 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1081
1082 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1083 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1084 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1085 }
1086
1087 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1088 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1089 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1090 r->textures.individual.ref[0] = r->past->texture;
1091 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1092 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1093 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1094 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1095 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1096
1097 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1098 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1099 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1100 }
1101
1102 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1103 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1104 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1105 r->textures.individual.ref[0] = r->past->texture;
1106 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1107 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1108 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1109 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1110 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1111
1112 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1113 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1114 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1115 }
1116
1117 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1118 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1119 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1120 r->textures.individual.ref[0] = r->future->texture;
1121 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1122 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1123 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1124 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1125 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1126
1127 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1128 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1129 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1130 }
1131
1132 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0*/ ) {
1133 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1134 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1135 r->textures.individual.ref[0] = r->future->texture;
1136 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1137 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1138 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1139 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1140 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1141
1142 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1143 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1144 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1145 }
1146
1147 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1148 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1149 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1150 r->textures.individual.ref[0] = r->past->texture;
1151 r->textures.individual.ref[1] = r->future->texture;
1152 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1153 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1154 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1155 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1156 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1157 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1158
1159 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1160 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1161 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1162 }
1163
1164 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1165 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1166 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1167 r->textures.individual.ref[0] = r->past->texture;
1168 r->textures.individual.ref[1] = r->future->texture;
1169 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1170 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1171 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1172 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1173 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1174 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1175
1176 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1177 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1178 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1179 }
1180
1181 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1182
1183 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1184 for (i = 0; i < 3; ++i)
1185 r->zero_block[i].x = ZERO_BLOCK_NIL;
1186
1187 r->num_macroblocks = 0;
1188 }
1189
1190 static void
1191 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1192 {
1193 unsigned y;
1194
1195 assert(src);
1196 assert(dst);
1197
1198 for (y = 0; y < BLOCK_HEIGHT; ++y)
1199 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1200 }
1201
1202 static void
1203 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1204 {
1205 unsigned y;
1206
1207 assert(src);
1208 assert(dst);
1209
1210 for (y = 0; y < BLOCK_HEIGHT; ++y)
1211 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1212 }
1213
1214 static void
1215 fill_zero_block(short *dst, unsigned dst_pitch)
1216 {
1217 unsigned y;
1218
1219 assert(dst);
1220
1221 for (y = 0; y < BLOCK_HEIGHT; ++y)
1222 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1223 }
1224
1225 static void
1226 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1227 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1228 {
1229 unsigned tex_pitch;
1230 short *texels;
1231 unsigned tb = 0, sb = 0;
1232 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1233 unsigned x, y;
1234
1235 assert(r);
1236 assert(blocks);
1237
1238 tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->resource->format);
1239 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1240
1241 for (y = 0; y < 2; ++y) {
1242 for (x = 0; x < 2; ++x, ++tb) {
1243 if ((cbp >> (5 - tb)) & 1) {
1244 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1245 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1246 texels + y * tex_pitch * BLOCK_WIDTH +
1247 x * BLOCK_WIDTH, tex_pitch);
1248 }
1249 else {
1250 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1251 texels + y * tex_pitch + x * BLOCK_WIDTH,
1252 tex_pitch);
1253 }
1254
1255 ++sb;
1256 }
1257 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1258 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1259 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1260 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1261 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1262 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1263 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1264 }
1265 }
1266 }
1267 }
1268 }
1269
1270 /* TODO: Implement 422, 444 */
1271 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1272
1273 mbpx /= 2;
1274 mbpy /= 2;
1275
1276 for (tb = 0; tb < 2; ++tb) {
1277 tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->resource->format);
1278 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1279
1280 if ((cbp >> (1 - tb)) & 1) {
1281 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1282 ++sb;
1283 }
1284 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1285 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1286 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1287 fill_zero_block(texels, tex_pitch);
1288 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1289 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1290 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1291 }
1292 }
1293 }
1294 }
1295 }
1296
1297 static void
1298 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1299 struct pipe_mpeg12_macroblock *mb)
1300 {
1301 assert(r);
1302 assert(mb);
1303 assert(mb->blocks);
1304 assert(r->num_macroblocks < r->macroblocks_per_batch);
1305
1306 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1307 sizeof(struct pipe_mpeg12_macroblock));
1308
1309 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1310
1311 ++r->num_macroblocks;
1312 }
1313
1314 static void
1315 texview_map_delete(const struct keymap *map,
1316 const void *key, void *data,
1317 void *user)
1318 {
1319 struct pipe_sampler_view *sv = (struct pipe_sampler_view*)data;
1320
1321 assert(map);
1322 assert(key);
1323 assert(data);
1324 assert(user);
1325
1326 pipe_sampler_view_reference(&sv, NULL);
1327 }
1328
1329 bool
1330 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1331 struct pipe_context *pipe,
1332 unsigned picture_width,
1333 unsigned picture_height,
1334 enum pipe_video_chroma_format chroma_format,
1335 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1336 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1337 bool pot_buffers)
1338 {
1339 unsigned i;
1340
1341 assert(renderer);
1342 assert(pipe);
1343 /* TODO: Implement other policies */
1344 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1345 /* TODO: Implement this */
1346 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1347 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1348 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1349 assert(pot_buffers);
1350
1351 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1352
1353 renderer->pipe = pipe;
1354 renderer->picture_width = picture_width;
1355 renderer->picture_height = picture_height;
1356 renderer->chroma_format = chroma_format;
1357 renderer->bufmode = bufmode;
1358 renderer->eb_handling = eb_handling;
1359 renderer->pot_buffers = pot_buffers;
1360
1361 renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
1362 texview_map_delete);
1363 if (!renderer->texview_map)
1364 return false;
1365
1366 if (!init_pipe_state(renderer)) {
1367 util_delete_keymap(renderer->texview_map, renderer->pipe);
1368 return false;
1369 }
1370 if (!init_shaders(renderer)) {
1371 util_delete_keymap(renderer->texview_map, renderer->pipe);
1372 cleanup_pipe_state(renderer);
1373 return false;
1374 }
1375 if (!init_buffers(renderer)) {
1376 util_delete_keymap(renderer->texview_map, renderer->pipe);
1377 cleanup_shaders(renderer);
1378 cleanup_pipe_state(renderer);
1379 return false;
1380 }
1381
1382 renderer->surface = NULL;
1383 renderer->past = NULL;
1384 renderer->future = NULL;
1385 for (i = 0; i < 3; ++i)
1386 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1387 renderer->num_macroblocks = 0;
1388
1389 xfer_buffers_map(renderer);
1390
1391 return true;
1392 }
1393
1394 void
1395 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1396 {
1397 assert(renderer);
1398
1399 xfer_buffers_unmap(renderer);
1400
1401 util_delete_keymap(renderer->texview_map, renderer->pipe);
1402 cleanup_pipe_state(renderer);
1403 cleanup_shaders(renderer);
1404 cleanup_buffers(renderer);
1405
1406 pipe_surface_reference(&renderer->surface, NULL);
1407 pipe_surface_reference(&renderer->past, NULL);
1408 pipe_surface_reference(&renderer->future, NULL);
1409 }
1410
1411 void
1412 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1413 *renderer,
1414 struct pipe_surface *surface,
1415 struct pipe_surface *past,
1416 struct pipe_surface *future,
1417 unsigned num_macroblocks,
1418 struct pipe_mpeg12_macroblock
1419 *mpeg12_macroblocks,
1420 struct pipe_fence_handle **fence)
1421 {
1422 bool new_surface = false;
1423
1424 assert(renderer);
1425 assert(surface);
1426 assert(num_macroblocks);
1427 assert(mpeg12_macroblocks);
1428
1429 if (renderer->surface) {
1430 if (surface != renderer->surface) {
1431 if (renderer->num_macroblocks > 0) {
1432 xfer_buffers_unmap(renderer);
1433 flush(renderer);
1434 }
1435
1436 new_surface = true;
1437 }
1438
1439 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1440 assert(surface != renderer->surface || renderer->past == past);
1441 assert(surface != renderer->surface || renderer->future == future);
1442 }
1443 else
1444 new_surface = true;
1445
1446 if (new_surface) {
1447 pipe_surface_reference(&renderer->surface, surface);
1448 pipe_surface_reference(&renderer->past, past);
1449 pipe_surface_reference(&renderer->future, future);
1450 renderer->fence = fence;
1451 renderer->surface_tex_inv_size.x = 1.0f / surface->width;
1452 renderer->surface_tex_inv_size.y = 1.0f / surface->height;
1453 }
1454
1455 while (num_macroblocks) {
1456 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1457 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1458 unsigned i;
1459
1460 for (i = 0; i < num_to_submit; ++i) {
1461 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1462 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1463 }
1464
1465 num_macroblocks -= num_to_submit;
1466
1467 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1468 xfer_buffers_unmap(renderer);
1469 flush(renderer);
1470 xfer_buffers_map(renderer);
1471 /* Next time we get this surface it may have new ref frames */
1472 pipe_surface_reference(&renderer->surface, NULL);
1473 pipe_surface_reference(&renderer->past, NULL);
1474 pipe_surface_reference(&renderer->future, NULL);
1475 }
1476 }
1477 }