vl: initial implementation of vlVaQueryImageFormats(), vlVaCreateImage(), vlVaQuerySu...
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include "util/u_draw.h"
30 #include <assert.h>
31 #include <pipe/p_context.h>
32 #include <util/u_inlines.h>
33 #include <util/u_format.h>
34 #include <util/u_math.h>
35 #include <util/u_memory.h>
36 #include <util/u_keymap.h>
37 #include <util/u_sampler.h>
38 #include <util/u_draw.h>
39 #include <tgsi/tgsi_ureg.h>
40
/* Default buffer alignment (not referenced in the code visible here). */
#define DEFAULT_BUF_ALIGNMENT 1

/* Macroblock and block dimensions, in pixels. */
#define MACROBLOCK_WIDTH 16
#define MACROBLOCK_HEIGHT 16
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8

/* Sentinel coordinate marking "no zero block"; any negative x counts as nil. */
#define ZERO_BLOCK_NIL (-1.0f)
#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)

/* Factor the fragment shaders multiply the sampled residual texel by. */
#define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
49
/*
 * Contents of the vertex shader constant buffer (r->vs_const_buf is created
 * with sizeof this struct). flush() fills denorm.x/.y with the width and
 * height of the destination surface.
 */
struct vertex_shader_consts
{
   struct vertex4f denorm;
};
54
/*
 * Fragment shader constants.
 * NOTE(review): not referenced by any code visible in this part of the
 * file -- confirm whether it is still used before relying on it.
 */
struct fragment_shader_consts
{
   struct vertex4f multiplier;
   struct vertex4f div;
};
60
/*
 * Per-vertex layout of vertex buffer 0 (the "ycbcr" buffer): a position
 * followed by one texcoord each for the luma, Cb and Cr textures. The
 * buffer stride and the first four vertex elements set up in init_buffers()
 * follow this field order.
 */
struct vert_stream_0
{
   struct vertex2f pos;
   struct vertex2f luma_tc;
   struct vertex2f cb_tc;
   struct vertex2f cr_tc;
};
68
/*
 * Internal macroblock classification produced by get_macroblock_type():
 * the prediction direction combined with frame/field motion. The values
 * index the per-type count and offset arrays, so they must stay dense and
 * start at 0.
 */
enum MACROBLOCK_TYPE
{
   MACROBLOCK_TYPE_INTRA,
   MACROBLOCK_TYPE_FWD_FRAME_PRED,
   MACROBLOCK_TYPE_FWD_FIELD_PRED,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED,
   MACROBLOCK_TYPE_BI_FRAME_PRED,
   MACROBLOCK_TYPE_BI_FIELD_PRED,

   /* Number of classifications above; used to size arrays. */
   NUM_MACROBLOCK_TYPES
};
81
82 static bool
83 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
84 {
85 struct ureg_program *shader;
86 struct ureg_src vpos, vtex[3];
87 struct ureg_dst o_vpos, o_vtex[3];
88 unsigned i;
89
90 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
91 if (!shader)
92 return false;
93
94 vpos = ureg_DECL_vs_input(shader, 0);
95 for (i = 0; i < 3; ++i)
96 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
97 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
98 for (i = 0; i < 3; ++i)
99 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
100
101 /*
102 * o_vpos = vpos
103 * o_vtex[0..2] = vtex[0..2]
104 */
105 ureg_MOV(shader, o_vpos, vpos);
106 for (i = 0; i < 3; ++i)
107 ureg_MOV(shader, o_vtex[i], vtex[i]);
108
109 ureg_END(shader);
110
111 r->i_vs = ureg_create_shader_and_destroy(shader, r->pipe);
112 if (!r->i_vs)
113 return false;
114
115 return true;
116 }
117
118 static bool
119 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
120 {
121 struct ureg_program *shader;
122 struct ureg_src tc[3];
123 struct ureg_src sampler[3];
124 struct ureg_dst texel, temp;
125 struct ureg_dst fragment;
126 unsigned i;
127
128 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
129 if (!shader)
130 return false;
131
132 for (i = 0; i < 3; ++i) {
133 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
134 sampler[i] = ureg_DECL_sampler(shader, i);
135 }
136 texel = ureg_DECL_temporary(shader);
137 temp = ureg_DECL_temporary(shader);
138 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
139
140 /*
141 * texel.r = tex(tc[0], sampler[0])
142 * texel.g = tex(tc[1], sampler[1])
143 * texel.b = tex(tc[2], sampler[2])
144 * fragment = texel * scale
145 */
146 for (i = 0; i < 3; ++i) {
147 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
148 ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
149 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
150 }
151 ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
152
153 ureg_release_temporary(shader, texel);
154 ureg_release_temporary(shader, temp);
155 ureg_END(shader);
156
157 r->i_fs = ureg_create_shader_and_destroy(shader, r->pipe);
158 if (!r->i_fs)
159 return false;
160
161 return true;
162 }
163
164 static bool
165 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
166 {
167 struct ureg_program *shader;
168 struct ureg_src vpos, vtex[4];
169 struct ureg_dst o_vpos, o_vtex[4];
170 unsigned i;
171
172 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
173 if (!shader)
174 return false;
175
176 vpos = ureg_DECL_vs_input(shader, 0);
177 for (i = 0; i < 4; ++i)
178 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
179 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
180 for (i = 0; i < 4; ++i)
181 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
182
183 /*
184 * o_vpos = vpos
185 * o_vtex[0..2] = vtex[0..2]
186 * o_vtex[3] = vpos + vtex[3] // Apply motion vector
187 */
188 ureg_MOV(shader, o_vpos, vpos);
189 for (i = 0; i < 3; ++i)
190 ureg_MOV(shader, o_vtex[i], vtex[i]);
191 ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
192
193 ureg_END(shader);
194
195 r->p_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
196 if (!r->p_vs[0])
197 return false;
198
199 return true;
200 }
201
#if 0
/* Compiled-out stub for the field-prediction vertex shader; it only
 * contains a failing assertion and would need a real body before being
 * enabled. */
static void
create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   assert(false);
}
#endif
209
210 static bool
211 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
212 {
213 struct ureg_program *shader;
214 struct ureg_src tc[4];
215 struct ureg_src sampler[4];
216 struct ureg_dst texel, ref;
217 struct ureg_dst fragment;
218 unsigned i;
219
220 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
221 if (!shader)
222 return false;
223
224 for (i = 0; i < 4; ++i) {
225 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
226 sampler[i] = ureg_DECL_sampler(shader, i);
227 }
228 texel = ureg_DECL_temporary(shader);
229 ref = ureg_DECL_temporary(shader);
230 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
231
232 /*
233 * texel.r = tex(tc[0], sampler[0])
234 * texel.g = tex(tc[1], sampler[1])
235 * texel.b = tex(tc[2], sampler[2])
236 * ref = tex(tc[3], sampler[3])
237 * fragment = texel * scale + ref
238 */
239 for (i = 0; i < 3; ++i) {
240 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
241 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
242 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
243 }
244 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
245 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
246
247 ureg_release_temporary(shader, texel);
248 ureg_release_temporary(shader, ref);
249 ureg_END(shader);
250
251 r->p_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
252 if (!r->p_fs[0])
253 return false;
254
255 return true;
256 }
257
#if 0
/* Compiled-out stub for the field-prediction fragment shader; it only
 * contains a failing assertion and would need a real body before being
 * enabled. */
static void
create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   assert(false);
}
#endif
265
266 static bool
267 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
268 {
269 struct ureg_program *shader;
270 struct ureg_src vpos, vtex[5];
271 struct ureg_dst o_vpos, o_vtex[5];
272 unsigned i;
273
274 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
275 if (!shader)
276 return false;
277
278 vpos = ureg_DECL_vs_input(shader, 0);
279 for (i = 0; i < 4; ++i)
280 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
281 /* Skip input 5 */
282 ureg_DECL_vs_input(shader, 5);
283 vtex[4] = ureg_DECL_vs_input(shader, 6);
284 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
285 for (i = 0; i < 5; ++i)
286 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
287
288 /*
289 * o_vpos = vpos
290 * o_vtex[0..2] = vtex[0..2]
291 * o_vtex[3..4] = vpos + vtex[3..4] // Apply motion vector
292 */
293 ureg_MOV(shader, o_vpos, vpos);
294 for (i = 0; i < 3; ++i)
295 ureg_MOV(shader, o_vtex[i], vtex[i]);
296 for (i = 3; i < 5; ++i)
297 ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
298
299 ureg_END(shader);
300
301 r->b_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
302 if (!r->b_vs[0])
303 return false;
304
305 return true;
306 }
307
#if 0
/* Compiled-out stub for the bidirectional field-prediction vertex shader;
 * it only contains a failing assertion and would need a real body before
 * being enabled. */
static void
create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   assert(false);
}
#endif
315
316 static bool
317 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
318 {
319 struct ureg_program *shader;
320 struct ureg_src tc[5];
321 struct ureg_src sampler[5];
322 struct ureg_dst texel, ref[2];
323 struct ureg_dst fragment;
324 unsigned i;
325
326 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
327 if (!shader)
328 return false;
329
330 for (i = 0; i < 5; ++i) {
331 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
332 sampler[i] = ureg_DECL_sampler(shader, i);
333 }
334 texel = ureg_DECL_temporary(shader);
335 ref[0] = ureg_DECL_temporary(shader);
336 ref[1] = ureg_DECL_temporary(shader);
337 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
338
339 /*
340 * texel.r = tex(tc[0], sampler[0])
341 * texel.g = tex(tc[1], sampler[1])
342 * texel.b = tex(tc[2], sampler[2])
343 * ref[0..1 = tex(tc[3..4], sampler[3..4])
344 * ref[0] = lerp(ref[0], ref[1], 0.5)
345 * fragment = texel * scale + ref[0]
346 */
347 for (i = 0; i < 3; ++i) {
348 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
349 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
350 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
351 }
352 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
353 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
354 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
355
356 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
357
358 ureg_release_temporary(shader, texel);
359 ureg_release_temporary(shader, ref[0]);
360 ureg_release_temporary(shader, ref[1]);
361 ureg_END(shader);
362
363 r->b_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
364 if (!r->b_fs[0])
365 return false;
366
367 return true;
368 }
369
#if 0
/* Compiled-out stub for the bidirectional field-prediction fragment
 * shader; it only contains a failing assertion and would need a real body
 * before being enabled. */
static void
create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   assert(false);
}
#endif
377
378 static void
379 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
380 {
381 unsigned i;
382
383 assert(r);
384
385 for (i = 0; i < 3; ++i) {
386 struct pipe_box rect =
387 {
388 0, 0, 0,
389 r->textures.all[i]->width0,
390 r->textures.all[i]->height0,
391 1
392 };
393
394 r->tex_transfer[i] = r->pipe->get_transfer
395 (
396 r->pipe, r->textures.all[i],
397 u_subresource(0, 0),
398 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
399 &rect
400 );
401
402 r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]);
403 }
404 }
405
406 static void
407 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
408 {
409 unsigned i;
410
411 assert(r);
412
413 for (i = 0; i < 3; ++i) {
414 r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]);
415 r->pipe->transfer_destroy(r->pipe, r->tex_transfer[i]);
416 }
417 }
418
419 static bool
420 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
421 {
422 struct pipe_sampler_state sampler;
423 unsigned filters[5];
424 unsigned i;
425
426 assert(r);
427
428 r->viewport.scale[0] = r->pot_buffers ?
429 util_next_power_of_two(r->picture_width) : r->picture_width;
430 r->viewport.scale[1] = r->pot_buffers ?
431 util_next_power_of_two(r->picture_height) : r->picture_height;
432 r->viewport.scale[2] = 1;
433 r->viewport.scale[3] = 1;
434 r->viewport.translate[0] = 0;
435 r->viewport.translate[1] = 0;
436 r->viewport.translate[2] = 0;
437 r->viewport.translate[3] = 0;
438
439 r->fb_state.width = r->pot_buffers ?
440 util_next_power_of_two(r->picture_width) : r->picture_width;
441 r->fb_state.height = r->pot_buffers ?
442 util_next_power_of_two(r->picture_height) : r->picture_height;
443 r->fb_state.nr_cbufs = 1;
444 r->fb_state.zsbuf = NULL;
445
446 /* Luma filter */
447 filters[0] = PIPE_TEX_FILTER_NEAREST;
448 /* Chroma filters */
449 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
450 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
451 filters[1] = PIPE_TEX_FILTER_NEAREST;
452 filters[2] = PIPE_TEX_FILTER_NEAREST;
453 }
454 else {
455 filters[1] = PIPE_TEX_FILTER_LINEAR;
456 filters[2] = PIPE_TEX_FILTER_LINEAR;
457 }
458 /* Fwd, bkwd ref filters */
459 filters[3] = PIPE_TEX_FILTER_LINEAR;
460 filters[4] = PIPE_TEX_FILTER_LINEAR;
461
462 for (i = 0; i < 5; ++i) {
463 memset(&sampler, 0, sizeof(sampler));
464 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
465 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
466 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
467 sampler.min_img_filter = filters[i];
468 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
469 sampler.mag_img_filter = filters[i];
470 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
471 sampler.compare_func = PIPE_FUNC_ALWAYS;
472 sampler.normalized_coords = 1;
473 /*sampler.shadow_ambient = ; */
474 /*sampler.lod_bias = ; */
475 sampler.min_lod = 0;
476 /*sampler.max_lod = ; */
477 /*sampler.border_color[i] = ; */
478 /*sampler.max_anisotropy = ; */
479 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
480 }
481
482 return true;
483 }
484
485 static void
486 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
487 {
488 unsigned i;
489
490 assert(r);
491
492 for (i = 0; i < 5; ++i)
493 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
494 }
495
496 static bool
497 init_shaders(struct vl_mpeg12_mc_renderer *r)
498 {
499 assert(r);
500
501 create_intra_vert_shader(r);
502 create_intra_frag_shader(r);
503 create_frame_pred_vert_shader(r);
504 create_frame_pred_frag_shader(r);
505 create_frame_bi_pred_vert_shader(r);
506 create_frame_bi_pred_frag_shader(r);
507
508 return true;
509 }
510
511 static void
512 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
513 {
514 assert(r);
515
516 r->pipe->delete_vs_state(r->pipe, r->i_vs);
517 r->pipe->delete_fs_state(r->pipe, r->i_fs);
518 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
519 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
520 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
521 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
522 }
523
524 static bool
525 init_buffers(struct vl_mpeg12_mc_renderer *r)
526 {
527 struct pipe_resource template;
528 struct pipe_vertex_element vertex_elems[8];
529 struct pipe_sampler_view sampler_view;
530
531 const unsigned mbw =
532 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
533 const unsigned mbh =
534 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
535
536 unsigned i;
537
538 assert(r);
539
540 r->macroblocks_per_batch =
541 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
542 r->num_macroblocks = 0;
543 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
544
545 memset(&template, 0, sizeof(struct pipe_resource));
546 template.target = PIPE_TEXTURE_2D;
547 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
548 template.format = PIPE_FORMAT_R16_SNORM;
549 template.last_level = 0;
550 template.width0 = r->pot_buffers ?
551 util_next_power_of_two(r->picture_width) : r->picture_width;
552 template.height0 = r->pot_buffers ?
553 util_next_power_of_two(r->picture_height) : r->picture_height;
554 template.depth0 = 1;
555 template.usage = PIPE_USAGE_DYNAMIC;
556 template.bind = PIPE_BIND_SAMPLER_VIEW;
557 template.flags = 0;
558
559 r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
560
561 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
562 template.width0 = r->pot_buffers ?
563 util_next_power_of_two(r->picture_width / 2) :
564 r->picture_width / 2;
565 template.height0 = r->pot_buffers ?
566 util_next_power_of_two(r->picture_height / 2) :
567 r->picture_height / 2;
568 }
569 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
570 template.height0 = r->pot_buffers ?
571 util_next_power_of_two(r->picture_height / 2) :
572 r->picture_height / 2;
573
574 r->textures.individual.cb =
575 r->pipe->screen->resource_create(r->pipe->screen, &template);
576 r->textures.individual.cr =
577 r->pipe->screen->resource_create(r->pipe->screen, &template);
578
579 for (i = 0; i < 3; ++i) {
580 u_sampler_view_default_template(&sampler_view,
581 r->textures.all[i],
582 r->textures.all[i]->format);
583 r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
584 }
585
586 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
587 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
588 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
589 /* XXX: Create with usage DYNAMIC or STREAM */
590 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
591 (
592 r->pipe->screen,
593 PIPE_BIND_VERTEX_BUFFER,
594 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
595 );
596
597 for (i = 1; i < 3; ++i) {
598 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
599 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
600 r->vertex_bufs.all[i].buffer_offset = 0;
601 /* XXX: Create with usage DYNAMIC or STREAM */
602 r->vertex_bufs.all[i].buffer = pipe_buffer_create
603 (
604 r->pipe->screen,
605 PIPE_BIND_VERTEX_BUFFER,
606 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
607 );
608 }
609
610 memset(&vertex_elems, 0, sizeof(vertex_elems));
611
612 /* Position element */
613 vertex_elems[0].src_offset = 0;
614 vertex_elems[0].instance_divisor = 0;
615 vertex_elems[0].vertex_buffer_index = 0;
616 vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
617
618 /* Luma, texcoord element */
619 vertex_elems[1].src_offset = sizeof(struct vertex2f);
620 vertex_elems[1].instance_divisor = 0;
621 vertex_elems[1].vertex_buffer_index = 0;
622 vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
623
624 /* Chroma Cr texcoord element */
625 vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
626 vertex_elems[2].instance_divisor = 0;
627 vertex_elems[2].vertex_buffer_index = 0;
628 vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
629
630 /* Chroma Cb texcoord element */
631 vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
632 vertex_elems[3].instance_divisor = 0;
633 vertex_elems[3].vertex_buffer_index = 0;
634 vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
635
636 /* First ref surface top field texcoord element */
637 vertex_elems[4].src_offset = 0;
638 vertex_elems[4].instance_divisor = 0;
639 vertex_elems[4].vertex_buffer_index = 1;
640 vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
641
642 /* First ref surface bottom field texcoord element */
643 vertex_elems[5].src_offset = sizeof(struct vertex2f);
644 vertex_elems[5].instance_divisor = 0;
645 vertex_elems[5].vertex_buffer_index = 1;
646 vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
647
648 /* Second ref surface top field texcoord element */
649 vertex_elems[6].src_offset = 0;
650 vertex_elems[6].instance_divisor = 0;
651 vertex_elems[6].vertex_buffer_index = 2;
652 vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
653
654 /* Second ref surface bottom field texcoord element */
655 vertex_elems[7].src_offset = sizeof(struct vertex2f);
656 vertex_elems[7].instance_divisor = 0;
657 vertex_elems[7].vertex_buffer_index = 2;
658 vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
659
660 r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
661 r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
662 r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
663
664 r->vs_const_buf = pipe_buffer_create
665 (
666 r->pipe->screen,
667 PIPE_BIND_CONSTANT_BUFFER,
668 sizeof(struct vertex_shader_consts)
669 );
670
671 return true;
672 }
673
674 static void
675 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
676 {
677 unsigned i;
678
679 assert(r);
680
681 pipe_resource_reference(&r->vs_const_buf, NULL);
682
683 for (i = 0; i < 3; ++i) {
684 pipe_sampler_view_reference(&r->sampler_views.all[i], NULL);
685 r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state.all[i]);
686 pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
687 pipe_resource_reference(&r->textures.all[i], NULL);
688 }
689
690 FREE(r->macroblock_buf);
691 }
692
693 static enum MACROBLOCK_TYPE
694 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
695 {
696 assert(mb);
697
698 switch (mb->mb_type) {
699 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
700 return MACROBLOCK_TYPE_INTRA;
701 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
702 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
703 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
704 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
705 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
706 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
707 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
708 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
709 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
710 default:
711 assert(0);
712 }
713
714 /* Unreachable */
715 return -1;
716 }
717
718 static void
719 gen_block_verts(struct vert_stream_0 *vb, unsigned cbp, unsigned mbx, unsigned mby,
720 const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
721 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
722 bool use_zeroblocks, struct vertex2f *zero_blocks)
723 {
724 struct vertex2f v;
725
726 assert(vb);
727 assert(unit && half && offset);
728 assert(zero_blocks || !use_zeroblocks);
729
730 /* Generate vertices for two triangles covering a block */
731 v.x = mbx * unit->x + offset->x;
732 v.y = mby * unit->y + offset->y;
733
734 vb[0].pos.x = v.x;
735 vb[0].pos.y = v.y;
736 vb[1].pos.x = v.x;
737 vb[1].pos.y = v.y + half->y;
738 vb[2].pos.x = v.x + half->x;
739 vb[2].pos.y = v.y;
740 vb[3].pos.x = v.x + half->x;
741 vb[3].pos.y = v.y;
742 vb[4].pos.x = v.x;
743 vb[4].pos.y = v.y + half->y;
744 vb[5].pos.x = v.x + half->x;
745 vb[5].pos.y = v.y + half->y;
746
747 /* Generate texcoords for the triangles, either pointing to the correct area on the luma/chroma texture
748 or if zero blocks are being used, to the zero block if the appropriate CBP bits aren't set (i.e. no data
749 for this channel is defined for this block) */
750
751 if (!use_zeroblocks || cbp & luma_mask) {
752 v.x = mbx * unit->x + offset->x;
753 v.y = mby * unit->y + offset->y;
754 }
755 else {
756 v.x = zero_blocks[0].x;
757 v.y = zero_blocks[0].y;
758 }
759
760 vb[0].luma_tc.x = v.x;
761 vb[0].luma_tc.y = v.y;
762 vb[1].luma_tc.x = v.x;
763 vb[1].luma_tc.y = v.y + half->y;
764 vb[2].luma_tc.x = v.x + half->x;
765 vb[2].luma_tc.y = v.y;
766 vb[3].luma_tc.x = v.x + half->x;
767 vb[3].luma_tc.y = v.y;
768 vb[4].luma_tc.x = v.x;
769 vb[4].luma_tc.y = v.y + half->y;
770 vb[5].luma_tc.x = v.x + half->x;
771 vb[5].luma_tc.y = v.y + half->y;
772
773 if (!use_zeroblocks || cbp & cb_mask) {
774 v.x = mbx * unit->x + offset->x;
775 v.y = mby * unit->y + offset->y;
776 }
777 else {
778 v.x = zero_blocks[1].x;
779 v.y = zero_blocks[1].y;
780 }
781
782 vb[0].cb_tc.x = v.x;
783 vb[0].cb_tc.y = v.y;
784 vb[1].cb_tc.x = v.x;
785 vb[1].cb_tc.y = v.y + half->y;
786 vb[2].cb_tc.x = v.x + half->x;
787 vb[2].cb_tc.y = v.y;
788 vb[3].cb_tc.x = v.x + half->x;
789 vb[3].cb_tc.y = v.y;
790 vb[4].cb_tc.x = v.x;
791 vb[4].cb_tc.y = v.y + half->y;
792 vb[5].cb_tc.x = v.x + half->x;
793 vb[5].cb_tc.y = v.y + half->y;
794
795 if (!use_zeroblocks || cbp & cr_mask) {
796 v.x = mbx * unit->x + offset->x;
797 v.y = mby * unit->y + offset->y;
798 }
799 else {
800 v.x = zero_blocks[2].x;
801 v.y = zero_blocks[2].y;
802 }
803
804 vb[0].cr_tc.x = v.x;
805 vb[0].cr_tc.y = v.y;
806 vb[1].cr_tc.x = v.x;
807 vb[1].cr_tc.y = v.y + half->y;
808 vb[2].cr_tc.x = v.x + half->x;
809 vb[2].cr_tc.y = v.y;
810 vb[3].cr_tc.x = v.x + half->x;
811 vb[3].cr_tc.y = v.y;
812 vb[4].cr_tc.x = v.x;
813 vb[4].cr_tc.y = v.y + half->y;
814 vb[5].cr_tc.x = v.x + half->x;
815 vb[5].cr_tc.y = v.y + half->y;
816 }
817
818 static void
819 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
820 struct pipe_mpeg12_macroblock *mb, unsigned pos,
821 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
822 {
823 struct vertex2f mo_vec[2];
824
825 unsigned i;
826
827 assert(r);
828 assert(mb);
829 assert(ycbcr_vb);
830 assert(pos < r->macroblocks_per_batch);
831
832 mo_vec[1].x = 0;
833 mo_vec[1].y = 0;
834
835 switch (mb->mb_type) {
836 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
837 {
838 struct vertex2f *vb;
839
840 assert(ref_vb && ref_vb[1]);
841
842 vb = ref_vb[1] + pos * 2 * 24;
843
844 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
845 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
846
847 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
848 for (i = 0; i < 24 * 2; i += 2) {
849 vb[i].x = mo_vec[0].x;
850 vb[i].y = mo_vec[0].y;
851 }
852 }
853 else {
854 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
855 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
856
857 for (i = 0; i < 24 * 2; i += 2) {
858 vb[i].x = mo_vec[0].x;
859 vb[i].y = mo_vec[0].y;
860 vb[i + 1].x = mo_vec[1].x;
861 vb[i + 1].y = mo_vec[1].y;
862 }
863 }
864
865 /* fall-through */
866 }
867 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
868 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
869 {
870 struct vertex2f *vb;
871
872 assert(ref_vb && ref_vb[0]);
873
874 vb = ref_vb[0] + pos * 2 * 24;
875
876 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
877 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
878 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
879
880 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
881 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
882 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
883 }
884 }
885 else {
886 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
887 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
888
889 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
890 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
891 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
892 }
893 }
894
895 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
896 for (i = 0; i < 24 * 2; i += 2) {
897 vb[i].x = mo_vec[0].x;
898 vb[i].y = mo_vec[0].y;
899 }
900 }
901 else {
902 for (i = 0; i < 24 * 2; i += 2) {
903 vb[i].x = mo_vec[0].x;
904 vb[i].y = mo_vec[0].y;
905 vb[i + 1].x = mo_vec[1].x;
906 vb[i + 1].y = mo_vec[1].y;
907 }
908 }
909
910 /* fall-through */
911 }
912 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
913 {
914 const struct vertex2f unit =
915 {
916 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
917 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
918 };
919 const struct vertex2f half =
920 {
921 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
922 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
923 };
924 const struct vertex2f offsets[2][2] =
925 {
926 {
927 {0, 0}, {0, half.y}
928 },
929 {
930 {half.x, 0}, {half.x, half.y}
931 }
932 };
933 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
934
935 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
936
937 gen_block_verts(vb, mb->cbp, mb->mbx, mb->mby,
938 &unit, &half, &offsets[0][0],
939 32, 2, 1, use_zb, r->zero_block);
940
941 gen_block_verts(vb + 6, mb->cbp, mb->mbx, mb->mby,
942 &unit, &half, &offsets[1][0],
943 16, 2, 1, use_zb, r->zero_block);
944
945 gen_block_verts(vb + 12, mb->cbp, mb->mbx, mb->mby,
946 &unit, &half, &offsets[0][1],
947 8, 2, 1, use_zb, r->zero_block);
948
949 gen_block_verts(vb + 18, mb->cbp, mb->mbx, mb->mby,
950 &unit, &half, &offsets[1][1],
951 4, 2, 1, use_zb, r->zero_block);
952
953 break;
954 }
955 default:
956 assert(0);
957 }
958 }
959
960 static void
961 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
962 unsigned *num_macroblocks)
963 {
964 unsigned offset[NUM_MACROBLOCK_TYPES];
965 struct vert_stream_0 *ycbcr_vb;
966 struct vertex2f *ref_vb[2];
967 struct pipe_transfer *buf_transfer[3];
968 unsigned i;
969
970 assert(r);
971 assert(num_macroblocks);
972
973 for (i = 0; i < r->num_macroblocks; ++i) {
974 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
975 ++num_macroblocks[mb_type];
976 }
977
978 offset[0] = 0;
979
980 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
981 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
982
983 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
984 (
985 r->pipe,
986 r->vertex_bufs.individual.ycbcr.buffer,
987 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
988 &buf_transfer[0]
989 );
990
991 for (i = 0; i < 2; ++i)
992 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
993 (
994 r->pipe,
995 r->vertex_bufs.individual.ref[i].buffer,
996 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
997 &buf_transfer[i + 1]
998 );
999
1000 for (i = 0; i < r->num_macroblocks; ++i) {
1001 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1002
1003 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
1004 ycbcr_vb, ref_vb);
1005
1006 ++offset[mb_type];
1007 }
1008
1009 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
1010 for (i = 0; i < 2; ++i)
1011 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
1012 }
1013
1014 static struct pipe_sampler_view
1015 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
1016 {
1017 struct pipe_sampler_view *sampler_view;
1018 assert(r);
1019 assert(surface);
1020
1021 sampler_view = (struct pipe_sampler_view*)util_keymap_lookup(r->texview_map, &surface);
1022 if (!sampler_view) {
1023 struct pipe_sampler_view templat;
1024 boolean added_to_map;
1025
1026 u_sampler_view_default_template(&templat, surface->texture,
1027 surface->texture->format);
1028 sampler_view = r->pipe->create_sampler_view(r->pipe, surface->texture,
1029 &templat);
1030 if (!sampler_view)
1031 return NULL;
1032
1033 added_to_map = util_keymap_insert(r->texview_map, &surface,
1034 sampler_view, r->pipe);
1035 assert(added_to_map);
1036 }
1037
1038 return sampler_view;
1039 }
1040
1041 static void
1042 flush(struct vl_mpeg12_mc_renderer *r)
1043 {
1044 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1045 unsigned vb_start = 0;
1046 struct vertex_shader_consts *vs_consts;
1047 struct pipe_transfer *buf_transfer;
1048
1049 unsigned i;
1050
1051 assert(r);
1052 assert(r->num_macroblocks == r->macroblocks_per_batch);
1053
1054 gen_macroblock_stream(r, num_macroblocks);
1055
1056 r->fb_state.cbufs[0] = r->surface;
1057
1058 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1059 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1060
1061 vs_consts = pipe_buffer_map
1062 (
1063 r->pipe, r->vs_const_buf,
1064 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1065 &buf_transfer
1066 );
1067
1068 vs_consts->denorm.x = r->surface->width;
1069 vs_consts->denorm.y = r->surface->height;
1070
1071 pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
1072
1073 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1074 r->vs_const_buf);
1075
1076
1077 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1078 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1079 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
1080 r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
1081 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1082 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1083 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1084
1085 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1086
1087 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1088 }
1089
1090 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1091 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1092 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1093 r->textures.individual.ref[0] = r->past->texture;
1094 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1095 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1096 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1097 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1098 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1099
1100 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1101
1102
1103 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1104 }
1105
1106 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1107 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1108 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1109 r->textures.individual.ref[0] = r->past->texture;
1110 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1111 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1112 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1113 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1114 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1115
1116 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1117
1118
1119 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1120 }
1121
1122 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1123 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1124 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1125 r->textures.individual.ref[0] = r->future->texture;
1126 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1127 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1128 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1129 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1130 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1131
1132 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1133
1134 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1135 }
1136
1137 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0*/ ) {
1138 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1139 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1140 r->textures.individual.ref[0] = r->future->texture;
1141 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1142 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1143 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1144 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1145 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1146
1147 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1148
1149 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1150 }
1151
1152 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1153 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1154 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1155 r->textures.individual.ref[0] = r->past->texture;
1156 r->textures.individual.ref[1] = r->future->texture;
1157 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1158 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1159 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1160 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1161 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1162 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1163
1164
1165 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1166
1167 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1168 }
1169
1170 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1171 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1172 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1173 r->textures.individual.ref[0] = r->past->texture;
1174 r->textures.individual.ref[1] = r->future->texture;
1175 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1176 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1177 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1178 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1179 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1180 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1181
1182 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1183
1184 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1185 }
1186
1187 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1188
1189 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1190 for (i = 0; i < 3; ++i)
1191 r->zero_block[i].x = ZERO_BLOCK_NIL;
1192
1193 r->num_macroblocks = 0;
1194 }
1195
1196 static void
1197 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1198 {
1199 unsigned y;
1200
1201 assert(src);
1202 assert(dst);
1203
1204 for (y = 0; y < BLOCK_HEIGHT; ++y)
1205 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1206 }
1207
1208 static void
1209 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1210 {
1211 unsigned y;
1212
1213 assert(src);
1214 assert(dst);
1215
1216 for (y = 0; y < BLOCK_HEIGHT; ++y)
1217 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1218 }
1219
1220 static void
1221 fill_zero_block(short *dst, unsigned dst_pitch)
1222 {
1223 unsigned y;
1224
1225 assert(dst);
1226
1227 for (y = 0; y < BLOCK_HEIGHT; ++y)
1228 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1229 }
1230
1231 static void
1232 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1233 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1234 {
1235 unsigned tex_pitch;
1236 short *texels;
1237 unsigned tb = 0, sb = 0;
1238 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1239 unsigned x, y;
1240
1241 assert(r);
1242 assert(blocks);
1243
1244 tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->resource->format);
1245 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1246
1247 for (y = 0; y < 2; ++y) {
1248 for (x = 0; x < 2; ++x, ++tb) {
1249 if ((cbp >> (5 - tb)) & 1) {
1250 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1251 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1252 texels + y * tex_pitch * BLOCK_WIDTH +
1253 x * BLOCK_WIDTH, tex_pitch);
1254 }
1255 else {
1256 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1257 texels + y * tex_pitch + x * BLOCK_WIDTH,
1258 tex_pitch);
1259 }
1260
1261 ++sb;
1262 }
1263 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1264 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1265 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1266 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1267 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1268 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1269 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1270 }
1271 }
1272 }
1273 }
1274 }
1275
1276 /* TODO: Implement 422, 444 */
1277 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1278
1279 mbpx /= 2;
1280 mbpy /= 2;
1281
1282 for (tb = 0; tb < 2; ++tb) {
1283 tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->resource->format);
1284 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1285
1286 if ((cbp >> (1 - tb)) & 1) {
1287 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1288 ++sb;
1289 }
1290 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1291 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1292 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1293 fill_zero_block(texels, tex_pitch);
1294 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1295 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1296 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1297 }
1298 }
1299 }
1300 }
1301 }
1302
1303 static void
1304 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1305 struct pipe_mpeg12_macroblock *mb)
1306 {
1307 assert(r);
1308 assert(mb);
1309 assert(mb->blocks);
1310 assert(r->num_macroblocks < r->macroblocks_per_batch);
1311
1312 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1313 sizeof(struct pipe_mpeg12_macroblock));
1314
1315 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1316
1317 ++r->num_macroblocks;
1318 }
1319
/**
 * Keymap delete callback for the sampler view cache.
 *
 * Drops the reference held on the sampler view stored as the entry's data.
 * The map, key and user arguments are only checked for being non-NULL.
 */
static void
texview_map_delete(const struct keymap *map,
                   const void *key, void *data,
                   void *user)
{
   struct pipe_sampler_view *view;

   assert(map);
   assert(key);
   assert(data);
   assert(user);

   view = (struct pipe_sampler_view *)data;
   pipe_sampler_view_reference(&view, NULL);
}
1334
1335 bool
1336 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1337 struct pipe_context *pipe,
1338 unsigned picture_width,
1339 unsigned picture_height,
1340 enum pipe_video_chroma_format chroma_format,
1341 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1342 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1343 bool pot_buffers)
1344 {
1345 unsigned i;
1346
1347 assert(renderer);
1348 assert(pipe);
1349 /* TODO: Implement other policies */
1350 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1351 /* TODO: Implement this */
1352 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1353 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1354 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1355 assert(pot_buffers);
1356
1357 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1358
1359 renderer->pipe = pipe;
1360 renderer->picture_width = picture_width;
1361 renderer->picture_height = picture_height;
1362 renderer->chroma_format = chroma_format;
1363 renderer->bufmode = bufmode;
1364 renderer->eb_handling = eb_handling;
1365 renderer->pot_buffers = pot_buffers;
1366
1367 renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
1368 texview_map_delete);
1369 if (!renderer->texview_map)
1370 return false;
1371
1372 if (!init_pipe_state(renderer)) {
1373 util_delete_keymap(renderer->texview_map, renderer->pipe);
1374 return false;
1375 }
1376 if (!init_shaders(renderer)) {
1377 util_delete_keymap(renderer->texview_map, renderer->pipe);
1378 cleanup_pipe_state(renderer);
1379 return false;
1380 }
1381 if (!init_buffers(renderer)) {
1382 util_delete_keymap(renderer->texview_map, renderer->pipe);
1383 cleanup_shaders(renderer);
1384 cleanup_pipe_state(renderer);
1385 return false;
1386 }
1387
1388 renderer->surface = NULL;
1389 renderer->past = NULL;
1390 renderer->future = NULL;
1391 for (i = 0; i < 3; ++i)
1392 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1393 renderer->num_macroblocks = 0;
1394
1395 xfer_buffers_map(renderer);
1396
1397 return true;
1398 }
1399
1400 void
1401 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1402 {
1403 assert(renderer);
1404
1405 xfer_buffers_unmap(renderer);
1406
1407 util_delete_keymap(renderer->texview_map, renderer->pipe);
1408 cleanup_pipe_state(renderer);
1409 cleanup_shaders(renderer);
1410 cleanup_buffers(renderer);
1411
1412 pipe_surface_reference(&renderer->surface, NULL);
1413 pipe_surface_reference(&renderer->past, NULL);
1414 pipe_surface_reference(&renderer->future, NULL);
1415 }
1416
1417 void
1418 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1419 *renderer,
1420 struct pipe_surface *surface,
1421 struct pipe_surface *past,
1422 struct pipe_surface *future,
1423 unsigned num_macroblocks,
1424 struct pipe_mpeg12_macroblock
1425 *mpeg12_macroblocks,
1426 struct pipe_fence_handle **fence)
1427 {
1428 bool new_surface = false;
1429
1430 assert(renderer);
1431 assert(surface);
1432 assert(num_macroblocks);
1433 assert(mpeg12_macroblocks);
1434
1435 if (renderer->surface) {
1436 if (surface != renderer->surface) {
1437 if (renderer->num_macroblocks > 0) {
1438 xfer_buffers_unmap(renderer);
1439 flush(renderer);
1440 }
1441
1442 new_surface = true;
1443 }
1444
1445 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1446 assert(surface != renderer->surface || renderer->past == past);
1447 assert(surface != renderer->surface || renderer->future == future);
1448 }
1449 else
1450 new_surface = true;
1451
1452 if (new_surface) {
1453 pipe_surface_reference(&renderer->surface, surface);
1454 pipe_surface_reference(&renderer->past, past);
1455 pipe_surface_reference(&renderer->future, future);
1456 renderer->fence = fence;
1457 renderer->surface_tex_inv_size.x = 1.0f / surface->width;
1458 renderer->surface_tex_inv_size.y = 1.0f / surface->height;
1459 }
1460
1461 while (num_macroblocks) {
1462 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1463 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1464 unsigned i;
1465
1466 for (i = 0; i < num_to_submit; ++i) {
1467 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1468 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1469 }
1470
1471 num_macroblocks -= num_to_submit;
1472
1473 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1474 xfer_buffers_unmap(renderer);
1475 flush(renderer);
1476 xfer_buffers_map(renderer);
1477 /* Next time we get this surface it may have new ref frames */
1478 pipe_surface_reference(&renderer->surface, NULL);
1479 pipe_surface_reference(&renderer->past, NULL);
1480 pipe_surface_reference(&renderer->future, NULL);
1481 }
1482 }
1483 }