vl: changed video pipe to use the new gallium API within master
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include <assert.h>
30 #include <pipe/p_context.h>
31 #include <util/u_inlines.h>
32 #include <util/u_format.h>
33 #include <util/u_math.h>
34 #include <util/u_memory.h>
35 #include <util/u_keymap.h>
36 #include <util/u_sampler.h>
37 #include <tgsi/tgsi_ureg.h>
38
/* NOTE(review): not referenced in the visible part of this file. */
#define DEFAULT_BUF_ALIGNMENT 1

/* Macroblock dimensions; picture sizes are aligned to these (see init_buffers). */
#define MACROBLOCK_WIDTH      16
#define MACROBLOCK_HEIGHT     16

/* Dimensions of one block inside a macroblock. */
#define BLOCK_WIDTH           8
#define BLOCK_HEIGHT          8

/* Sentinel coordinate marking "no zero block"; any negative x counts as nil. */
#define ZERO_BLOCK_NIL        (-1.0f)
#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)

/* Factor the fragment shaders multiply the sampled texel by. */
#define SCALE_FACTOR_16_TO_9  (32767.0f / 255.0f)
47
48 struct vertex_shader_consts
49 {
50 struct vertex4f denorm;
51 };
52
53 struct fragment_shader_consts
54 {
55 struct vertex4f multiplier;
56 struct vertex4f div;
57 };
58
59 struct vert_stream_0
60 {
61 struct vertex2f pos;
62 struct vertex2f luma_tc;
63 struct vertex2f cb_tc;
64 struct vertex2f cr_tc;
65 };
66
/*
 * Internal macroblock classification combining prediction direction and
 * motion type. The values index per-type counters and offsets, so they must
 * stay dense and start at zero; NUM_MACROBLOCK_TYPES is the count.
 */
enum MACROBLOCK_TYPE {
   MACROBLOCK_TYPE_INTRA = 0,
   MACROBLOCK_TYPE_FWD_FRAME_PRED,
   MACROBLOCK_TYPE_FWD_FIELD_PRED,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED,
   MACROBLOCK_TYPE_BI_FRAME_PRED,
   MACROBLOCK_TYPE_BI_FIELD_PRED,

   NUM_MACROBLOCK_TYPES
};
79
80 static bool
81 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
82 {
83 struct ureg_program *shader;
84 struct ureg_src vpos, vtex[3];
85 struct ureg_dst o_vpos, o_vtex[3];
86 unsigned i;
87
88 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
89 if (!shader)
90 return false;
91
92 vpos = ureg_DECL_vs_input(shader, 0);
93 for (i = 0; i < 3; ++i)
94 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
95 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
96 for (i = 0; i < 3; ++i)
97 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
98
99 /*
100 * o_vpos = vpos
101 * o_vtex[0..2] = vtex[0..2]
102 */
103 ureg_MOV(shader, o_vpos, vpos);
104 for (i = 0; i < 3; ++i)
105 ureg_MOV(shader, o_vtex[i], vtex[i]);
106
107 ureg_END(shader);
108
109 r->i_vs = ureg_create_shader_and_destroy(shader, r->pipe);
110 if (!r->i_vs)
111 return false;
112
113 return true;
114 }
115
116 static bool
117 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
118 {
119 struct ureg_program *shader;
120 struct ureg_src tc[3];
121 struct ureg_src sampler[3];
122 struct ureg_dst texel, temp;
123 struct ureg_dst fragment;
124 unsigned i;
125
126 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
127 if (!shader)
128 return false;
129
130 for (i = 0; i < 3; ++i) {
131 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
132 sampler[i] = ureg_DECL_sampler(shader, i);
133 }
134 texel = ureg_DECL_temporary(shader);
135 temp = ureg_DECL_temporary(shader);
136 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
137
138 /*
139 * texel.r = tex(tc[0], sampler[0])
140 * texel.g = tex(tc[1], sampler[1])
141 * texel.b = tex(tc[2], sampler[2])
142 * fragment = texel * scale
143 */
144 for (i = 0; i < 3; ++i) {
145 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
146 ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
147 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
148 }
149 ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
150
151 ureg_release_temporary(shader, texel);
152 ureg_release_temporary(shader, temp);
153 ureg_END(shader);
154
155 r->i_fs = ureg_create_shader_and_destroy(shader, r->pipe);
156 if (!r->i_fs)
157 return false;
158
159 return true;
160 }
161
162 static bool
163 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
164 {
165 struct ureg_program *shader;
166 struct ureg_src vpos, vtex[4];
167 struct ureg_dst o_vpos, o_vtex[4];
168 unsigned i;
169
170 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
171 if (!shader)
172 return false;
173
174 vpos = ureg_DECL_vs_input(shader, 0);
175 for (i = 0; i < 4; ++i)
176 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
177 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
178 for (i = 0; i < 4; ++i)
179 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
180
181 /*
182 * o_vpos = vpos
183 * o_vtex[0..2] = vtex[0..2]
184 * o_vtex[3] = vpos + vtex[3] // Apply motion vector
185 */
186 ureg_MOV(shader, o_vpos, vpos);
187 for (i = 0; i < 3; ++i)
188 ureg_MOV(shader, o_vtex[i], vtex[i]);
189 ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
190
191 ureg_END(shader);
192
193 r->p_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
194 if (!r->p_vs[0])
195 return false;
196
197 return true;
198 }
199
#if 0
/* Disabled placeholder: no vertex shader exists yet for field prediction. */
static void
create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void)r;
   assert(false);
}
#endif
207
208 static bool
209 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
210 {
211 struct ureg_program *shader;
212 struct ureg_src tc[4];
213 struct ureg_src sampler[4];
214 struct ureg_dst texel, ref;
215 struct ureg_dst fragment;
216 unsigned i;
217
218 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
219 if (!shader)
220 return false;
221
222 for (i = 0; i < 4; ++i) {
223 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
224 sampler[i] = ureg_DECL_sampler(shader, i);
225 }
226 texel = ureg_DECL_temporary(shader);
227 ref = ureg_DECL_temporary(shader);
228 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
229
230 /*
231 * texel.r = tex(tc[0], sampler[0])
232 * texel.g = tex(tc[1], sampler[1])
233 * texel.b = tex(tc[2], sampler[2])
234 * ref = tex(tc[3], sampler[3])
235 * fragment = texel * scale + ref
236 */
237 for (i = 0; i < 3; ++i) {
238 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
239 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
240 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
241 }
242 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
243 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
244
245 ureg_release_temporary(shader, texel);
246 ureg_release_temporary(shader, ref);
247 ureg_END(shader);
248
249 r->p_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
250 if (!r->p_fs[0])
251 return false;
252
253 return true;
254 }
255
#if 0
/* Disabled placeholder: no fragment shader exists yet for field prediction. */
static void
create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void)r;
   assert(false);
}
#endif
263
264 static bool
265 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
266 {
267 struct ureg_program *shader;
268 struct ureg_src vpos, vtex[5];
269 struct ureg_dst o_vpos, o_vtex[5];
270 unsigned i;
271
272 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
273 if (!shader)
274 return false;
275
276 vpos = ureg_DECL_vs_input(shader, 0);
277 for (i = 0; i < 4; ++i)
278 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
279 /* Skip input 5 */
280 vtex[4] = ureg_DECL_vs_input(shader, 6);
281 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
282 for (i = 0; i < 5; ++i)
283 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
284
285 /*
286 * o_vpos = vpos
287 * o_vtex[0..2] = vtex[0..2]
288 * o_vtex[3..4] = vpos + vtex[3..4] // Apply motion vector
289 */
290 ureg_MOV(shader, o_vpos, vpos);
291 for (i = 0; i < 3; ++i)
292 ureg_MOV(shader, o_vtex[i], vtex[i]);
293 for (i = 3; i < 5; ++i)
294 ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
295
296 ureg_END(shader);
297
298 r->b_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
299 if (!r->b_vs[0])
300 return false;
301
302 return true;
303 }
304
#if 0
/* Disabled placeholder: no vertex shader exists yet for bi-directional field prediction. */
static void
create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void)r;
   assert(false);
}
#endif
312
313 static bool
314 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
315 {
316 struct ureg_program *shader;
317 struct ureg_src tc[5];
318 struct ureg_src sampler[5];
319 struct ureg_dst texel, ref[2];
320 struct ureg_dst fragment;
321 unsigned i;
322
323 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
324 if (!shader)
325 return false;
326
327 for (i = 0; i < 5; ++i) {
328 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
329 sampler[i] = ureg_DECL_sampler(shader, i);
330 }
331 texel = ureg_DECL_temporary(shader);
332 ref[0] = ureg_DECL_temporary(shader);
333 ref[1] = ureg_DECL_temporary(shader);
334 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
335
336 /*
337 * texel.r = tex(tc[0], sampler[0])
338 * texel.g = tex(tc[1], sampler[1])
339 * texel.b = tex(tc[2], sampler[2])
340 * ref[0..1 = tex(tc[3..4], sampler[3..4])
341 * ref[0] = lerp(ref[0], ref[1], 0.5)
342 * fragment = texel * scale + ref[0]
343 */
344 for (i = 0; i < 3; ++i) {
345 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
346 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
347 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
348 }
349 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
350 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
351 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
352
353 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
354
355 ureg_release_temporary(shader, texel);
356 ureg_release_temporary(shader, ref[0]);
357 ureg_release_temporary(shader, ref[1]);
358 ureg_END(shader);
359
360 r->b_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
361 if (!r->b_fs[0])
362 return false;
363
364 return true;
365 }
366
#if 0
/* Disabled placeholder: no fragment shader exists yet for bi-directional field prediction. */
static void
create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void)r;
   assert(false);
}
#endif
374
375 static void
376 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
377 {
378 unsigned i;
379
380 assert(r);
381
382 for (i = 0; i < 3; ++i) {
383 struct pipe_box rect =
384 {
385 0, 0, 0,
386 r->textures.all[i]->width0,
387 r->textures.all[i]->height0,
388 1
389 };
390
391 r->tex_transfer[i] = r->pipe->get_transfer
392 (
393 r->pipe, r->textures.all[i],
394 u_subresource(0, 0),
395 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
396 &rect
397 );
398
399 r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]);
400 }
401 }
402
403 static void
404 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
405 {
406 unsigned i;
407
408 assert(r);
409
410 for (i = 0; i < 3; ++i) {
411 r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]);
412 r->pipe->transfer_destroy(r->pipe, r->tex_transfer[i]);
413 }
414 }
415
416 static bool
417 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
418 {
419 struct pipe_sampler_state sampler;
420 unsigned filters[5];
421 unsigned i;
422
423 assert(r);
424
425 r->viewport.scale[0] = r->pot_buffers ?
426 util_next_power_of_two(r->picture_width) : r->picture_width;
427 r->viewport.scale[1] = r->pot_buffers ?
428 util_next_power_of_two(r->picture_height) : r->picture_height;
429 r->viewport.scale[2] = 1;
430 r->viewport.scale[3] = 1;
431 r->viewport.translate[0] = 0;
432 r->viewport.translate[1] = 0;
433 r->viewport.translate[2] = 0;
434 r->viewport.translate[3] = 0;
435
436 r->fb_state.width = r->pot_buffers ?
437 util_next_power_of_two(r->picture_width) : r->picture_width;
438 r->fb_state.height = r->pot_buffers ?
439 util_next_power_of_two(r->picture_height) : r->picture_height;
440 r->fb_state.nr_cbufs = 1;
441 r->fb_state.zsbuf = NULL;
442
443 /* Luma filter */
444 filters[0] = PIPE_TEX_FILTER_NEAREST;
445 /* Chroma filters */
446 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
447 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
448 filters[1] = PIPE_TEX_FILTER_NEAREST;
449 filters[2] = PIPE_TEX_FILTER_NEAREST;
450 }
451 else {
452 filters[1] = PIPE_TEX_FILTER_LINEAR;
453 filters[2] = PIPE_TEX_FILTER_LINEAR;
454 }
455 /* Fwd, bkwd ref filters */
456 filters[3] = PIPE_TEX_FILTER_LINEAR;
457 filters[4] = PIPE_TEX_FILTER_LINEAR;
458
459 for (i = 0; i < 5; ++i) {
460 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
461 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
462 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
463 sampler.min_img_filter = filters[i];
464 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
465 sampler.mag_img_filter = filters[i];
466 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
467 sampler.compare_func = PIPE_FUNC_ALWAYS;
468 sampler.normalized_coords = 1;
469 /*sampler.shadow_ambient = ; */
470 /*sampler.lod_bias = ; */
471 sampler.min_lod = 0;
472 /*sampler.max_lod = ; */
473 /*sampler.border_color[i] = ; */
474 /*sampler.max_anisotropy = ; */
475 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
476 }
477
478 return true;
479 }
480
481 static void
482 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
483 {
484 unsigned i;
485
486 assert(r);
487
488 for (i = 0; i < 5; ++i)
489 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
490 }
491
492 static bool
493 init_shaders(struct vl_mpeg12_mc_renderer *r)
494 {
495 assert(r);
496
497 create_intra_vert_shader(r);
498 create_intra_frag_shader(r);
499 create_frame_pred_vert_shader(r);
500 create_frame_pred_frag_shader(r);
501 create_frame_bi_pred_vert_shader(r);
502 create_frame_bi_pred_frag_shader(r);
503
504 return true;
505 }
506
507 static void
508 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
509 {
510 assert(r);
511
512 r->pipe->delete_vs_state(r->pipe, r->i_vs);
513 r->pipe->delete_fs_state(r->pipe, r->i_fs);
514 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
515 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
516 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
517 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
518 }
519
520 static bool
521 init_buffers(struct vl_mpeg12_mc_renderer *r)
522 {
523 struct pipe_resource template;
524 struct pipe_vertex_element vertex_elems[8];
525 struct pipe_sampler_view sampler_view;
526
527 const unsigned mbw =
528 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
529 const unsigned mbh =
530 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
531
532 unsigned i;
533
534 assert(r);
535
536 r->macroblocks_per_batch =
537 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
538 r->num_macroblocks = 0;
539 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
540
541 memset(&template, 0, sizeof(struct pipe_resource));
542 template.target = PIPE_TEXTURE_2D;
543 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
544 template.format = PIPE_FORMAT_R16_SNORM;
545 template.last_level = 0;
546 template.width0 = r->pot_buffers ?
547 util_next_power_of_two(r->picture_width) : r->picture_width;
548 template.height0 = r->pot_buffers ?
549 util_next_power_of_two(r->picture_height) : r->picture_height;
550 template.depth0 = 1;
551 template.usage = PIPE_USAGE_DYNAMIC;
552 template.bind = PIPE_BIND_SAMPLER_VIEW;
553 template.flags = 0;
554
555 r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
556
557 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
558 template.width0 = r->pot_buffers ?
559 util_next_power_of_two(r->picture_width / 2) :
560 r->picture_width / 2;
561 template.height0 = r->pot_buffers ?
562 util_next_power_of_two(r->picture_height / 2) :
563 r->picture_height / 2;
564 }
565 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
566 template.height0 = r->pot_buffers ?
567 util_next_power_of_two(r->picture_height / 2) :
568 r->picture_height / 2;
569
570 r->textures.individual.cb =
571 r->pipe->screen->resource_create(r->pipe->screen, &template);
572 r->textures.individual.cr =
573 r->pipe->screen->resource_create(r->pipe->screen, &template);
574
575 for (i = 0; i < 3; ++i) {
576 u_sampler_view_default_template(&sampler_view,
577 r->textures.all[i],
578 r->textures.all[i]->format);
579 r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
580 }
581
582 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
583 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
584 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
585 /* XXX: Create with usage DYNAMIC or STREAM */
586 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
587 (
588 r->pipe->screen,
589 PIPE_BIND_VERTEX_BUFFER,
590 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
591 );
592
593 for (i = 1; i < 3; ++i) {
594 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
595 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
596 r->vertex_bufs.all[i].buffer_offset = 0;
597 /* XXX: Create with usage DYNAMIC or STREAM */
598 r->vertex_bufs.all[i].buffer = pipe_buffer_create
599 (
600 r->pipe->screen,
601 PIPE_BIND_VERTEX_BUFFER,
602 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
603 );
604 }
605
606 /* Position element */
607 vertex_elems[0].src_offset = 0;
608 vertex_elems[0].instance_divisor = 0;
609 vertex_elems[0].vertex_buffer_index = 0;
610 vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
611
612 /* Luma, texcoord element */
613 vertex_elems[1].src_offset = sizeof(struct vertex2f);
614 vertex_elems[1].instance_divisor = 0;
615 vertex_elems[1].vertex_buffer_index = 0;
616 vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
617
618 /* Chroma Cr texcoord element */
619 vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
620 vertex_elems[2].instance_divisor = 0;
621 vertex_elems[2].vertex_buffer_index = 0;
622 vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
623
624 /* Chroma Cb texcoord element */
625 vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
626 vertex_elems[3].instance_divisor = 0;
627 vertex_elems[3].vertex_buffer_index = 0;
628 vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
629
630 /* First ref surface top field texcoord element */
631 vertex_elems[4].src_offset = 0;
632 vertex_elems[4].instance_divisor = 0;
633 vertex_elems[4].vertex_buffer_index = 1;
634 vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
635
636 /* First ref surface bottom field texcoord element */
637 vertex_elems[5].src_offset = sizeof(struct vertex2f);
638 vertex_elems[5].instance_divisor = 0;
639 vertex_elems[5].vertex_buffer_index = 1;
640 vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
641
642 /* Second ref surface top field texcoord element */
643 vertex_elems[6].src_offset = 0;
644 vertex_elems[6].instance_divisor = 0;
645 vertex_elems[6].vertex_buffer_index = 2;
646 vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
647
648 /* Second ref surface bottom field texcoord element */
649 vertex_elems[7].src_offset = sizeof(struct vertex2f);
650 vertex_elems[7].instance_divisor = 0;
651 vertex_elems[7].vertex_buffer_index = 2;
652 vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
653
654 r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
655 r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
656 r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
657
658 r->vs_const_buf = pipe_buffer_create
659 (
660 r->pipe->screen,
661 PIPE_BIND_CONSTANT_BUFFER,
662 sizeof(struct vertex_shader_consts)
663 );
664
665 return true;
666 }
667
668 static void
669 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
670 {
671 unsigned i;
672
673 assert(r);
674
675 pipe_resource_reference(&r->vs_const_buf, NULL);
676
677 for (i = 0; i < 3; ++i) {
678 pipe_sampler_view_reference(&r->sampler_views.all[i], NULL);
679 r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state.all[i]);
680 pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
681 pipe_resource_reference(&r->textures.all[i], NULL);
682 }
683
684 FREE(r->macroblock_buf);
685 }
686
687 static enum MACROBLOCK_TYPE
688 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
689 {
690 assert(mb);
691
692 switch (mb->mb_type) {
693 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
694 return MACROBLOCK_TYPE_INTRA;
695 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
696 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
697 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
698 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
699 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
700 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
701 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
702 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
703 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
704 default:
705 assert(0);
706 }
707
708 /* Unreachable */
709 return -1;
710 }
711
712 static void
713 gen_block_verts(struct vert_stream_0 *vb, unsigned cbp, unsigned mbx, unsigned mby,
714 const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
715 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
716 bool use_zeroblocks, struct vertex2f *zero_blocks)
717 {
718 struct vertex2f v;
719
720 assert(vb);
721 assert(unit && half && offset);
722 assert(zero_blocks || !use_zeroblocks);
723
724 /* Generate vertices for two triangles covering a block */
725 v.x = mbx * unit->x + offset->x;
726 v.y = mby * unit->y + offset->y;
727
728 vb[0].pos.x = v.x;
729 vb[0].pos.y = v.y;
730 vb[1].pos.x = v.x;
731 vb[1].pos.y = v.y + half->y;
732 vb[2].pos.x = v.x + half->x;
733 vb[2].pos.y = v.y;
734 vb[3].pos.x = v.x + half->x;
735 vb[3].pos.y = v.y;
736 vb[4].pos.x = v.x;
737 vb[4].pos.y = v.y + half->y;
738 vb[5].pos.x = v.x + half->x;
739 vb[5].pos.y = v.y + half->y;
740
741 /* Generate texcoords for the triangles, either pointing to the correct area on the luma/chroma texture
742 or if zero blocks are being used, to the zero block if the appropriate CBP bits aren't set (i.e. no data
743 for this channel is defined for this block) */
744
745 if (!use_zeroblocks || cbp & luma_mask) {
746 v.x = mbx * unit->x + offset->x;
747 v.y = mby * unit->y + offset->y;
748 }
749 else {
750 v.x = zero_blocks[0].x;
751 v.y = zero_blocks[0].y;
752 }
753
754 vb[0].luma_tc.x = v.x;
755 vb[0].luma_tc.y = v.y;
756 vb[1].luma_tc.x = v.x;
757 vb[1].luma_tc.y = v.y + half->y;
758 vb[2].luma_tc.x = v.x + half->x;
759 vb[2].luma_tc.y = v.y;
760 vb[3].luma_tc.x = v.x + half->x;
761 vb[3].luma_tc.y = v.y;
762 vb[4].luma_tc.x = v.x;
763 vb[4].luma_tc.y = v.y + half->y;
764 vb[5].luma_tc.x = v.x + half->x;
765 vb[5].luma_tc.y = v.y + half->y;
766
767 if (!use_zeroblocks || cbp & cb_mask) {
768 v.x = mbx * unit->x + offset->x;
769 v.y = mby * unit->y + offset->y;
770 }
771 else {
772 v.x = zero_blocks[1].x;
773 v.y = zero_blocks[1].y;
774 }
775
776 vb[0].cb_tc.x = v.x;
777 vb[0].cb_tc.y = v.y;
778 vb[1].cb_tc.x = v.x;
779 vb[1].cb_tc.y = v.y + half->y;
780 vb[2].cb_tc.x = v.x + half->x;
781 vb[2].cb_tc.y = v.y;
782 vb[3].cb_tc.x = v.x + half->x;
783 vb[3].cb_tc.y = v.y;
784 vb[4].cb_tc.x = v.x;
785 vb[4].cb_tc.y = v.y + half->y;
786 vb[5].cb_tc.x = v.x + half->x;
787 vb[5].cb_tc.y = v.y + half->y;
788
789 if (!use_zeroblocks || cbp & cr_mask) {
790 v.x = mbx * unit->x + offset->x;
791 v.y = mby * unit->y + offset->y;
792 }
793 else {
794 v.x = zero_blocks[2].x;
795 v.y = zero_blocks[2].y;
796 }
797
798 vb[0].cr_tc.x = v.x;
799 vb[0].cr_tc.y = v.y;
800 vb[1].cr_tc.x = v.x;
801 vb[1].cr_tc.y = v.y + half->y;
802 vb[2].cr_tc.x = v.x + half->x;
803 vb[2].cr_tc.y = v.y;
804 vb[3].cr_tc.x = v.x + half->x;
805 vb[3].cr_tc.y = v.y;
806 vb[4].cr_tc.x = v.x;
807 vb[4].cr_tc.y = v.y + half->y;
808 vb[5].cr_tc.x = v.x + half->x;
809 vb[5].cr_tc.y = v.y + half->y;
810 }
811
812 static void
813 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
814 struct pipe_mpeg12_macroblock *mb, unsigned pos,
815 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
816 {
817 struct vertex2f mo_vec[2];
818
819 unsigned i;
820
821 assert(r);
822 assert(mb);
823 assert(ycbcr_vb);
824 assert(pos < r->macroblocks_per_batch);
825
826 mo_vec[1].x = 0;
827 mo_vec[1].y = 0;
828
829 switch (mb->mb_type) {
830 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
831 {
832 struct vertex2f *vb;
833
834 assert(ref_vb && ref_vb[1]);
835
836 vb = ref_vb[1] + pos * 2 * 24;
837
838 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
839 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
840
841 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
842 for (i = 0; i < 24 * 2; i += 2) {
843 vb[i].x = mo_vec[0].x;
844 vb[i].y = mo_vec[0].y;
845 }
846 }
847 else {
848 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
849 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
850
851 for (i = 0; i < 24 * 2; i += 2) {
852 vb[i].x = mo_vec[0].x;
853 vb[i].y = mo_vec[0].y;
854 vb[i + 1].x = mo_vec[1].x;
855 vb[i + 1].y = mo_vec[1].y;
856 }
857 }
858
859 /* fall-through */
860 }
861 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
862 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
863 {
864 struct vertex2f *vb;
865
866 assert(ref_vb && ref_vb[0]);
867
868 vb = ref_vb[0] + pos * 2 * 24;
869
870 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
871 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
872 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
873
874 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
875 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
876 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
877 }
878 }
879 else {
880 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
881 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
882
883 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
884 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
885 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
886 }
887 }
888
889 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
890 for (i = 0; i < 24 * 2; i += 2) {
891 vb[i].x = mo_vec[0].x;
892 vb[i].y = mo_vec[0].y;
893 }
894 }
895 else {
896 for (i = 0; i < 24 * 2; i += 2) {
897 vb[i].x = mo_vec[0].x;
898 vb[i].y = mo_vec[0].y;
899 vb[i + 1].x = mo_vec[1].x;
900 vb[i + 1].y = mo_vec[1].y;
901 }
902 }
903
904 /* fall-through */
905 }
906 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
907 {
908 const struct vertex2f unit =
909 {
910 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
911 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
912 };
913 const struct vertex2f half =
914 {
915 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
916 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
917 };
918 const struct vertex2f offsets[2][2] =
919 {
920 {
921 {0, 0}, {0, half.y}
922 },
923 {
924 {half.x, 0}, {half.x, half.y}
925 }
926 };
927 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
928
929 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
930
931 gen_block_verts(vb, mb->cbp, mb->mbx, mb->mby,
932 &unit, &half, &offsets[0][0],
933 32, 2, 1, use_zb, r->zero_block);
934
935 gen_block_verts(vb + 6, mb->cbp, mb->mbx, mb->mby,
936 &unit, &half, &offsets[1][0],
937 16, 2, 1, use_zb, r->zero_block);
938
939 gen_block_verts(vb + 12, mb->cbp, mb->mbx, mb->mby,
940 &unit, &half, &offsets[0][1],
941 8, 2, 1, use_zb, r->zero_block);
942
943 gen_block_verts(vb + 18, mb->cbp, mb->mbx, mb->mby,
944 &unit, &half, &offsets[1][1],
945 4, 2, 1, use_zb, r->zero_block);
946
947 break;
948 }
949 default:
950 assert(0);
951 }
952 }
953
954 static void
955 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
956 unsigned *num_macroblocks)
957 {
958 unsigned offset[NUM_MACROBLOCK_TYPES];
959 struct vert_stream_0 *ycbcr_vb;
960 struct vertex2f *ref_vb[2];
961 struct pipe_transfer *buf_transfer[3];
962 unsigned i;
963
964 assert(r);
965 assert(num_macroblocks);
966
967 for (i = 0; i < r->num_macroblocks; ++i) {
968 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
969 ++num_macroblocks[mb_type];
970 }
971
972 offset[0] = 0;
973
974 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
975 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
976
977 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
978 (
979 r->pipe,
980 r->vertex_bufs.individual.ycbcr.buffer,
981 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
982 &buf_transfer[0]
983 );
984
985 for (i = 0; i < 2; ++i)
986 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
987 (
988 r->pipe,
989 r->vertex_bufs.individual.ref[i].buffer,
990 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
991 &buf_transfer[i + 1]
992 );
993
994 for (i = 0; i < r->num_macroblocks; ++i) {
995 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
996
997 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
998 ycbcr_vb, ref_vb);
999
1000 ++offset[mb_type];
1001 }
1002
1003 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
1004 for (i = 0; i < 2; ++i)
1005 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
1006 }
1007
1008 static struct pipe_sampler_view
1009 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
1010 {
1011 struct pipe_sampler_view *sampler_view;
1012 assert(r);
1013 assert(surface);
1014
1015 sampler_view = (struct pipe_sampler_view*)util_keymap_lookup(r->texview_map, &surface);
1016 if (!sampler_view) {
1017 struct pipe_sampler_view templat;
1018 boolean added_to_map;
1019
1020 u_sampler_view_default_template(&templat, surface->texture,
1021 surface->texture->format);
1022 sampler_view = r->pipe->create_sampler_view(r->pipe, surface->texture,
1023 &templat);
1024 if (!sampler_view)
1025 return NULL;
1026
1027 added_to_map = util_keymap_insert(r->texview_map, &surface,
1028 sampler_view, r->pipe);
1029 assert(added_to_map);
1030 }
1031
1032 return sampler_view;
1033 }
1034
1035 static void
1036 flush(struct vl_mpeg12_mc_renderer *r)
1037 {
1038 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1039 unsigned vb_start = 0;
1040 struct vertex_shader_consts *vs_consts;
1041 struct pipe_transfer *buf_transfer;
1042
1043 unsigned i;
1044
1045 assert(r);
1046 assert(r->num_macroblocks == r->macroblocks_per_batch);
1047
1048 gen_macroblock_stream(r, num_macroblocks);
1049
1050 r->fb_state.cbufs[0] = r->surface;
1051
1052 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1053 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1054
1055 vs_consts = pipe_buffer_map
1056 (
1057 r->pipe, r->vs_const_buf,
1058 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1059 &buf_transfer
1060 );
1061
1062 vs_consts->denorm.x = r->surface->width;
1063 vs_consts->denorm.y = r->surface->height;
1064
1065 pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
1066
1067 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1068 r->vs_const_buf);
1069
1070
1071 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1072 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1073 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
1074 r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
1075 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1076 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1077 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1078
1079 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1080
1081 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1082 }
1083
1084 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1085 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1086 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1087 r->textures.individual.ref[0] = r->past->texture;
1088 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1089 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1090 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1091 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1092 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1093
1094 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1095
1096 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1097 }
1098
1099 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1100 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1101 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1102 r->textures.individual.ref[0] = r->past->texture;
1103 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1104 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1105 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1106 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1107 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1108
1109 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1110
1111 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1112 }
1113
1114 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1115 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1116 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1117 r->textures.individual.ref[0] = r->future->texture;
1118 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1119 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1120 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1121 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1122 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1123
1124 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1125
1126 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1127 }
1128
1129 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0*/ ) {
1130 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1131 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1132 r->textures.individual.ref[0] = r->future->texture;
1133 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1134 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1135 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1136 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1137 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1138
1139 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1140
1141 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1142 }
1143
1144 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1145 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1146 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1147 r->textures.individual.ref[0] = r->past->texture;
1148 r->textures.individual.ref[1] = r->future->texture;
1149 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1150 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1151 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1152 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1153 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1154 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1155
1156 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1157
1158 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1159 }
1160
1161 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1162 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1163 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1164 r->textures.individual.ref[0] = r->past->texture;
1165 r->textures.individual.ref[1] = r->future->texture;
1166 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1167 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1168 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1169 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1170 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1171 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1172
1173 util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1174
1175 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1176 }
1177
1178 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1179
1180 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1181 for (i = 0; i < 3; ++i)
1182 r->zero_block[i].x = ZERO_BLOCK_NIL;
1183
1184 r->num_macroblocks = 0;
1185 }
1186
1187 static void
1188 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1189 {
1190 unsigned y;
1191
1192 assert(src);
1193 assert(dst);
1194
1195 for (y = 0; y < BLOCK_HEIGHT; ++y)
1196 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1197 }
1198
1199 static void
1200 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1201 {
1202 unsigned y;
1203
1204 assert(src);
1205 assert(dst);
1206
1207 for (y = 0; y < BLOCK_HEIGHT; ++y)
1208 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1209 }
1210
1211 static void
1212 fill_zero_block(short *dst, unsigned dst_pitch)
1213 {
1214 unsigned y;
1215
1216 assert(dst);
1217
1218 for (y = 0; y < BLOCK_HEIGHT; ++y)
1219 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1220 }
1221
1222 static void
1223 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1224 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1225 {
1226 unsigned tex_pitch;
1227 short *texels;
1228 unsigned tb = 0, sb = 0;
1229 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1230 unsigned x, y;
1231
1232 assert(r);
1233 assert(blocks);
1234
1235 tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->resource->format);
1236 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1237
1238 for (y = 0; y < 2; ++y) {
1239 for (x = 0; x < 2; ++x, ++tb) {
1240 if ((cbp >> (5 - tb)) & 1) {
1241 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1242 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1243 texels + y * tex_pitch * BLOCK_WIDTH +
1244 x * BLOCK_WIDTH, tex_pitch);
1245 }
1246 else {
1247 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1248 texels + y * tex_pitch + x * BLOCK_WIDTH,
1249 tex_pitch);
1250 }
1251
1252 ++sb;
1253 }
1254 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1255 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1256 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1257 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1258 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1259 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1260 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1261 }
1262 }
1263 }
1264 }
1265 }
1266
1267 /* TODO: Implement 422, 444 */
1268 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1269
1270 mbpx /= 2;
1271 mbpy /= 2;
1272
1273 for (tb = 0; tb < 2; ++tb) {
1274 tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->resource->format);
1275 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1276
1277 if ((cbp >> (1 - tb)) & 1) {
1278 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1279 ++sb;
1280 }
1281 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1282 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1283 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1284 fill_zero_block(texels, tex_pitch);
1285 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1286 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1287 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1288 }
1289 }
1290 }
1291 }
1292 }
1293
1294 static void
1295 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1296 struct pipe_mpeg12_macroblock *mb)
1297 {
1298 assert(r);
1299 assert(mb);
1300 assert(mb->blocks);
1301 assert(r->num_macroblocks < r->macroblocks_per_batch);
1302
1303 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1304 sizeof(struct pipe_mpeg12_macroblock));
1305
1306 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1307
1308 ++r->num_macroblocks;
1309 }
1310
/**
 * Keymap delete callback: release the sampler view stored as an entry's data.
 *
 * \param map   keymap the entry belongs to (only checked for non-NULL)
 * \param key   key of the entry (only checked for non-NULL)
 * \param data  the stored struct pipe_sampler_view pointer
 * \param user  user pointer handed to the keymap call (only checked for
 *              non-NULL)
 */
static void
texview_map_delete(const struct keymap *map,
                   const void *key, void *data,
                   void *user)
{
   struct pipe_sampler_view *view;

   assert(map);
   assert(key);
   assert(data);
   assert(user);

   /* These are only inspected by the assertions above. */
   (void)map;
   (void)key;
   (void)user;

   /* Drop the reference by re-pointing a local copy at NULL. */
   view = (struct pipe_sampler_view*)data;
   pipe_sampler_view_reference(&view, NULL);
}
1325
1326 bool
1327 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1328 struct pipe_context *pipe,
1329 unsigned picture_width,
1330 unsigned picture_height,
1331 enum pipe_video_chroma_format chroma_format,
1332 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1333 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1334 bool pot_buffers)
1335 {
1336 unsigned i;
1337
1338 assert(renderer);
1339 assert(pipe);
1340 /* TODO: Implement other policies */
1341 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1342 /* TODO: Implement this */
1343 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1344 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1345 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1346 assert(pot_buffers);
1347
1348 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1349
1350 renderer->pipe = pipe;
1351 renderer->picture_width = picture_width;
1352 renderer->picture_height = picture_height;
1353 renderer->chroma_format = chroma_format;
1354 renderer->bufmode = bufmode;
1355 renderer->eb_handling = eb_handling;
1356 renderer->pot_buffers = pot_buffers;
1357
1358 renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
1359 texview_map_delete);
1360 if (!renderer->texview_map)
1361 return false;
1362
1363 if (!init_pipe_state(renderer)) {
1364 util_delete_keymap(renderer->texview_map, renderer->pipe);
1365 return false;
1366 }
1367 if (!init_shaders(renderer)) {
1368 util_delete_keymap(renderer->texview_map, renderer->pipe);
1369 cleanup_pipe_state(renderer);
1370 return false;
1371 }
1372 if (!init_buffers(renderer)) {
1373 util_delete_keymap(renderer->texview_map, renderer->pipe);
1374 cleanup_shaders(renderer);
1375 cleanup_pipe_state(renderer);
1376 return false;
1377 }
1378
1379 renderer->surface = NULL;
1380 renderer->past = NULL;
1381 renderer->future = NULL;
1382 for (i = 0; i < 3; ++i)
1383 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1384 renderer->num_macroblocks = 0;
1385
1386 xfer_buffers_map(renderer);
1387
1388 return true;
1389 }
1390
1391 void
1392 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1393 {
1394 assert(renderer);
1395
1396 xfer_buffers_unmap(renderer);
1397
1398 util_delete_keymap(renderer->texview_map, renderer->pipe);
1399 cleanup_pipe_state(renderer);
1400 cleanup_shaders(renderer);
1401 cleanup_buffers(renderer);
1402
1403 pipe_surface_reference(&renderer->surface, NULL);
1404 pipe_surface_reference(&renderer->past, NULL);
1405 pipe_surface_reference(&renderer->future, NULL);
1406 }
1407
1408 void
1409 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1410 *renderer,
1411 struct pipe_surface *surface,
1412 struct pipe_surface *past,
1413 struct pipe_surface *future,
1414 unsigned num_macroblocks,
1415 struct pipe_mpeg12_macroblock
1416 *mpeg12_macroblocks,
1417 struct pipe_fence_handle **fence)
1418 {
1419 bool new_surface = false;
1420
1421 assert(renderer);
1422 assert(surface);
1423 assert(num_macroblocks);
1424 assert(mpeg12_macroblocks);
1425
1426 if (renderer->surface) {
1427 if (surface != renderer->surface) {
1428 if (renderer->num_macroblocks > 0) {
1429 xfer_buffers_unmap(renderer);
1430 flush(renderer);
1431 }
1432
1433 new_surface = true;
1434 }
1435
1436 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1437 assert(surface != renderer->surface || renderer->past == past);
1438 assert(surface != renderer->surface || renderer->future == future);
1439 }
1440 else
1441 new_surface = true;
1442
1443 if (new_surface) {
1444 pipe_surface_reference(&renderer->surface, surface);
1445 pipe_surface_reference(&renderer->past, past);
1446 pipe_surface_reference(&renderer->future, future);
1447 renderer->fence = fence;
1448 renderer->surface_tex_inv_size.x = 1.0f / surface->width;
1449 renderer->surface_tex_inv_size.y = 1.0f / surface->height;
1450 }
1451
1452 while (num_macroblocks) {
1453 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1454 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1455 unsigned i;
1456
1457 for (i = 0; i < num_to_submit; ++i) {
1458 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1459 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1460 }
1461
1462 num_macroblocks -= num_to_submit;
1463
1464 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1465 xfer_buffers_unmap(renderer);
1466 flush(renderer);
1467 xfer_buffers_map(renderer);
1468 /* Next time we get this surface it may have new ref frames */
1469 pipe_surface_reference(&renderer->surface, NULL);
1470 pipe_surface_reference(&renderer->past, NULL);
1471 pipe_surface_reference(&renderer->future, NULL);
1472 }
1473 }
1474 }