vl: Dec sampler view refs instead of destroying them.
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include <assert.h>
30 #include <pipe/p_context.h>
31 #include <util/u_inlines.h>
32 #include <util/u_format.h>
33 #include <util/u_math.h>
34 #include <util/u_memory.h>
35 #include <util/u_keymap.h>
36 #include <util/u_sampler.h>
37 #include <tgsi/tgsi_ureg.h>
38
/* Default buffer alignment. */
#define DEFAULT_BUF_ALIGNMENT 1

/* Dimensions of a macroblock and of one of its blocks. */
#define MACROBLOCK_WIDTH 16
#define MACROBLOCK_HEIGHT 16
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8

/*
 * Sentinel stored in the x coordinate of a zero block that has not been
 * assigned; ZERO_BLOCK_IS_NIL() treats any negative x as "nil".
 * The literal is parenthesized so the macro expands safely inside any
 * expression.
 */
#define ZERO_BLOCK_NIL (-1.0f)
#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)

/* Factor the fragment shaders multiply the sampled texel by. */
#define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
47
48 struct vertex_shader_consts
49 {
50 struct vertex4f denorm;
51 };
52
53 struct fragment_shader_consts
54 {
55 struct vertex4f multiplier;
56 struct vertex4f div;
57 };
58
59 struct vert_stream_0
60 {
61 struct vertex2f pos;
62 struct vertex2f luma_tc;
63 struct vertex2f cb_tc;
64 struct vertex2f cr_tc;
65 };
66
/*
 * Internal macroblock classification: the prediction direction combined
 * with frame/field motion. The values are used as array indices (per-type
 * counters and vertex buffer offsets), so they must stay dense and start
 * at zero.
 */
enum MACROBLOCK_TYPE
{
   MACROBLOCK_TYPE_INTRA = 0,
   MACROBLOCK_TYPE_FWD_FRAME_PRED = 1,
   MACROBLOCK_TYPE_FWD_FIELD_PRED = 2,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED = 3,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED = 4,
   MACROBLOCK_TYPE_BI_FRAME_PRED = 5,
   MACROBLOCK_TYPE_BI_FIELD_PRED = 6,

   /* Number of types above; not a type itself. */
   NUM_MACROBLOCK_TYPES
};
79
80 static bool
81 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
82 {
83 struct ureg_program *shader;
84 struct ureg_src vpos, vtex[3];
85 struct ureg_dst o_vpos, o_vtex[3];
86 unsigned i;
87
88 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
89 if (!shader)
90 return false;
91
92 vpos = ureg_DECL_vs_input(shader, 0);
93 for (i = 0; i < 3; ++i)
94 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
95 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
96 for (i = 0; i < 3; ++i)
97 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
98
99 /*
100 * o_vpos = vpos
101 * o_vtex[0..2] = vtex[0..2]
102 */
103 ureg_MOV(shader, o_vpos, vpos);
104 for (i = 0; i < 3; ++i)
105 ureg_MOV(shader, o_vtex[i], vtex[i]);
106
107 ureg_END(shader);
108
109 r->i_vs = ureg_create_shader_and_destroy(shader, r->pipe);
110 if (!r->i_vs)
111 return false;
112
113 return true;
114 }
115
116 static bool
117 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
118 {
119 struct ureg_program *shader;
120 struct ureg_src tc[3];
121 struct ureg_src sampler[3];
122 struct ureg_dst texel, temp;
123 struct ureg_dst fragment;
124 unsigned i;
125
126 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
127 if (!shader)
128 return false;
129
130 for (i = 0; i < 3; ++i) {
131 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
132 sampler[i] = ureg_DECL_sampler(shader, i);
133 }
134 texel = ureg_DECL_temporary(shader);
135 temp = ureg_DECL_temporary(shader);
136 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
137
138 /*
139 * texel.r = tex(tc[0], sampler[0])
140 * texel.g = tex(tc[1], sampler[1])
141 * texel.b = tex(tc[2], sampler[2])
142 * fragment = texel * scale
143 */
144 for (i = 0; i < 3; ++i) {
145 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
146 ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
147 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
148 }
149 ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
150
151 ureg_release_temporary(shader, texel);
152 ureg_release_temporary(shader, temp);
153 ureg_END(shader);
154
155 r->i_fs = ureg_create_shader_and_destroy(shader, r->pipe);
156 if (!r->i_fs)
157 return false;
158
159 return true;
160 }
161
162 static bool
163 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
164 {
165 struct ureg_program *shader;
166 struct ureg_src vpos, vtex[4];
167 struct ureg_dst o_vpos, o_vtex[4];
168 unsigned i;
169
170 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
171 if (!shader)
172 return false;
173
174 vpos = ureg_DECL_vs_input(shader, 0);
175 for (i = 0; i < 4; ++i)
176 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
177 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
178 for (i = 0; i < 4; ++i)
179 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
180
181 /*
182 * o_vpos = vpos
183 * o_vtex[0..2] = vtex[0..2]
184 * o_vtex[3] = vpos + vtex[3] // Apply motion vector
185 */
186 ureg_MOV(shader, o_vpos, vpos);
187 for (i = 0; i < 3; ++i)
188 ureg_MOV(shader, o_vtex[i], vtex[i]);
189 ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
190
191 ureg_END(shader);
192
193 r->p_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
194 if (!r->p_vs[0])
195 return false;
196
197 return true;
198 }
199
#if 0
/* Placeholder for the field-prediction vertex shader; compiled out. */
static void
create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void)r;
   assert(0);
}
#endif
207
208 static bool
209 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
210 {
211 struct ureg_program *shader;
212 struct ureg_src tc[4];
213 struct ureg_src sampler[4];
214 struct ureg_dst texel, ref;
215 struct ureg_dst fragment;
216 unsigned i;
217
218 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
219 if (!shader)
220 return false;
221
222 for (i = 0; i < 4; ++i) {
223 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
224 sampler[i] = ureg_DECL_sampler(shader, i);
225 }
226 texel = ureg_DECL_temporary(shader);
227 ref = ureg_DECL_temporary(shader);
228 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
229
230 /*
231 * texel.r = tex(tc[0], sampler[0])
232 * texel.g = tex(tc[1], sampler[1])
233 * texel.b = tex(tc[2], sampler[2])
234 * ref = tex(tc[3], sampler[3])
235 * fragment = texel * scale + ref
236 */
237 for (i = 0; i < 3; ++i) {
238 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
239 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
240 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
241 }
242 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
243 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
244
245 ureg_release_temporary(shader, texel);
246 ureg_release_temporary(shader, ref);
247 ureg_END(shader);
248
249 r->p_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
250 if (!r->p_fs[0])
251 return false;
252
253 return true;
254 }
255
#if 0
/* Placeholder for the field-prediction fragment shader; compiled out. */
static void
create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void)r;
   assert(0);
}
#endif
263
264 static bool
265 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
266 {
267 struct ureg_program *shader;
268 struct ureg_src vpos, vtex[5];
269 struct ureg_dst o_vpos, o_vtex[5];
270 unsigned i;
271
272 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
273 if (!shader)
274 return false;
275
276 vpos = ureg_DECL_vs_input(shader, 0);
277 for (i = 0; i < 4; ++i)
278 vtex[i] = ureg_DECL_vs_input(shader, i + 1);
279 /* Skip input 5 */
280 vtex[4] = ureg_DECL_vs_input(shader, 6);
281 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
282 for (i = 0; i < 5; ++i)
283 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
284
285 /*
286 * o_vpos = vpos
287 * o_vtex[0..2] = vtex[0..2]
288 * o_vtex[3..4] = vpos + vtex[3..4] // Apply motion vector
289 */
290 ureg_MOV(shader, o_vpos, vpos);
291 for (i = 0; i < 3; ++i)
292 ureg_MOV(shader, o_vtex[i], vtex[i]);
293 for (i = 3; i < 5; ++i)
294 ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
295
296 ureg_END(shader);
297
298 r->b_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
299 if (!r->b_vs[0])
300 return false;
301
302 return true;
303 }
304
#if 0
/* Placeholder for the bidirectional field-prediction vertex shader; compiled out. */
static void
create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void)r;
   assert(0);
}
#endif
312
313 static bool
314 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
315 {
316 struct ureg_program *shader;
317 struct ureg_src tc[5];
318 struct ureg_src sampler[5];
319 struct ureg_dst texel, ref[2];
320 struct ureg_dst fragment;
321 unsigned i;
322
323 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
324 if (!shader)
325 return false;
326
327 for (i = 0; i < 5; ++i) {
328 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
329 sampler[i] = ureg_DECL_sampler(shader, i);
330 }
331 texel = ureg_DECL_temporary(shader);
332 ref[0] = ureg_DECL_temporary(shader);
333 ref[1] = ureg_DECL_temporary(shader);
334 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
335
336 /*
337 * texel.r = tex(tc[0], sampler[0])
338 * texel.g = tex(tc[1], sampler[1])
339 * texel.b = tex(tc[2], sampler[2])
340 * ref[0..1 = tex(tc[3..4], sampler[3..4])
341 * ref[0] = lerp(ref[0], ref[1], 0.5)
342 * fragment = texel * scale + ref[0]
343 */
344 for (i = 0; i < 3; ++i) {
345 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
346 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
347 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
348 }
349 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
350 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
351 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
352
353 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
354
355 ureg_release_temporary(shader, texel);
356 ureg_release_temporary(shader, ref[0]);
357 ureg_release_temporary(shader, ref[1]);
358 ureg_END(shader);
359
360 r->b_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
361 if (!r->b_fs[0])
362 return false;
363
364 return true;
365 }
366
#if 0
/* Placeholder for the bidirectional field-prediction fragment shader; compiled out. */
static void
create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void)r;
   assert(0);
}
#endif
374
375 static void
376 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
377 {
378 unsigned i;
379
380 assert(r);
381
382 for (i = 0; i < 3; ++i) {
383 struct pipe_box rect =
384 {
385 0, 0, 0,
386 r->textures.all[i]->width0,
387 r->textures.all[i]->height0,
388 0
389 };
390
391 r->tex_transfer[i] = r->pipe->get_transfer
392 (
393 r->pipe, r->textures.all[i],
394 u_subresource(0, 0),
395 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
396 &rect
397 );
398
399 r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]);
400 }
401 }
402
403 static void
404 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
405 {
406 unsigned i;
407
408 assert(r);
409
410 for (i = 0; i < 3; ++i) {
411 r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]);
412 r->pipe->transfer_destroy(r->pipe, r->tex_transfer[i]);
413 }
414 }
415
416 static bool
417 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
418 {
419 struct pipe_sampler_state sampler;
420 unsigned filters[5];
421 unsigned i;
422
423 assert(r);
424
425 r->viewport.scale[0] = r->pot_buffers ?
426 util_next_power_of_two(r->picture_width) : r->picture_width;
427 r->viewport.scale[1] = r->pot_buffers ?
428 util_next_power_of_two(r->picture_height) : r->picture_height;
429 r->viewport.scale[2] = 1;
430 r->viewport.scale[3] = 1;
431 r->viewport.translate[0] = 0;
432 r->viewport.translate[1] = 0;
433 r->viewport.translate[2] = 0;
434 r->viewport.translate[3] = 0;
435
436 r->fb_state.width = r->pot_buffers ?
437 util_next_power_of_two(r->picture_width) : r->picture_width;
438 r->fb_state.height = r->pot_buffers ?
439 util_next_power_of_two(r->picture_height) : r->picture_height;
440 r->fb_state.nr_cbufs = 1;
441 r->fb_state.zsbuf = NULL;
442
443 /* Luma filter */
444 filters[0] = PIPE_TEX_FILTER_NEAREST;
445 /* Chroma filters */
446 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
447 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
448 filters[1] = PIPE_TEX_FILTER_NEAREST;
449 filters[2] = PIPE_TEX_FILTER_NEAREST;
450 }
451 else {
452 filters[1] = PIPE_TEX_FILTER_LINEAR;
453 filters[2] = PIPE_TEX_FILTER_LINEAR;
454 }
455 /* Fwd, bkwd ref filters */
456 filters[3] = PIPE_TEX_FILTER_LINEAR;
457 filters[4] = PIPE_TEX_FILTER_LINEAR;
458
459 for (i = 0; i < 5; ++i) {
460 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
461 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
462 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
463 sampler.min_img_filter = filters[i];
464 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
465 sampler.mag_img_filter = filters[i];
466 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
467 sampler.compare_func = PIPE_FUNC_ALWAYS;
468 sampler.normalized_coords = 1;
469 /*sampler.shadow_ambient = ; */
470 /*sampler.lod_bias = ; */
471 sampler.min_lod = 0;
472 /*sampler.max_lod = ; */
473 /*sampler.border_color[i] = ; */
474 /*sampler.max_anisotropy = ; */
475 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
476 }
477
478 return true;
479 }
480
481 static void
482 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
483 {
484 unsigned i;
485
486 assert(r);
487
488 for (i = 0; i < 5; ++i)
489 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
490 }
491
492 static bool
493 init_shaders(struct vl_mpeg12_mc_renderer *r)
494 {
495 assert(r);
496
497 create_intra_vert_shader(r);
498 create_intra_frag_shader(r);
499 create_frame_pred_vert_shader(r);
500 create_frame_pred_frag_shader(r);
501 create_frame_bi_pred_vert_shader(r);
502 create_frame_bi_pred_frag_shader(r);
503
504 return true;
505 }
506
507 static void
508 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
509 {
510 assert(r);
511
512 r->pipe->delete_vs_state(r->pipe, r->i_vs);
513 r->pipe->delete_fs_state(r->pipe, r->i_fs);
514 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
515 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
516 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
517 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
518 }
519
520 static bool
521 init_buffers(struct vl_mpeg12_mc_renderer *r)
522 {
523 struct pipe_resource template;
524 struct pipe_vertex_element vertex_elems[8];
525 struct pipe_sampler_view sampler_view;
526
527 const unsigned mbw =
528 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
529 const unsigned mbh =
530 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
531
532 unsigned i;
533
534 assert(r);
535
536 r->macroblocks_per_batch =
537 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
538 r->num_macroblocks = 0;
539 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
540
541 memset(&template, 0, sizeof(struct pipe_resource));
542 template.target = PIPE_TEXTURE_2D;
543 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
544 template.format = PIPE_FORMAT_R16_SNORM;
545 template.last_level = 0;
546 template.width0 = r->pot_buffers ?
547 util_next_power_of_two(r->picture_width) : r->picture_width;
548 template.height0 = r->pot_buffers ?
549 util_next_power_of_two(r->picture_height) : r->picture_height;
550 template.depth0 = 1;
551 template.usage = PIPE_USAGE_DYNAMIC;
552 template.bind = PIPE_BIND_SAMPLER_VIEW;
553 template.flags = 0;
554
555 r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
556
557 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
558 template.width0 = r->pot_buffers ?
559 util_next_power_of_two(r->picture_width / 2) :
560 r->picture_width / 2;
561 template.height0 = r->pot_buffers ?
562 util_next_power_of_two(r->picture_height / 2) :
563 r->picture_height / 2;
564 }
565 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
566 template.height0 = r->pot_buffers ?
567 util_next_power_of_two(r->picture_height / 2) :
568 r->picture_height / 2;
569
570 r->textures.individual.cb =
571 r->pipe->screen->resource_create(r->pipe->screen, &template);
572 r->textures.individual.cr =
573 r->pipe->screen->resource_create(r->pipe->screen, &template);
574
575 for (i = 0; i < 3; ++i) {
576 u_sampler_view_default_template(&sampler_view,
577 r->textures.all[i],
578 r->textures.all[i]->format);
579 r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
580 }
581
582 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
583 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
584 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
585 /* XXX: Create with usage DYNAMIC or STREAM */
586 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
587 (
588 r->pipe->screen,
589 PIPE_BIND_VERTEX_BUFFER,
590 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
591 );
592
593 for (i = 1; i < 3; ++i) {
594 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
595 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
596 r->vertex_bufs.all[i].buffer_offset = 0;
597 /* XXX: Create with usage DYNAMIC or STREAM */
598 r->vertex_bufs.all[i].buffer = pipe_buffer_create
599 (
600 r->pipe->screen,
601 PIPE_BIND_VERTEX_BUFFER,
602 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
603 );
604 }
605
606 /* Position element */
607 vertex_elems[0].src_offset = 0;
608 vertex_elems[0].instance_divisor = 0;
609 vertex_elems[0].vertex_buffer_index = 0;
610 vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
611
612 /* Luma, texcoord element */
613 vertex_elems[1].src_offset = sizeof(struct vertex2f);
614 vertex_elems[1].instance_divisor = 0;
615 vertex_elems[1].vertex_buffer_index = 0;
616 vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
617
618 /* Chroma Cr texcoord element */
619 vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
620 vertex_elems[2].instance_divisor = 0;
621 vertex_elems[2].vertex_buffer_index = 0;
622 vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
623
624 /* Chroma Cb texcoord element */
625 vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
626 vertex_elems[3].instance_divisor = 0;
627 vertex_elems[3].vertex_buffer_index = 0;
628 vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
629
630 /* First ref surface top field texcoord element */
631 vertex_elems[4].src_offset = 0;
632 vertex_elems[4].instance_divisor = 0;
633 vertex_elems[4].vertex_buffer_index = 1;
634 vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
635
636 /* First ref surface bottom field texcoord element */
637 vertex_elems[5].src_offset = sizeof(struct vertex2f);
638 vertex_elems[5].instance_divisor = 0;
639 vertex_elems[5].vertex_buffer_index = 1;
640 vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
641
642 /* Second ref surface top field texcoord element */
643 vertex_elems[6].src_offset = 0;
644 vertex_elems[6].instance_divisor = 0;
645 vertex_elems[6].vertex_buffer_index = 2;
646 vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
647
648 /* Second ref surface bottom field texcoord element */
649 vertex_elems[7].src_offset = sizeof(struct vertex2f);
650 vertex_elems[7].instance_divisor = 0;
651 vertex_elems[7].vertex_buffer_index = 2;
652 vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
653
654 r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
655 r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
656 r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
657
658 r->vs_const_buf = pipe_buffer_create
659 (
660 r->pipe->screen,
661 PIPE_BIND_CONSTANT_BUFFER,
662 sizeof(struct vertex_shader_consts)
663 );
664
665 return true;
666 }
667
668 static void
669 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
670 {
671 unsigned i;
672
673 assert(r);
674
675 pipe_resource_reference(&r->vs_const_buf, NULL);
676
677 for (i = 0; i < 3; ++i) {
678 pipe_sampler_view_reference(&r->sampler_views.all[i], NULL);
679 r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state.all[i]);
680 pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
681 pipe_resource_reference(&r->textures.all[i], NULL);
682 }
683
684 FREE(r->macroblock_buf);
685 }
686
687 static enum MACROBLOCK_TYPE
688 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
689 {
690 assert(mb);
691
692 switch (mb->mb_type) {
693 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
694 return MACROBLOCK_TYPE_INTRA;
695 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
696 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
697 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
698 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
699 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
700 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
701 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
702 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
703 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
704 default:
705 assert(0);
706 }
707
708 /* Unreachable */
709 return -1;
710 }
711
712 static void
713 gen_block_verts(struct vert_stream_0 *vb, unsigned cbp, unsigned mbx, unsigned mby,
714 const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
715 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
716 bool use_zeroblocks, struct vertex2f *zero_blocks)
717 {
718 struct vertex2f v;
719
720 assert(vb);
721 assert(unit && half && offset);
722 assert(zero_blocks || !use_zeroblocks);
723
724 /* Generate vertices for two triangles covering a block */
725 v.x = mbx * unit->x + offset->x;
726 v.y = mby * unit->y + offset->y;
727
728 vb[0].pos.x = v.x;
729 vb[0].pos.y = v.y;
730 vb[1].pos.x = v.x;
731 vb[1].pos.y = v.y + half->y;
732 vb[2].pos.x = v.x + half->x;
733 vb[2].pos.y = v.y;
734 vb[3].pos.x = v.x + half->x;
735 vb[3].pos.y = v.y;
736 vb[4].pos.x = v.x;
737 vb[4].pos.y = v.y + half->y;
738 vb[5].pos.x = v.x + half->x;
739 vb[5].pos.y = v.y + half->y;
740
741 /* Generate texcoords for the triangles, either pointing to the correct area on the luma/chroma texture
742 or if zero blocks are being used, to the zero block if the appropriate CBP bits aren't set (i.e. no data
743 for this channel is defined for this block) */
744
745 if (!use_zeroblocks || cbp & luma_mask) {
746 v.x = mbx * unit->x + offset->x;
747 v.y = mby * unit->y + offset->y;
748 }
749 else {
750 v.x = zero_blocks[0].x;
751 v.y = zero_blocks[0].y;
752 }
753
754 vb[0].luma_tc.x = v.x;
755 vb[0].luma_tc.y = v.y;
756 vb[1].luma_tc.x = v.x;
757 vb[1].luma_tc.y = v.y + half->y;
758 vb[2].luma_tc.x = v.x + half->x;
759 vb[2].luma_tc.y = v.y;
760 vb[3].luma_tc.x = v.x + half->x;
761 vb[3].luma_tc.y = v.y;
762 vb[4].luma_tc.x = v.x;
763 vb[4].luma_tc.y = v.y + half->y;
764 vb[5].luma_tc.x = v.x + half->x;
765 vb[5].luma_tc.y = v.y + half->y;
766
767 if (!use_zeroblocks || cbp & cb_mask) {
768 v.x = mbx * unit->x + offset->x;
769 v.y = mby * unit->y + offset->y;
770 }
771 else {
772 v.x = zero_blocks[1].x;
773 v.y = zero_blocks[1].y;
774 }
775
776 vb[0].cb_tc.x = v.x;
777 vb[0].cb_tc.y = v.y;
778 vb[1].cb_tc.x = v.x;
779 vb[1].cb_tc.y = v.y + half->y;
780 vb[2].cb_tc.x = v.x + half->x;
781 vb[2].cb_tc.y = v.y;
782 vb[3].cb_tc.x = v.x + half->x;
783 vb[3].cb_tc.y = v.y;
784 vb[4].cb_tc.x = v.x;
785 vb[4].cb_tc.y = v.y + half->y;
786 vb[5].cb_tc.x = v.x + half->x;
787 vb[5].cb_tc.y = v.y + half->y;
788
789 if (!use_zeroblocks || cbp & cr_mask) {
790 v.x = mbx * unit->x + offset->x;
791 v.y = mby * unit->y + offset->y;
792 }
793 else {
794 v.x = zero_blocks[2].x;
795 v.y = zero_blocks[2].y;
796 }
797
798 vb[0].cr_tc.x = v.x;
799 vb[0].cr_tc.y = v.y;
800 vb[1].cr_tc.x = v.x;
801 vb[1].cr_tc.y = v.y + half->y;
802 vb[2].cr_tc.x = v.x + half->x;
803 vb[2].cr_tc.y = v.y;
804 vb[3].cr_tc.x = v.x + half->x;
805 vb[3].cr_tc.y = v.y;
806 vb[4].cr_tc.x = v.x;
807 vb[4].cr_tc.y = v.y + half->y;
808 vb[5].cr_tc.x = v.x + half->x;
809 vb[5].cr_tc.y = v.y + half->y;
810 }
811
812 static void
813 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
814 struct pipe_mpeg12_macroblock *mb, unsigned pos,
815 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
816 {
817 struct vertex2f mo_vec[2];
818
819 unsigned i;
820
821 assert(r);
822 assert(mb);
823 assert(ycbcr_vb);
824 assert(pos < r->macroblocks_per_batch);
825
826 mo_vec[1].x = 0;
827 mo_vec[1].y = 0;
828
829 switch (mb->mb_type) {
830 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
831 {
832 struct vertex2f *vb;
833
834 assert(ref_vb && ref_vb[1]);
835
836 vb = ref_vb[1] + pos * 2 * 24;
837
838 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
839 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
840
841 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
842 for (i = 0; i < 24 * 2; i += 2) {
843 vb[i].x = mo_vec[0].x;
844 vb[i].y = mo_vec[0].y;
845 }
846 }
847 else {
848 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
849 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
850
851 for (i = 0; i < 24 * 2; i += 2) {
852 vb[i].x = mo_vec[0].x;
853 vb[i].y = mo_vec[0].y;
854 vb[i + 1].x = mo_vec[1].x;
855 vb[i + 1].y = mo_vec[1].y;
856 }
857 }
858
859 /* fall-through */
860 }
861 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
862 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
863 {
864 struct vertex2f *vb;
865
866 assert(ref_vb && ref_vb[0]);
867
868 vb = ref_vb[0] + pos * 2 * 24;
869
870 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
871 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
872 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
873
874 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
875 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
876 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
877 }
878 }
879 else {
880 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
881 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
882
883 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
884 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
885 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
886 }
887 }
888
889 if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
890 for (i = 0; i < 24 * 2; i += 2) {
891 vb[i].x = mo_vec[0].x;
892 vb[i].y = mo_vec[0].y;
893 }
894 }
895 else {
896 for (i = 0; i < 24 * 2; i += 2) {
897 vb[i].x = mo_vec[0].x;
898 vb[i].y = mo_vec[0].y;
899 vb[i + 1].x = mo_vec[1].x;
900 vb[i + 1].y = mo_vec[1].y;
901 }
902 }
903
904 /* fall-through */
905 }
906 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
907 {
908 const struct vertex2f unit =
909 {
910 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
911 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
912 };
913 const struct vertex2f half =
914 {
915 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
916 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
917 };
918 const struct vertex2f offsets[2][2] =
919 {
920 {
921 {0, 0}, {0, half.y}
922 },
923 {
924 {half.x, 0}, {half.x, half.y}
925 }
926 };
927 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
928
929 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
930
931 gen_block_verts(vb, mb->cbp, mb->mbx, mb->mby,
932 &unit, &half, &offsets[0][0],
933 32, 2, 1, use_zb, r->zero_block);
934
935 gen_block_verts(vb + 6, mb->cbp, mb->mbx, mb->mby,
936 &unit, &half, &offsets[1][0],
937 16, 2, 1, use_zb, r->zero_block);
938
939 gen_block_verts(vb + 12, mb->cbp, mb->mbx, mb->mby,
940 &unit, &half, &offsets[0][1],
941 8, 2, 1, use_zb, r->zero_block);
942
943 gen_block_verts(vb + 18, mb->cbp, mb->mbx, mb->mby,
944 &unit, &half, &offsets[1][1],
945 4, 2, 1, use_zb, r->zero_block);
946
947 break;
948 }
949 default:
950 assert(0);
951 }
952 }
953
954 static void
955 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
956 unsigned *num_macroblocks)
957 {
958 unsigned offset[NUM_MACROBLOCK_TYPES];
959 struct vert_stream_0 *ycbcr_vb;
960 struct vertex2f *ref_vb[2];
961 struct pipe_transfer *buf_transfer[3];
962 unsigned i;
963
964 assert(r);
965 assert(num_macroblocks);
966
967 for (i = 0; i < r->num_macroblocks; ++i) {
968 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
969 ++num_macroblocks[mb_type];
970 }
971
972 offset[0] = 0;
973
974 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
975 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
976
977 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
978 (
979 r->pipe,
980 r->vertex_bufs.individual.ycbcr.buffer,
981 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
982 &buf_transfer[0]
983 );
984
985 for (i = 0; i < 2; ++i)
986 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
987 (
988 r->pipe,
989 r->vertex_bufs.individual.ref[i].buffer,
990 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
991 &buf_transfer[i + 1]
992 );
993
994 for (i = 0; i < r->num_macroblocks; ++i) {
995 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
996
997 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
998 ycbcr_vb, ref_vb);
999
1000 ++offset[mb_type];
1001 }
1002
1003 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
1004 for (i = 0; i < 2; ++i)
1005 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
1006 }
1007
1008 static struct pipe_sampler_view
1009 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
1010 {
1011 struct pipe_sampler_view *sampler_view;
1012 assert(r);
1013 assert(surface);
1014
1015 sampler_view = (struct pipe_sampler_view*)util_keymap_lookup(r->texview_map, &surface);
1016 if (!sampler_view) {
1017 struct pipe_sampler_view templat;
1018 boolean added_to_map;
1019
1020 u_sampler_view_default_template(&templat, surface->texture,
1021 surface->texture->format);
1022 sampler_view = r->pipe->create_sampler_view(r->pipe, surface->texture,
1023 &templat);
1024 if (!sampler_view)
1025 return NULL;
1026
1027 added_to_map = util_keymap_insert(r->texview_map, &surface,
1028 sampler_view, r->pipe);
1029 assert(added_to_map);
1030 }
1031
1032 return sampler_view;
1033 }
1034
1035 static void
1036 flush(struct vl_mpeg12_mc_renderer *r)
1037 {
1038 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1039 unsigned vb_start = 0;
1040 struct vertex_shader_consts *vs_consts;
1041 struct pipe_transfer *buf_transfer;
1042 unsigned i;
1043
1044 assert(r);
1045 assert(r->num_macroblocks == r->macroblocks_per_batch);
1046
1047 gen_macroblock_stream(r, num_macroblocks);
1048
1049 r->fb_state.cbufs[0] = r->surface;
1050
1051 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1052 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1053
1054 vs_consts = pipe_buffer_map
1055 (
1056 r->pipe, r->vs_const_buf,
1057 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1058 &buf_transfer
1059 );
1060
1061 vs_consts->denorm.x = r->surface->width;
1062 vs_consts->denorm.y = r->surface->height;
1063
1064 pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
1065
1066 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1067 r->vs_const_buf);
1068
1069 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1070 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1071 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
1072 r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
1073 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1074 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1075 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1076
1077 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1078 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1079 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1080 }
1081
1082 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1083 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1084 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1085 r->textures.individual.ref[0] = r->past->texture;
1086 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1087 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1088 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1089 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1090 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1091
1092 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1093 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1094 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1095 }
1096
1097 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1098 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1099 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1100 r->textures.individual.ref[0] = r->past->texture;
1101 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1102 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1103 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1104 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1105 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1106
1107 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1108 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1109 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1110 }
1111
1112 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1113 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1114 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1115 r->textures.individual.ref[0] = r->future->texture;
1116 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1117 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1118 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1119 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1120 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1121
1122 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1123 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1124 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1125 }
1126
1127 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0*/ ) {
1128 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1129 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1130 r->textures.individual.ref[0] = r->future->texture;
1131 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1132 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1133 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1134 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1135 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1136
1137 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1138 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1139 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1140 }
1141
1142 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1143 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1144 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1145 r->textures.individual.ref[0] = r->past->texture;
1146 r->textures.individual.ref[1] = r->future->texture;
1147 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1148 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1149 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1150 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1151 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1152 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1153
1154 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1155 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1156 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1157 }
1158
1159 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1160 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1161 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1162 r->textures.individual.ref[0] = r->past->texture;
1163 r->textures.individual.ref[1] = r->future->texture;
1164 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1165 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1166 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1167 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1168 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1169 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1170
1171 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1172 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1173 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1174 }
1175
1176 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1177
1178 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1179 for (i = 0; i < 3; ++i)
1180 r->zero_block[i].x = ZERO_BLOCK_NIL;
1181
1182 r->num_macroblocks = 0;
1183 }
1184
1185 static void
1186 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1187 {
1188 unsigned y;
1189
1190 assert(src);
1191 assert(dst);
1192
1193 for (y = 0; y < BLOCK_HEIGHT; ++y)
1194 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1195 }
1196
1197 static void
1198 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1199 {
1200 unsigned y;
1201
1202 assert(src);
1203 assert(dst);
1204
1205 for (y = 0; y < BLOCK_HEIGHT; ++y)
1206 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1207 }
1208
1209 static void
1210 fill_zero_block(short *dst, unsigned dst_pitch)
1211 {
1212 unsigned y;
1213
1214 assert(dst);
1215
1216 for (y = 0; y < BLOCK_HEIGHT; ++y)
1217 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1218 }
1219
1220 static void
1221 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1222 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1223 {
1224 unsigned tex_pitch;
1225 short *texels;
1226 unsigned tb = 0, sb = 0;
1227 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1228 unsigned x, y;
1229
1230 assert(r);
1231 assert(blocks);
1232
1233 tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->resource->format);
1234 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1235
1236 for (y = 0; y < 2; ++y) {
1237 for (x = 0; x < 2; ++x, ++tb) {
1238 if ((cbp >> (5 - tb)) & 1) {
1239 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1240 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1241 texels + y * tex_pitch * BLOCK_WIDTH +
1242 x * BLOCK_WIDTH, tex_pitch);
1243 }
1244 else {
1245 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1246 texels + y * tex_pitch + x * BLOCK_WIDTH,
1247 tex_pitch);
1248 }
1249
1250 ++sb;
1251 }
1252 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1253 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1254 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1255 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1256 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1257 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1258 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1259 }
1260 }
1261 }
1262 }
1263 }
1264
1265 /* TODO: Implement 422, 444 */
1266 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1267
1268 mbpx /= 2;
1269 mbpy /= 2;
1270
1271 for (tb = 0; tb < 2; ++tb) {
1272 tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->resource->format);
1273 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1274
1275 if ((cbp >> (1 - tb)) & 1) {
1276 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1277 ++sb;
1278 }
1279 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1280 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1281 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1282 fill_zero_block(texels, tex_pitch);
1283 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1284 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1285 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1286 }
1287 }
1288 }
1289 }
1290 }
1291
1292 static void
1293 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1294 struct pipe_mpeg12_macroblock *mb)
1295 {
1296 assert(r);
1297 assert(mb);
1298 assert(mb->blocks);
1299 assert(r->num_macroblocks < r->macroblocks_per_batch);
1300
1301 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1302 sizeof(struct pipe_mpeg12_macroblock));
1303
1304 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1305
1306 ++r->num_macroblocks;
1307 }
1308
1309 static void
1310 texview_map_delete(const struct keymap *map,
1311 const void *key, void *data,
1312 void *user)
1313 {
1314 struct pipe_sampler_view *sv = (struct pipe_sampler_view*)data;
1315
1316 assert(map);
1317 assert(key);
1318 assert(data);
1319 assert(user);
1320
1321 pipe_sampler_view_reference(&sv, NULL);
1322 }
1323
1324 bool
1325 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1326 struct pipe_context *pipe,
1327 unsigned picture_width,
1328 unsigned picture_height,
1329 enum pipe_video_chroma_format chroma_format,
1330 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1331 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1332 bool pot_buffers)
1333 {
1334 unsigned i;
1335
1336 assert(renderer);
1337 assert(pipe);
1338 /* TODO: Implement other policies */
1339 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1340 /* TODO: Implement this */
1341 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1342 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1343 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1344 assert(pot_buffers);
1345
1346 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1347
1348 renderer->pipe = pipe;
1349 renderer->picture_width = picture_width;
1350 renderer->picture_height = picture_height;
1351 renderer->chroma_format = chroma_format;
1352 renderer->bufmode = bufmode;
1353 renderer->eb_handling = eb_handling;
1354 renderer->pot_buffers = pot_buffers;
1355
1356 renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
1357 texview_map_delete);
1358 if (!renderer->texview_map)
1359 return false;
1360
1361 if (!init_pipe_state(renderer)) {
1362 util_delete_keymap(renderer->texview_map, renderer->pipe);
1363 return false;
1364 }
1365 if (!init_shaders(renderer)) {
1366 util_delete_keymap(renderer->texview_map, renderer->pipe);
1367 cleanup_pipe_state(renderer);
1368 return false;
1369 }
1370 if (!init_buffers(renderer)) {
1371 util_delete_keymap(renderer->texview_map, renderer->pipe);
1372 cleanup_shaders(renderer);
1373 cleanup_pipe_state(renderer);
1374 return false;
1375 }
1376
1377 renderer->surface = NULL;
1378 renderer->past = NULL;
1379 renderer->future = NULL;
1380 for (i = 0; i < 3; ++i)
1381 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1382 renderer->num_macroblocks = 0;
1383
1384 xfer_buffers_map(renderer);
1385
1386 return true;
1387 }
1388
1389 void
1390 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1391 {
1392 assert(renderer);
1393
1394 xfer_buffers_unmap(renderer);
1395
1396 util_delete_keymap(renderer->texview_map, renderer->pipe);
1397 cleanup_pipe_state(renderer);
1398 cleanup_shaders(renderer);
1399 cleanup_buffers(renderer);
1400
1401 pipe_surface_reference(&renderer->surface, NULL);
1402 pipe_surface_reference(&renderer->past, NULL);
1403 pipe_surface_reference(&renderer->future, NULL);
1404 }
1405
1406 void
1407 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1408 *renderer,
1409 struct pipe_surface *surface,
1410 struct pipe_surface *past,
1411 struct pipe_surface *future,
1412 unsigned num_macroblocks,
1413 struct pipe_mpeg12_macroblock
1414 *mpeg12_macroblocks,
1415 struct pipe_fence_handle **fence)
1416 {
1417 bool new_surface = false;
1418
1419 assert(renderer);
1420 assert(surface);
1421 assert(num_macroblocks);
1422 assert(mpeg12_macroblocks);
1423
1424 if (renderer->surface) {
1425 if (surface != renderer->surface) {
1426 if (renderer->num_macroblocks > 0) {
1427 xfer_buffers_unmap(renderer);
1428 flush(renderer);
1429 }
1430
1431 new_surface = true;
1432 }
1433
1434 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1435 assert(surface != renderer->surface || renderer->past == past);
1436 assert(surface != renderer->surface || renderer->future == future);
1437 }
1438 else
1439 new_surface = true;
1440
1441 if (new_surface) {
1442 pipe_surface_reference(&renderer->surface, surface);
1443 pipe_surface_reference(&renderer->past, past);
1444 pipe_surface_reference(&renderer->future, future);
1445 renderer->fence = fence;
1446 renderer->surface_tex_inv_size.x = 1.0f / surface->width;
1447 renderer->surface_tex_inv_size.y = 1.0f / surface->height;
1448 }
1449
1450 while (num_macroblocks) {
1451 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1452 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1453 unsigned i;
1454
1455 for (i = 0; i < num_to_submit; ++i) {
1456 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1457 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1458 }
1459
1460 num_macroblocks -= num_to_submit;
1461
1462 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1463 xfer_buffers_unmap(renderer);
1464 flush(renderer);
1465 xfer_buffers_map(renderer);
1466 /* Next time we get this surface it may have new ref frames */
1467 pipe_surface_reference(&renderer->surface, NULL);
1468 pipe_surface_reference(&renderer->past, NULL);
1469 pipe_surface_reference(&renderer->future, NULL);
1470 }
1471 }
1472 }