4616e483f74a0dc40147454eac647fad36a110d7
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include "util/u_draw.h"
30 #include <assert.h>
31 #include <pipe/p_context.h>
32 #include <util/u_inlines.h>
33 #include <util/u_format.h>
34 #include <util/u_math.h>
35 #include <util/u_memory.h>
36 #include <util/u_keymap.h>
37 #include <util/u_sampler.h>
38 #include <tgsi/tgsi_ureg.h>
39
/* Default buffer alignment; not referenced in the visible part of this file. */
#define DEFAULT_BUF_ALIGNMENT 1

/* Dimensions, in pixels, of a single block and of a whole macroblock. */
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8
#define MACROBLOCK_WIDTH 16
#define MACROBLOCK_HEIGHT 16

/*
 * Factor every fragment shader multiplies the fetched Y/Cb/Cr texel by.
 * NOTE(review): presumably rescales the 16 bit signed-normalized texel
 * value to the range of the 8 bit render target -- confirm.
 */
#define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
46
47 struct vertex_shader_consts
48 {
49 struct vertex4f norm;
50 };
51
52 struct fragment_shader_consts
53 {
54 struct vertex4f multiplier;
55 struct vertex4f div;
56 };
57
58 struct vert_stream_0
59 {
60 struct vertex2f pos;
61 float luma_eb;
62 float cb_eb;
63 float cr_eb;
64 };
65
/*
 * Renderer-internal macroblock classification (prediction direction
 * combined with frame/field motion type).  The values are used as array
 * indices, so they must stay dense and start at zero.
 */
enum MACROBLOCK_TYPE
{
   MACROBLOCK_TYPE_INTRA = 0,
   MACROBLOCK_TYPE_FWD_FRAME_PRED,
   MACROBLOCK_TYPE_FWD_FIELD_PRED,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED,
   MACROBLOCK_TYPE_BI_FRAME_PRED,
   MACROBLOCK_TYPE_BI_FIELD_PRED,

   /* number of types above; not a valid type itself */
   NUM_MACROBLOCK_TYPES
};
78
79 /* vertices for two triangles covering four block */
80 static const struct vertex2f const_rectangle[4][2][3] = {
81 {
82 { {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f} },
83 { {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f} }
84 }, {
85 { {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f} },
86 { {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f} }
87 }, {
88 { {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f} },
89 { {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f} }
90 }, {
91 { {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f} },
92 { {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} }
93 }
94 };
95
96 static void *
97 create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
98 {
99 struct ureg_program *shader;
100 struct ureg_src norm, mbs;
101 struct ureg_src vrect, vpos, vtex, vmv[4];
102 struct ureg_dst t_vpos, scale;
103 struct ureg_dst o_vpos, o_vtex[3], o_vmv[4], o_line;
104 unsigned i, j, count;
105 bool interlaced = mv_per_frame == 2;
106
107 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
108 if (!shader)
109 return NULL;
110
111 norm = ureg_DECL_constant(shader, 0);
112 mbs = ureg_imm2f(shader, MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT);
113
114 t_vpos = ureg_DECL_temporary(shader);
115 scale = ureg_DECL_temporary(shader);
116
117 vrect = ureg_DECL_vs_input(shader, 0);
118 vpos = ureg_DECL_vs_input(shader, 1);
119 vtex = ureg_DECL_vs_input(shader, 2);
120
121 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
122 for (i = 0; i < 3; ++i)
123 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1 + i);
124
125 count=0;
126 for (i = 0; i < ref_frames; ++i) {
127 for (j = 0; j < 2; ++j) {
128 if(j < mv_per_frame) {
129 vmv[count] = ureg_DECL_vs_input(shader, 3 + i * 2 + j);
130 o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
131 count++;
132 }
133 /* workaround for r600g */
134 else if(ref_frames == 2)
135 ureg_DECL_vs_input(shader, 3 + i * 2 + j);
136 }
137 }
138
139 o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
140
141 /*
142 * scale = norm * mbs;
143 *
144 * t_vpos = (vpos + vrect) * scale
145 * o_vpos.xy = t_vpos
146 * o_vpos.zw = vpos
147 *
148 * o_vtex[0..2].xy = t_vpos
149 * o_vtex[0..2].z = vtex[0..2].[xyz]
150 *
151 * if(count > 0) { // Apply motion vectors
152 * scale = norm * 0.5;
153 * o_vmv[0..count] = t_vpos + vmv[0..4] * scale
154 * }
155 *
156 * if(interlaced) {
157 * o_line.x = 1
158 * o_line.y = vpos.y * 8
159 * }
160 */
161 ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, mbs);
162
163 ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
164 ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), ureg_src(scale));
165 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
166 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
167
168 for (i = 0; i < 3; ++i) {
169 ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
170 ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_X + i));
171 }
172
173 if(count > 0) {
174 ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, ureg_imm1f(shader, 0.5f));
175 for (i = 0; i < count; ++i)
176 ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(scale), vmv[i], ureg_src(t_vpos));
177 }
178
179 if (interlaced) {
180 ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f));
181 ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
182 }
183
184 ureg_release_temporary(shader, t_vpos);
185 ureg_release_temporary(shader, scale);
186
187 ureg_END(shader);
188
189 return ureg_create_shader_and_destroy(shader, r->pipe);
190 }
191
192 static struct ureg_dst
193 fetch_ycbcr(struct ureg_program *shader)
194 {
195 struct ureg_src tc[3];
196 struct ureg_src sampler[3];
197 struct ureg_dst texel, tmp;
198 unsigned i;
199
200 texel = ureg_DECL_temporary(shader);
201 tmp = ureg_DECL_temporary(shader);
202
203 for (i = 0; i < 3; ++i) {
204 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
205 sampler[i] = ureg_DECL_sampler(shader, i);
206 }
207
208 /*
209 * texel.y = tex(tc[0], sampler[0])
210 * texel.cb = tex(tc[1], sampler[1])
211 * texel.cr = tex(tc[2], sampler[2])
212 */
213 for (i = 0; i < 3; ++i) {
214 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
215 ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, tc[i], sampler[i]);
216 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
217 }
218
219 ureg_release_temporary(shader, tmp);
220
221 return texel;
222 }
223
224 static void *
225 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
226 {
227 struct ureg_program *shader;
228 struct ureg_dst texel;
229 struct ureg_dst fragment;
230
231 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
232 if (!shader)
233 return NULL;
234
235 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
236
237 /*
238 * texel = fetch_ycbcr()
239 * fragment = texel * scale
240 */
241 texel = fetch_ycbcr(shader);
242 ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
243
244 ureg_release_temporary(shader, texel);
245 ureg_END(shader);
246
247 return ureg_create_shader_and_destroy(shader, r->pipe);
248 }
249
250 static void *
251 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
252 {
253 struct ureg_program *shader;
254 struct ureg_src tc;
255 struct ureg_src sampler;
256 struct ureg_dst texel, ref;
257 struct ureg_dst fragment;
258
259 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
260 if (!shader)
261 return NULL;
262
263 tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 4, TGSI_INTERPOLATE_LINEAR);
264 sampler = ureg_DECL_sampler(shader, 3);
265
266 ref = ureg_DECL_temporary(shader);
267 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
268
269 /*
270 * texel = fetch_ycbcr()
271 * ref = tex(tc, sampler)
272 * fragment = texel * scale + ref
273 */
274 texel = fetch_ycbcr(shader);
275 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc, sampler);
276 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
277
278 ureg_release_temporary(shader, texel);
279 ureg_release_temporary(shader, ref);
280 ureg_END(shader);
281
282 return ureg_create_shader_and_destroy(shader, r->pipe);
283 }
284
285 static void *
286 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
287 {
288 struct ureg_program *shader;
289 struct ureg_src tc[2], line;
290 struct ureg_src sampler;
291 struct ureg_dst texel, ref, tmp;
292 struct ureg_dst fragment;
293 unsigned i, label;
294
295 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
296 if (!shader)
297 return NULL;
298
299 for (i = 0; i < 2; ++i)
300 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 4, TGSI_INTERPOLATE_LINEAR);
301 sampler = ureg_DECL_sampler(shader, 3);
302
303 line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 6, TGSI_INTERPOLATE_LINEAR);
304
305 ref = ureg_DECL_temporary(shader);
306 tmp = ureg_DECL_temporary(shader);
307 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
308
309 /*
310 * texel = fetch_ycbcr()
311 * if(line % 2)
312 * ref = tex(tc[1], sampler)
313 * else
314 * ref = tex(tc[0], sampler)
315 * fragment = texel * scale + ref
316 */
317 texel = fetch_ycbcr(shader);
318
319 ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
320 ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
321 ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
322 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[1], sampler);
323 ureg_ELSE(shader, &label);
324 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[0], sampler);
325 ureg_ENDIF(shader);
326
327 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
328
329 ureg_release_temporary(shader, tmp);
330 ureg_release_temporary(shader, texel);
331 ureg_release_temporary(shader, ref);
332 ureg_END(shader);
333
334 return ureg_create_shader_and_destroy(shader, r->pipe);
335 }
336
337 static void *
338 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
339 {
340 struct ureg_program *shader;
341 struct ureg_src tc[2];
342 struct ureg_src sampler[2];
343 struct ureg_dst texel, ref[2];
344 struct ureg_dst fragment;
345 unsigned i;
346
347 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
348 if (!shader)
349 return NULL;
350
351 for (i = 0; i < 2; ++i) {
352 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 4, TGSI_INTERPOLATE_LINEAR);
353 sampler[i] = ureg_DECL_sampler(shader, i + 3);
354 }
355
356 ref[0] = ureg_DECL_temporary(shader);
357 ref[1] = ureg_DECL_temporary(shader);
358 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
359
360 /*
361 * texel = fetch_ycbcr()
362 * ref[0..1 = tex(tc[3..4], sampler[3..4])
363 * ref[0] = lerp(ref[0], ref[1], 0.5)
364 * fragment = texel * scale + ref[0]
365 */
366 texel = fetch_ycbcr(shader);
367 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
368 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]);
369 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
370
371 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
372
373 ureg_release_temporary(shader, texel);
374 ureg_release_temporary(shader, ref[0]);
375 ureg_release_temporary(shader, ref[1]);
376 ureg_END(shader);
377
378 return ureg_create_shader_and_destroy(shader, r->pipe);
379 }
380
381 static void *
382 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
383 {
384 struct ureg_program *shader;
385 struct ureg_src tc[4], line;
386 struct ureg_src sampler[2];
387 struct ureg_dst texel, ref[2], tmp;
388 struct ureg_dst fragment;
389 unsigned i, label;
390
391 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
392 if (!shader)
393 return NULL;
394
395 for (i = 0; i < 4; ++i)
396 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 4, TGSI_INTERPOLATE_LINEAR);
397 for (i = 0; i < 2; ++i)
398 sampler[i] = ureg_DECL_sampler(shader, i + 3);
399
400 line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 8, TGSI_INTERPOLATE_LINEAR);
401
402 texel = ureg_DECL_temporary(shader);
403 ref[0] = ureg_DECL_temporary(shader);
404 ref[1] = ureg_DECL_temporary(shader);
405 tmp = ureg_DECL_temporary(shader);
406 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
407
408 /*
409 * texel = fetch_ycbcr()
410 * if(line % 2)
411 * ref[0..1] = tex(tc[4|6], sampler[3..4])
412 * else
413 * ref[0..1] = tex(tc[3|5], sampler[3..4])
414 * ref[0] = lerp(ref[0], ref[1], 0.5)
415 * fragment = texel * scale + ref[0]
416 */
417 texel = fetch_ycbcr(shader);
418
419 ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
420 ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
421 ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
422 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
423 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[3], sampler[1]);
424 ureg_ELSE(shader, &label);
425 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
426 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[2], sampler[1]);
427 ureg_ENDIF(shader);
428
429 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
430
431 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
432
433 ureg_release_temporary(shader, tmp);
434 ureg_release_temporary(shader, texel);
435 ureg_release_temporary(shader, ref[0]);
436 ureg_release_temporary(shader, ref[1]);
437 ureg_END(shader);
438
439 return ureg_create_shader_and_destroy(shader, r->pipe);
440 }
441
442 static void
443 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
444 {
445 unsigned i;
446
447 assert(r);
448
449 for (i = 0; i < 3; ++i) {
450 struct pipe_box rect =
451 {
452 0, 0, 0,
453 r->textures.all[i]->width0,
454 r->textures.all[i]->height0,
455 1
456 };
457
458 r->tex_transfer[i] = r->pipe->get_transfer
459 (
460 r->pipe, r->textures.all[i],
461 u_subresource(0, 0),
462 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
463 &rect
464 );
465
466 r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]);
467 }
468 }
469
470 static void
471 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
472 {
473 unsigned i;
474
475 assert(r);
476
477 for (i = 0; i < 3; ++i) {
478 r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]);
479 r->pipe->transfer_destroy(r->pipe, r->tex_transfer[i]);
480 }
481 }
482
483 static bool
484 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
485 {
486 struct pipe_sampler_state sampler;
487 unsigned filters[5];
488 unsigned i;
489
490 assert(r);
491
492 r->viewport.scale[0] = r->pot_buffers ?
493 util_next_power_of_two(r->picture_width) : r->picture_width;
494 r->viewport.scale[1] = r->pot_buffers ?
495 util_next_power_of_two(r->picture_height) : r->picture_height;
496 r->viewport.scale[2] = 1;
497 r->viewport.scale[3] = 1;
498 r->viewport.translate[0] = 0;
499 r->viewport.translate[1] = 0;
500 r->viewport.translate[2] = 0;
501 r->viewport.translate[3] = 0;
502
503 r->fb_state.width = r->pot_buffers ?
504 util_next_power_of_two(r->picture_width) : r->picture_width;
505 r->fb_state.height = r->pot_buffers ?
506 util_next_power_of_two(r->picture_height) : r->picture_height;
507 r->fb_state.nr_cbufs = 1;
508 r->fb_state.zsbuf = NULL;
509
510 /* Luma filter */
511 filters[0] = PIPE_TEX_FILTER_NEAREST;
512 /* Chroma filters */
513 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 || true) { //TODO
514 filters[1] = PIPE_TEX_FILTER_NEAREST;
515 filters[2] = PIPE_TEX_FILTER_NEAREST;
516 }
517 else {
518 filters[1] = PIPE_TEX_FILTER_LINEAR;
519 filters[2] = PIPE_TEX_FILTER_LINEAR;
520 }
521 /* Fwd, bkwd ref filters */
522 filters[3] = PIPE_TEX_FILTER_LINEAR;
523 filters[4] = PIPE_TEX_FILTER_LINEAR;
524
525 for (i = 0; i < 5; ++i) {
526 memset(&sampler, 0, sizeof(sampler));
527 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
528 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
529 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
530 sampler.min_img_filter = filters[i];
531 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
532 sampler.mag_img_filter = filters[i];
533 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
534 sampler.compare_func = PIPE_FUNC_ALWAYS;
535 sampler.normalized_coords = 1;
536 /*sampler.shadow_ambient = ; */
537 /*sampler.lod_bias = ; */
538 sampler.min_lod = 0;
539 /*sampler.max_lod = ; */
540 sampler.border_color[0] = 0.0f;
541 sampler.border_color[1] = 0.0f;
542 sampler.border_color[2] = 0.0f;
543 sampler.border_color[3] = 0.0f;
544 /*sampler.max_anisotropy = ; */
545 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
546 }
547
548 return true;
549 }
550
551 static void
552 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
553 {
554 unsigned i;
555
556 assert(r);
557
558 for (i = 0; i < 5; ++i)
559 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
560 }
561
562 static bool
563 init_shaders(struct vl_mpeg12_mc_renderer *r)
564 {
565 assert(r);
566
567 assert(r->i_vs = create_vert_shader(r, 0, 0));
568 assert(r->i_fs = create_intra_frag_shader(r));
569
570 assert(r->p_vs[0] = create_vert_shader(r, 1, 1));
571 assert(r->p_vs[1] = create_vert_shader(r, 1, 2));
572 assert(r->p_fs[0] = create_frame_pred_frag_shader(r));
573 assert(r->p_fs[1] = create_field_pred_frag_shader(r));
574
575 assert(r->b_vs[0] = create_vert_shader(r, 2, 1));
576 assert(r->b_vs[1] = create_vert_shader(r, 2, 2));
577 assert(r->b_fs[0] = create_frame_bi_pred_frag_shader(r));
578 assert(r->b_fs[1] = create_field_bi_pred_frag_shader(r));
579
580 return true;
581 }
582
583 static void
584 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
585 {
586 assert(r);
587
588 r->pipe->delete_vs_state(r->pipe, r->i_vs);
589 r->pipe->delete_fs_state(r->pipe, r->i_fs);
590 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
591 r->pipe->delete_vs_state(r->pipe, r->p_vs[1]);
592 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
593 r->pipe->delete_fs_state(r->pipe, r->p_fs[1]);
594 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
595 r->pipe->delete_vs_state(r->pipe, r->b_vs[1]);
596 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
597 r->pipe->delete_fs_state(r->pipe, r->b_fs[1]);
598 }
599
600 static bool
601 init_buffers(struct vl_mpeg12_mc_renderer *r)
602 {
603 struct pipe_resource template;
604 struct pipe_vertex_element vertex_elems[7];
605 struct pipe_sampler_view sampler_view;
606
607 const unsigned mbw =
608 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
609 const unsigned mbh =
610 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
611
612 unsigned i;
613
614 assert(r);
615
616 r->macroblocks_per_batch =
617 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
618 r->num_macroblocks = 0;
619 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
620
621 memset(&template, 0, sizeof(struct pipe_resource));
622 template.target = PIPE_TEXTURE_3D;
623 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
624 template.format = PIPE_FORMAT_R16_SNORM;
625 template.last_level = 0;
626 template.width0 = r->pot_buffers ?
627 util_next_power_of_two(r->picture_width) : r->picture_width;
628 template.height0 = r->pot_buffers ?
629 util_next_power_of_two(r->picture_height) : r->picture_height;
630 template.depth0 = 1;
631 template.usage = PIPE_USAGE_DYNAMIC;
632 template.bind = PIPE_BIND_SAMPLER_VIEW;
633 template.flags = 0;
634
635 r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
636
637 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
638 template.width0 = r->pot_buffers ?
639 util_next_power_of_two(r->picture_width / 2) :
640 r->picture_width / 2;
641 template.height0 = r->pot_buffers ?
642 util_next_power_of_two(r->picture_height / 2) :
643 r->picture_height / 2;
644 }
645 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
646 template.height0 = r->pot_buffers ?
647 util_next_power_of_two(r->picture_height / 2) :
648 r->picture_height / 2;
649
650 r->textures.individual.cb =
651 r->pipe->screen->resource_create(r->pipe->screen, &template);
652 r->textures.individual.cr =
653 r->pipe->screen->resource_create(r->pipe->screen, &template);
654
655 for (i = 0; i < 3; ++i) {
656 u_sampler_view_default_template(&sampler_view,
657 r->textures.all[i],
658 r->textures.all[i]->format);
659 r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
660 }
661
662 r->vertex_bufs.individual.rect.stride = sizeof(struct vertex2f);
663 r->vertex_bufs.individual.rect.max_index = 24 * r->macroblocks_per_batch - 1;
664 r->vertex_bufs.individual.rect.buffer_offset = 0;
665 r->vertex_bufs.individual.rect.buffer = pipe_buffer_create
666 (
667 r->pipe->screen,
668 PIPE_BIND_VERTEX_BUFFER,
669 sizeof(struct vertex2f) * 24 * r->macroblocks_per_batch
670 );
671
672 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vert_stream_0);
673 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
674 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
675 /* XXX: Create with usage DYNAMIC or STREAM */
676 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
677 (
678 r->pipe->screen,
679 PIPE_BIND_VERTEX_BUFFER,
680 sizeof(struct vert_stream_0) * 24 * r->macroblocks_per_batch
681 );
682
683 for (i = 0; i < 2; ++i) {
684 r->vertex_bufs.individual.ref[i].stride = sizeof(struct vertex2f) * 2;
685 r->vertex_bufs.individual.ref[i].max_index = 24 * r->macroblocks_per_batch - 1;
686 r->vertex_bufs.individual.ref[i].buffer_offset = 0;
687 /* XXX: Create with usage DYNAMIC or STREAM */
688 r->vertex_bufs.individual.ref[i].buffer = pipe_buffer_create
689 (
690 r->pipe->screen,
691 PIPE_BIND_VERTEX_BUFFER,
692 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
693 );
694 }
695
696 memset(&vertex_elems, 0, sizeof(vertex_elems));
697
698 /* Rectangle element */
699 vertex_elems[0].src_offset = 0;
700 vertex_elems[0].instance_divisor = 0;
701 vertex_elems[0].vertex_buffer_index = 0;
702 vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
703
704 /* Position element */
705 vertex_elems[1].src_offset = 0;
706 vertex_elems[1].instance_divisor = 0;
707 vertex_elems[1].vertex_buffer_index = 1;
708 vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
709
710 /* y, cr, cb z-coordinate element */
711 vertex_elems[2].src_offset = sizeof(struct vertex2f);
712 vertex_elems[2].instance_divisor = 0;
713 vertex_elems[2].vertex_buffer_index = 1;
714 vertex_elems[2].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
715
716 /* First ref surface top field texcoord element */
717 vertex_elems[3].src_offset = 0;
718 vertex_elems[3].instance_divisor = 0;
719 vertex_elems[3].vertex_buffer_index = 2;
720 vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
721
722 /* First ref surface bottom field texcoord element */
723 vertex_elems[4].src_offset = sizeof(struct vertex2f);
724 vertex_elems[4].instance_divisor = 0;
725 vertex_elems[4].vertex_buffer_index = 2;
726 vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
727
728 /* Second ref surface top field texcoord element */
729 vertex_elems[5].src_offset = 0;
730 vertex_elems[5].instance_divisor = 0;
731 vertex_elems[5].vertex_buffer_index = 3;
732 vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
733
734 /* Second ref surface bottom field texcoord element */
735 vertex_elems[6].src_offset = sizeof(struct vertex2f);
736 vertex_elems[6].instance_divisor = 0;
737 vertex_elems[6].vertex_buffer_index = 3;
738 vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
739
740 r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 3, vertex_elems);
741 r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 5, vertex_elems);
742 r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 7, vertex_elems);
743
744 r->vs_const_buf = pipe_buffer_create
745 (
746 r->pipe->screen,
747 PIPE_BIND_CONSTANT_BUFFER,
748 sizeof(struct vertex_shader_consts)
749 );
750
751 return true;
752 }
753
754 static void
755 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
756 {
757 unsigned i;
758
759 assert(r);
760
761 pipe_resource_reference(&r->vs_const_buf, NULL);
762
763 for (i = 0; i < 3; ++i) {
764 pipe_sampler_view_reference(&r->sampler_views.all[i], NULL);
765 r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state.all[i]);
766 pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
767 pipe_resource_reference(&r->textures.all[i], NULL);
768 }
769
770 FREE(r->macroblock_buf);
771 }
772
773 static enum MACROBLOCK_TYPE
774 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
775 {
776 assert(mb);
777
778 switch (mb->mb_type) {
779 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
780 return MACROBLOCK_TYPE_INTRA;
781 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
782 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
783 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
784 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
785 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
786 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
787 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
788 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
789 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
790 default:
791 assert(0);
792 }
793
794 /* Unreachable */
795 return -1;
796 }
797
798 void
799 gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
800 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask)
801 {
802 unsigned cbp = mb->cbp;
803 unsigned i;
804 struct vert_stream_0 v;
805
806 assert(vb);
807
808 v.pos.x = mb->mbx;
809 v.pos.y = mb->mby;
810
811 if (cbp & luma_mask || mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
812 v.luma_eb = 0.0f;
813 }
814 else {
815 v.luma_eb = -1.0f;
816 }
817
818 if (cbp & cb_mask) {
819 v.cb_eb = 0.0f;
820 }
821 else {
822 v.cb_eb = -1.0f;
823 }
824
825 if (cbp & cr_mask) {
826 v.cr_eb = 0.0f;
827 }
828 else {
829 v.cr_eb = -1.0f;
830 }
831
832 for ( i = 0; i < 6; ++i )
833 memcpy(vb + i, &v, sizeof(v));
834 }
835
836 void
837 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
838 struct pipe_mpeg12_macroblock *mb, unsigned pos,
839 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
840 {
841 struct vertex2f mo_vec[2];
842
843 unsigned i;
844
845 assert(r);
846 assert(mb);
847 assert(ycbcr_vb);
848 assert(pos < r->macroblocks_per_batch);
849
850 mo_vec[1].x = 0;
851 mo_vec[1].y = 0;
852
853 switch (mb->mb_type) {
854 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
855 {
856 struct vertex2f *vb;
857
858 assert(ref_vb && ref_vb[1]);
859
860 vb = ref_vb[1] + pos * 2 * 24;
861
862 mo_vec[0].x = mb->pmv[0][1][0];
863 mo_vec[0].y = mb->pmv[0][1][1];
864
865 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
866 for (i = 0; i < 24 * 2; i += 2) {
867 vb[i].x = mo_vec[0].x;
868 vb[i].y = mo_vec[0].y;
869 }
870 }
871 else {
872 mo_vec[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
873
874 mo_vec[1].x = mb->pmv[1][1][0];
875 mo_vec[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
876
877 if(mb->mvfs[0][1]) mo_vec[0].y += 2;
878 if(!mb->mvfs[1][1]) mo_vec[1].y -= 2;
879
880 for (i = 0; i < 24 * 2; i += 2) {
881 vb[i].x = mo_vec[0].x;
882 vb[i].y = mo_vec[0].y;
883 vb[i + 1].x = mo_vec[1].x;
884 vb[i + 1].y = mo_vec[1].y;
885 }
886 }
887
888 /* fall-through */
889 }
890 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
891 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
892 {
893 struct vertex2f *vb;
894
895 assert(ref_vb && ref_vb[0]);
896
897 vb = ref_vb[0] + pos * 2 * 24;
898
899 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
900 mo_vec[0].x = mb->pmv[0][1][0];
901 mo_vec[0].y = mb->pmv[0][1][1];
902
903 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
904 mo_vec[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
905
906 mo_vec[1].x = mb->pmv[1][1][0];
907 mo_vec[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
908
909 if(mb->mvfs[0][1]) mo_vec[0].y += 2;
910 if(!mb->mvfs[1][1]) mo_vec[1].y -= 2;
911 }
912 }
913 else {
914 mo_vec[0].x = mb->pmv[0][0][0];
915 mo_vec[0].y = mb->pmv[0][0][1];
916
917 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
918 mo_vec[0].y = mb->pmv[0][0][1] - (mb->pmv[0][0][1] % 4);
919
920 mo_vec[1].x = mb->pmv[1][0][0];
921 mo_vec[1].y = mb->pmv[1][0][1] - (mb->pmv[1][0][1] % 4);
922
923 if(mb->mvfs[0][0]) mo_vec[0].y += 2;
924 if(!mb->mvfs[1][0]) mo_vec[1].y -= 2;
925 }
926 }
927
928 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
929 for (i = 0; i < 24 * 2; i += 2) {
930 vb[i].x = mo_vec[0].x;
931 vb[i].y = mo_vec[0].y;
932 }
933 }
934 else {
935 for (i = 0; i < 24 * 2; i += 2) {
936 vb[i].x = mo_vec[0].x;
937 vb[i].y = mo_vec[0].y;
938 vb[i + 1].x = mo_vec[1].x;
939 vb[i + 1].y = mo_vec[1].y;
940 }
941 }
942
943 /* fall-through */
944 }
945 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
946 {
947 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
948
949 gen_block_verts(vb, mb, 32, 2, 1);
950 gen_block_verts(vb + 6, mb, 16, 2, 1);
951 gen_block_verts(vb + 12, mb, 8, 2, 1);
952 gen_block_verts(vb + 18, mb, 4, 2, 1);
953
954 break;
955 }
956 default:
957 assert(0);
958 }
959 }
960
961 static void
962 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
963 unsigned *num_macroblocks)
964 {
965 unsigned offset[NUM_MACROBLOCK_TYPES];
966 struct vert_stream_0 *ycbcr_vb;
967 struct vertex2f *ref_vb[2];
968 struct pipe_transfer *buf_transfer[3];
969 unsigned i;
970
971 assert(r);
972 assert(num_macroblocks);
973
974 for (i = 0; i < r->num_macroblocks; ++i) {
975 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
976 ++num_macroblocks[mb_type];
977 }
978
979 offset[0] = 0;
980
981 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
982 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
983
984 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
985 (
986 r->pipe,
987 r->vertex_bufs.individual.ycbcr.buffer,
988 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
989 &buf_transfer[0]
990 );
991
992 for (i = 0; i < 2; ++i)
993 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
994 (
995 r->pipe,
996 r->vertex_bufs.individual.ref[i].buffer,
997 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
998 &buf_transfer[i + 1]
999 );
1000
1001 for (i = 0; i < r->num_macroblocks; ++i) {
1002 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1003
1004 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
1005 ycbcr_vb, ref_vb);
1006
1007 ++offset[mb_type];
1008 }
1009
1010 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
1011 for (i = 0; i < 2; ++i)
1012 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
1013 }
1014
1015 static struct pipe_sampler_view
1016 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
1017 {
1018 struct pipe_sampler_view *sampler_view;
1019 assert(r);
1020 assert(surface);
1021
1022 sampler_view = (struct pipe_sampler_view*)util_keymap_lookup(r->texview_map, &surface);
1023 if (!sampler_view) {
1024 struct pipe_sampler_view templat;
1025 boolean added_to_map;
1026
1027 u_sampler_view_default_template(&templat, surface->texture,
1028 surface->texture->format);
1029 sampler_view = r->pipe->create_sampler_view(r->pipe, surface->texture,
1030 &templat);
1031 if (!sampler_view)
1032 return NULL;
1033
1034 added_to_map = util_keymap_insert(r->texview_map, &surface,
1035 sampler_view, r->pipe);
1036 assert(added_to_map);
1037 }
1038
1039 return sampler_view;
1040 }
1041
1042 static void
1043 flush(struct vl_mpeg12_mc_renderer *r)
1044 {
1045 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1046 unsigned vb_start = 0;
1047 struct vertex_shader_consts *vs_consts;
1048 struct pipe_transfer *buf_transfer;
1049 struct vertex2f *rect;
1050 unsigned i;
1051
1052 assert(r);
1053 assert(r->num_macroblocks == r->macroblocks_per_batch);
1054
1055 rect = pipe_buffer_map
1056 (
1057 r->pipe,
1058 r->vertex_bufs.individual.rect.buffer,
1059 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1060 &buf_transfer
1061 );
1062
1063 for ( i = 0; i < r->macroblocks_per_batch; ++i)
1064 memcpy(rect + i * 24, &const_rectangle, sizeof(const_rectangle));
1065
1066 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.rect.buffer, buf_transfer);
1067
1068 gen_macroblock_stream(r, num_macroblocks);
1069
1070 r->fb_state.cbufs[0] = r->surface;
1071
1072 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1073 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1074
1075 vs_consts = pipe_buffer_map
1076 (
1077 r->pipe, r->vs_const_buf,
1078 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1079 &buf_transfer
1080 );
1081
1082 vs_consts->norm.x = 1.0f / r->surface->width;
1083 vs_consts->norm.y = 1.0f / r->surface->height;
1084
1085 pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
1086
1087 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1088 r->vs_const_buf);
1089
1090 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1091 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1092 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
1093 r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
1094 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1095 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1096 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1097
1098 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1099 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1100 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1101 }
1102
1103 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1104 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1105 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1106 r->textures.individual.ref[0] = r->past->texture;
1107 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1108 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1109 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1110 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1111 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1112
1113 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1114 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1115 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1116 }
1117
1118 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0) {
1119 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1120 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1121 r->textures.individual.ref[0] = r->past->texture;
1122 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1123 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1124 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1125 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1126 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1127
1128 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1129 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1130 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1131 }
1132
1133 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1134 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1135 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1136 r->textures.individual.ref[0] = r->future->texture;
1137 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1138 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1139 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1140 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1141 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1142
1143 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1144 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1145 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1146 }
1147
1148 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0) {
1149 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1150 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1151 r->textures.individual.ref[0] = r->future->texture;
1152 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1153 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1154 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1155 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1156 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1157
1158 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1159 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1160 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1161 }
1162
1163 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1164 r->pipe->set_vertex_buffers(r->pipe, 4, r->vertex_bufs.all);
1165 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1166 r->textures.individual.ref[0] = r->past->texture;
1167 r->textures.individual.ref[1] = r->future->texture;
1168 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1169 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1170 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1171 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1172 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1173 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1174
1175 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1176 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1177 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1178 }
1179
1180 if (num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0) {
1181 r->pipe->set_vertex_buffers(r->pipe, 4, r->vertex_bufs.all);
1182 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1183 r->textures.individual.ref[0] = r->past->texture;
1184 r->textures.individual.ref[1] = r->future->texture;
1185 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1186 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1187 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1188 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1189 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1190 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1191
1192 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1193 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1194 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1195 }
1196
1197 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1198
1199 r->num_macroblocks = 0;
1200 }
1201
1202 static void
1203 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1204 {
1205 unsigned y;
1206
1207 assert(src);
1208 assert(dst);
1209
1210 for (y = 0; y < BLOCK_HEIGHT; ++y)
1211 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1212 }
1213
1214 static void
1215 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1216 {
1217 unsigned y;
1218
1219 assert(src);
1220 assert(dst);
1221
1222 for (y = 0; y < BLOCK_HEIGHT; ++y)
1223 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1224 }
1225
1226 static void
1227 fill_field_zero_block(short *dst, unsigned dst_pitch)
1228 {
1229 unsigned y;
1230
1231 assert(dst);
1232
1233 for (y = 0; y < BLOCK_HEIGHT; ++y)
1234 memset(dst + y * dst_pitch * 2, 0, BLOCK_WIDTH * 2);
1235 }
1236
1237 static void
1238 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1239 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1240 {
1241 unsigned tex_pitch;
1242 short *texels;
1243 unsigned tb = 0, sb = 0;
1244 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1245 unsigned x, y;
1246
1247 assert(r);
1248 assert(blocks);
1249
1250 tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->resource->format);
1251 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1252
1253 for (y = 0; y < 2; ++y) {
1254 for (x = 0; x < 2; ++x, ++tb) {
1255 if ((cbp >> (5 - tb)) & 1) {
1256 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1257 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1258 texels + y * tex_pitch * BLOCK_HEIGHT +
1259 x * BLOCK_WIDTH, tex_pitch);
1260 }
1261 else {
1262 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1263 texels + y * tex_pitch + x * BLOCK_WIDTH,
1264 tex_pitch);
1265 }
1266
1267 ++sb;
1268 }
1269 else if(dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
1270 fill_field_zero_block(texels + y * tex_pitch + x * BLOCK_WIDTH, tex_pitch);
1271 }
1272 }
1273 }
1274
1275 /* TODO: Implement 422, 444 */
1276 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1277
1278 mbpx /= 2;
1279 mbpy /= 2;
1280
1281 for (tb = 0; tb < 2; ++tb) {
1282 tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->resource->format);
1283 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1284
1285 if ((cbp >> (1 - tb)) & 1) {
1286 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1287 ++sb;
1288 }
1289 }
1290 }
1291
1292 static void
1293 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1294 struct pipe_mpeg12_macroblock *mb)
1295 {
1296 assert(r);
1297 assert(mb);
1298 assert(mb->blocks);
1299 assert(r->num_macroblocks < r->macroblocks_per_batch);
1300
1301 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1302 sizeof(struct pipe_mpeg12_macroblock));
1303
1304 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1305
1306 ++r->num_macroblocks;
1307 }
1308
/**
 * Delete callback registered with the sampler view keymap: releases the
 * reference held on the sampler view stored as the entry's data.
 *
 * \param map  keymap the entry belongs to (only checked for non-NULL)
 * \param key  entry key (only checked for non-NULL)
 * \param data the struct pipe_sampler_view stored in the entry
 * \param user user pointer handed to the keymap (only checked for non-NULL)
 */
static void
texview_map_delete(const struct keymap *map,
                   const void *key, void *data,
                   void *user)
{
   struct pipe_sampler_view *view;

   assert(map);
   assert(key);
   assert(data);
   assert(user);

   view = (struct pipe_sampler_view*)data;
   pipe_sampler_view_reference(&view, NULL);
}
1323
1324 bool
1325 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1326 struct pipe_context *pipe,
1327 unsigned picture_width,
1328 unsigned picture_height,
1329 enum pipe_video_chroma_format chroma_format,
1330 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1331 bool pot_buffers)
1332 {
1333 assert(renderer);
1334 assert(pipe);
1335 /* TODO: Implement other policies */
1336 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1337 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1338 assert(pot_buffers);
1339
1340 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1341
1342 renderer->pipe = pipe;
1343 renderer->picture_width = picture_width;
1344 renderer->picture_height = picture_height;
1345 renderer->chroma_format = chroma_format;
1346 renderer->bufmode = bufmode;
1347 renderer->pot_buffers = pot_buffers;
1348
1349 renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
1350 texview_map_delete);
1351 if (!renderer->texview_map)
1352 return false;
1353
1354 if (!init_pipe_state(renderer)) {
1355 util_delete_keymap(renderer->texview_map, renderer->pipe);
1356 return false;
1357 }
1358 if (!init_shaders(renderer)) {
1359 util_delete_keymap(renderer->texview_map, renderer->pipe);
1360 cleanup_pipe_state(renderer);
1361 return false;
1362 }
1363 if (!init_buffers(renderer)) {
1364 util_delete_keymap(renderer->texview_map, renderer->pipe);
1365 cleanup_shaders(renderer);
1366 cleanup_pipe_state(renderer);
1367 return false;
1368 }
1369
1370 renderer->surface = NULL;
1371 renderer->past = NULL;
1372 renderer->future = NULL;
1373 renderer->num_macroblocks = 0;
1374
1375 xfer_buffers_map(renderer);
1376
1377 return true;
1378 }
1379
1380 void
1381 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1382 {
1383 assert(renderer);
1384
1385 xfer_buffers_unmap(renderer);
1386
1387 util_delete_keymap(renderer->texview_map, renderer->pipe);
1388 cleanup_pipe_state(renderer);
1389 cleanup_shaders(renderer);
1390 cleanup_buffers(renderer);
1391
1392 pipe_surface_reference(&renderer->surface, NULL);
1393 pipe_surface_reference(&renderer->past, NULL);
1394 pipe_surface_reference(&renderer->future, NULL);
1395 }
1396
1397 void
1398 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1399 *renderer,
1400 struct pipe_surface *surface,
1401 struct pipe_surface *past,
1402 struct pipe_surface *future,
1403 unsigned num_macroblocks,
1404 struct pipe_mpeg12_macroblock
1405 *mpeg12_macroblocks,
1406 struct pipe_fence_handle **fence)
1407 {
1408 bool new_surface = false;
1409
1410 assert(renderer);
1411 assert(surface);
1412 assert(num_macroblocks);
1413 assert(mpeg12_macroblocks);
1414
1415 if (renderer->surface) {
1416 if (surface != renderer->surface) {
1417 if (renderer->num_macroblocks > 0) {
1418 xfer_buffers_unmap(renderer);
1419 flush(renderer);
1420 }
1421
1422 new_surface = true;
1423 }
1424
1425 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1426 assert(surface != renderer->surface || renderer->past == past);
1427 assert(surface != renderer->surface || renderer->future == future);
1428 }
1429 else
1430 new_surface = true;
1431
1432 if (new_surface) {
1433 pipe_surface_reference(&renderer->surface, surface);
1434 pipe_surface_reference(&renderer->past, past);
1435 pipe_surface_reference(&renderer->future, future);
1436 renderer->fence = fence;
1437 }
1438
1439 while (num_macroblocks) {
1440 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1441 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1442 unsigned i;
1443
1444 for (i = 0; i < num_to_submit; ++i) {
1445 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1446 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1447 }
1448
1449 num_macroblocks -= num_to_submit;
1450
1451 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1452 xfer_buffers_unmap(renderer);
1453 flush(renderer);
1454 xfer_buffers_map(renderer);
1455 /* Next time we get this surface it may have new ref frames */
1456 pipe_surface_reference(&renderer->surface, NULL);
1457 pipe_surface_reference(&renderer->past, NULL);
1458 pipe_surface_reference(&renderer->future, NULL);
1459 }
1460 }
1461 }