[g3dvl] remove invalid use of assert
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include "util/u_draw.h"
30 #include <assert.h>
31 #include <pipe/p_context.h>
32 #include <util/u_inlines.h>
33 #include <util/u_format.h>
34 #include <util/u_math.h>
35 #include <util/u_memory.h>
36 #include <util/u_keymap.h>
37 #include <util/u_sampler.h>
38 #include <tgsi/tgsi_ureg.h>
39
/* NOTE(review): not referenced in the visible part of this file - confirm it is still needed */
#define DEFAULT_BUF_ALIGNMENT 1
/* Macroblock size; init_buffers() divides the aligned picture size by these to count macroblocks */
#define MACROBLOCK_WIDTH 16
#define MACROBLOCK_HEIGHT 16
/* NOTE(review): presumably the size of one block inside a macroblock; not used in the visible part of this file */
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8
45
/*
 * Layout of the vertex shader constant buffer (vs_const_buf is created with
 * sizeof this struct).
 */
struct vertex_shader_consts
{
   /* read by the vertex shader as constant 0 and multiplied with the macroblock size */
   struct vertex4f norm;
};
50
/*
 * NOTE(review): presumably the layout of a fragment shader constant buffer;
 * none of the fragment shaders in the visible part of this file declare
 * constants - confirm this is still used.
 */
struct fragment_shader_consts
{
   struct vertex4f multiplier;
   struct vertex4f div;
};
56
/* Per-vertex data of the "ycbcr" vertex buffer, written by gen_macroblock_verts() */
struct vert_stream_0
{
   /* macroblock position (mbx, mby) */
   struct vertex2f pos;
   /* 1.0f when the macroblock uses field DCT, 0.0f otherwise */
   float interlaced;
};
62
/* Vertex shader input slots; also used as indices into the vertex element array */
enum VS_INPUT
{
   VS_I_RECT = 0,       /* corner of the macroblock quad */
   VS_I_VPOS = 1,       /* macroblock position */
   VS_I_INTERLACED = 2, /* field/frame DCT flag */
   VS_I_MV0 = 3,        /* motion vectors 0..3 */
   VS_I_MV1 = 4,
   VS_I_MV2 = 5,
   VS_I_MV3 = 6,

   NUM_VS_INPUTS = 7
};
75
/* Semantic indices shared by vertex shader outputs and fragment shader inputs */
enum VS_OUTPUT
{
   VS_O_VPOS = 0,
   VS_O_LINE = 1,
   VS_O_TEX0 = 2,
   VS_O_TEX1 = 3,
   VS_O_TEX2 = 4,
   VS_O_INTERLACED = 5,
   VS_O_MV0 = 6,
   VS_O_MV1 = 7,
   VS_O_MV2 = 8,
   VS_O_MV3 = 9
};
89
/*
 * Internal macroblock classification: the pipe macroblock type combined with
 * frame/field motion. Used to index per-type counters and offsets.
 */
enum MACROBLOCK_TYPE
{
   MACROBLOCK_TYPE_INTRA = 0,
   MACROBLOCK_TYPE_FWD_FRAME_PRED = 1,
   MACROBLOCK_TYPE_FWD_FIELD_PRED = 2,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED = 3,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED = 4,
   MACROBLOCK_TYPE_BI_FRAME_PRED = 5,
   MACROBLOCK_TYPE_BI_FIELD_PRED = 6,

   NUM_MACROBLOCK_TYPES = 7
};
102
/*
 * Vertices for a quad covering a macroblock: the four corners of the unit
 * square. init_const_buffers() copies this once per macroblock into the
 * "rect" vertex buffer.
 */
static const struct vertex2f const_quad[4] = {
   {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
};
107
108 static void *
109 create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
110 {
111 struct ureg_program *shader;
112 struct ureg_src norm, mbs;
113 struct ureg_src vrect, vpos, interlaced, vmv[4];
114 struct ureg_dst scale, t_vpos, t_vtex;
115 struct ureg_dst o_vpos, o_line, o_vtex[3], o_interlaced, o_vmv[4];
116 unsigned i, j, count, label;
117
118 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
119 if (!shader)
120 return NULL;
121
122 norm = ureg_DECL_constant(shader, 0);
123 mbs = ureg_imm2f(shader, MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT);
124
125 scale = ureg_DECL_temporary(shader);
126 t_vpos = ureg_DECL_temporary(shader);
127 t_vtex = ureg_DECL_temporary(shader);
128
129 vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
130 vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
131 interlaced = ureg_DECL_vs_input(shader, VS_I_INTERLACED);
132
133 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
134 o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
135 o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
136 o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
137 o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);
138 o_interlaced = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INTERLACED);
139
140 count=0;
141 for (i = 0; i < ref_frames; ++i) {
142 for (j = 0; j < 2; ++j) {
143 if(j < mv_per_frame) {
144 vmv[count] = ureg_DECL_vs_input(shader, VS_I_MV0 + i * 2 + j);
145 o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + count);
146 count++;
147 }
148 /* workaround for r600g */
149 else if(ref_frames == 2)
150 ureg_DECL_vs_input(shader, VS_I_MV0 + i * 2 + j);
151 }
152 }
153
154 /*
155 * scale = norm * mbs;
156 *
157 * t_vpos = (vpos + vrect) * scale
158 * o_vpos.xy = t_vpos
159 * o_vpos.zw = vpos
160 *
161 * o_line = vpos * 8
162 *
163 * if(interlaced) {
164 * t_vtex.x = vrect.x
165 * t_vtex.y = vrect.y * 0.5
166 * t_vtex += vpos
167 *
168 * o_vtex[0].xy = t_vtex * scale
169 *
170 * t_vtex.y += 0.5
171 * o_vtex[1].xy = t_vtex * scale
172 * } else {
173 * o_vtex[0..1].xy = t_vpos
174 * }
175 * o_vtex[2].xy = t_vpos
176 *
177 * if(count > 0) { // Apply motion vectors
178 * scale = norm * 0.5;
179 * o_vmv[0..count] = t_vpos + vmv[0..4] * scale
180 * }
181 *
182 */
183 ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, mbs);
184
185 ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
186 ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), ureg_src(scale));
187 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
188 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
189
190 ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_XY), vrect,
191 ureg_imm2f(shader, MACROBLOCK_WIDTH / 2, MACROBLOCK_HEIGHT / 2));
192
193 ureg_IF(shader, interlaced, &label);
194
195 ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
196 ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
197 ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex));
198 ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), ureg_src(scale));
199 ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
200 ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), ureg_src(scale));
201
202 ureg_ELSE(shader, &label);
203
204 ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
205 ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
206
207 ureg_ENDIF(shader);
208 ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
209
210 ureg_MOV(shader, o_interlaced, interlaced);
211
212 if(count > 0) {
213 ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, ureg_imm1f(shader, 0.5f));
214 for (i = 0; i < count; ++i)
215 ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(scale), vmv[i], ureg_src(t_vpos));
216 }
217
218 ureg_release_temporary(shader, t_vtex);
219 ureg_release_temporary(shader, t_vpos);
220 ureg_release_temporary(shader, scale);
221
222 ureg_END(shader);
223
224 return ureg_create_shader_and_destroy(shader, r->pipe);
225 }
226
227 static struct ureg_dst
228 calc_field(struct ureg_program *shader)
229 {
230 struct ureg_dst tmp;
231 struct ureg_src line;
232
233 tmp = ureg_DECL_temporary(shader);
234 line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
235
236 /*
237 * line going from 0 to 8 in steps of 0.5
238 *
239 * tmp.z = fraction(line.y)
240 * tmp.z = tmp.z >= 0.5 ? 1 : 0
241 */
242 ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
243 ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
244
245 return tmp;
246 }
247
248 static struct ureg_dst
249 fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
250 {
251 struct ureg_src tc[3], interlaced;
252 struct ureg_src sampler[3];
253 struct ureg_dst texel, t_tc, tmp;
254 unsigned i, label;
255
256 texel = ureg_DECL_temporary(shader);
257 t_tc = ureg_DECL_temporary(shader);
258 tmp = ureg_DECL_temporary(shader);
259
260 tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
261 tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
262 tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR);
263
264 interlaced = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INTERLACED, TGSI_INTERPOLATE_CONSTANT);
265
266 for (i = 0; i < 3; ++i) {
267 sampler[i] = ureg_DECL_sampler(shader, i);
268 }
269
270 /*
271 * texel.y = tex(field ? tc[1] : tc[0], sampler[0])
272 * texel.cb = tex(tc[2], sampler[1])
273 * texel.cr = tex(tc[2], sampler[2])
274 */
275 ureg_MUL(shader, tmp, interlaced, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y));
276
277 for (i = 0; i < 3; ++i) {
278 if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
279
280 ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), &label);
281 ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[1]);
282 ureg_ELSE(shader, &label);
283 ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[0]);
284 ureg_ENDIF(shader);
285
286 } else {
287 ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[2]);
288 }
289
290 /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
291 ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler[i]);
292 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
293 }
294
295 ureg_release_temporary(shader, t_tc);
296 ureg_release_temporary(shader, tmp);
297
298 return texel;
299 }
300
301 static void *
302 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
303 {
304 struct ureg_program *shader;
305 struct ureg_dst field, texel;
306 struct ureg_dst fragment;
307
308 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
309 if (!shader)
310 return NULL;
311
312 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
313
314 /*
315 * texel = fetch_ycbcr()
316 * fragment = texel * scale + 0.5
317 */
318 field = calc_field(shader);
319 texel = fetch_ycbcr(r, shader, field);
320 ureg_ADD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
321
322 ureg_release_temporary(shader, field);
323 ureg_release_temporary(shader, texel);
324 ureg_END(shader);
325
326 return ureg_create_shader_and_destroy(shader, r->pipe);
327 }
328
329 static void *
330 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
331 {
332 struct ureg_program *shader;
333 struct ureg_src tc;
334 struct ureg_src sampler;
335 struct ureg_dst field, texel, ref;
336 struct ureg_dst fragment;
337
338 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
339 if (!shader)
340 return NULL;
341
342 tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0, TGSI_INTERPOLATE_LINEAR);
343 sampler = ureg_DECL_sampler(shader, 3);
344
345 ref = ureg_DECL_temporary(shader);
346 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
347
348 /*
349 * texel = fetch_ycbcr()
350 * ref = tex(tc, sampler)
351 * fragment = texel * scale + ref
352 */
353 field = calc_field(shader);
354 texel = fetch_ycbcr(r, shader, field);
355 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc, sampler);
356 ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref));
357
358 ureg_release_temporary(shader, field);
359 ureg_release_temporary(shader, texel);
360 ureg_release_temporary(shader, ref);
361 ureg_END(shader);
362
363 return ureg_create_shader_and_destroy(shader, r->pipe);
364 }
365
366 static void *
367 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
368 {
369 struct ureg_program *shader;
370 struct ureg_src tc[2];
371 struct ureg_src sampler;
372 struct ureg_dst texel, ref, field;
373 struct ureg_dst fragment;
374 unsigned i, label;
375
376 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
377 if (!shader)
378 return NULL;
379
380 for (i = 0; i < 2; ++i)
381 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
382 sampler = ureg_DECL_sampler(shader, 3);
383
384 ref = ureg_DECL_temporary(shader);
385 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
386
387 /*
388 * texel = fetch_ycbcr()
389 * field = calc_field();
390 * if(field == 1)
391 * ref = tex(tc[1], sampler)
392 * else
393 * ref = tex(tc[0], sampler)
394 * fragment = texel * scale + ref
395 */
396 field = calc_field(shader);
397 texel = fetch_ycbcr(r, shader, field);
398
399 ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
400 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[1], sampler);
401 ureg_ELSE(shader, &label);
402 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[0], sampler);
403 ureg_ENDIF(shader);
404
405 ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref));
406
407 ureg_release_temporary(shader, field);
408 ureg_release_temporary(shader, texel);
409 ureg_release_temporary(shader, ref);
410 ureg_END(shader);
411
412 return ureg_create_shader_and_destroy(shader, r->pipe);
413 }
414
415 static void *
416 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
417 {
418 struct ureg_program *shader;
419 struct ureg_src tc[2];
420 struct ureg_src sampler[2];
421 struct ureg_dst field, texel, ref[2];
422 struct ureg_dst fragment;
423 unsigned i;
424
425 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
426 if (!shader)
427 return NULL;
428
429 for (i = 0; i < 2; ++i) {
430 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
431 sampler[i] = ureg_DECL_sampler(shader, i + 3);
432 }
433
434 ref[0] = ureg_DECL_temporary(shader);
435 ref[1] = ureg_DECL_temporary(shader);
436 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
437
438 /*
439 * texel = fetch_ycbcr()
440 * ref[0..1 = tex(tc[3..4], sampler[3..4])
441 * ref[0] = lerp(ref[0], ref[1], 0.5)
442 * fragment = texel * scale + ref[0]
443 */
444 field = calc_field(shader);
445 texel = fetch_ycbcr(r, shader, field);
446 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
447 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]);
448 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
449
450 ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref[0]));
451
452 ureg_release_temporary(shader, field);
453 ureg_release_temporary(shader, texel);
454 ureg_release_temporary(shader, ref[0]);
455 ureg_release_temporary(shader, ref[1]);
456 ureg_END(shader);
457
458 return ureg_create_shader_and_destroy(shader, r->pipe);
459 }
460
461 static void *
462 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
463 {
464 struct ureg_program *shader;
465 struct ureg_src tc[4];
466 struct ureg_src sampler[2];
467 struct ureg_dst texel, ref[2], field;
468 struct ureg_dst fragment;
469 unsigned i, label;
470
471 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
472 if (!shader)
473 return NULL;
474
475 for (i = 0; i < 4; ++i)
476 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
477 for (i = 0; i < 2; ++i)
478 sampler[i] = ureg_DECL_sampler(shader, i + 3);
479
480 texel = ureg_DECL_temporary(shader);
481 ref[0] = ureg_DECL_temporary(shader);
482 ref[1] = ureg_DECL_temporary(shader);
483 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
484
485 /*
486 * texel = fetch_ycbcr()
487 * if(field == 1)
488 * ref[0..1] = tex(tc[1|3], sampler[0..1])
489 * else
490 * ref[0..1] = tex(tc[0|2], sampler[0..1])
491 * ref[0] = lerp(ref[0], ref[1], 0.5)
492 * fragment = texel * scale + ref[0]
493 */
494 field = calc_field(shader);
495 texel = fetch_ycbcr(r, shader, field);
496
497 ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
498 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
499 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[3], sampler[1]);
500 ureg_ELSE(shader, &label);
501 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
502 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[2], sampler[1]);
503 ureg_ENDIF(shader);
504
505 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
506
507 ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref[0]));
508
509 ureg_release_temporary(shader, field);
510 ureg_release_temporary(shader, texel);
511 ureg_release_temporary(shader, ref[0]);
512 ureg_release_temporary(shader, ref[1]);
513 ureg_END(shader);
514
515 return ureg_create_shader_and_destroy(shader, r->pipe);
516 }
517
518 static bool
519 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
520 {
521 struct pipe_sampler_state sampler;
522 unsigned filters[5];
523 unsigned i;
524
525 assert(r);
526
527 r->viewport.scale[0] = r->pot_buffers ?
528 util_next_power_of_two(r->picture_width) : r->picture_width;
529 r->viewport.scale[1] = r->pot_buffers ?
530 util_next_power_of_two(r->picture_height) : r->picture_height;
531 r->viewport.scale[2] = 1;
532 r->viewport.scale[3] = 1;
533 r->viewport.translate[0] = 0;
534 r->viewport.translate[1] = 0;
535 r->viewport.translate[2] = 0;
536 r->viewport.translate[3] = 0;
537
538 r->fb_state.width = r->pot_buffers ?
539 util_next_power_of_two(r->picture_width) : r->picture_width;
540 r->fb_state.height = r->pot_buffers ?
541 util_next_power_of_two(r->picture_height) : r->picture_height;
542 r->fb_state.nr_cbufs = 1;
543 r->fb_state.zsbuf = NULL;
544
545 /* Luma filter */
546 filters[0] = PIPE_TEX_FILTER_NEAREST;
547 /* Chroma filters */
548 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 || true) { //TODO
549 filters[1] = PIPE_TEX_FILTER_NEAREST;
550 filters[2] = PIPE_TEX_FILTER_NEAREST;
551 }
552 else {
553 filters[1] = PIPE_TEX_FILTER_LINEAR;
554 filters[2] = PIPE_TEX_FILTER_LINEAR;
555 }
556 /* Fwd, bkwd ref filters */
557 filters[3] = PIPE_TEX_FILTER_LINEAR;
558 filters[4] = PIPE_TEX_FILTER_LINEAR;
559
560 for (i = 0; i < 5; ++i) {
561 memset(&sampler, 0, sizeof(sampler));
562 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
563 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
564 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
565 sampler.min_img_filter = filters[i];
566 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
567 sampler.mag_img_filter = filters[i];
568 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
569 sampler.compare_func = PIPE_FUNC_ALWAYS;
570 sampler.normalized_coords = 1;
571 /*sampler.shadow_ambient = ; */
572 /*sampler.lod_bias = ; */
573 sampler.min_lod = 0;
574 /*sampler.max_lod = ; */
575 sampler.border_color[0] = 0.0f;
576 sampler.border_color[1] = 0.0f;
577 sampler.border_color[2] = 0.0f;
578 sampler.border_color[3] = 0.0f;
579 /*sampler.max_anisotropy = ; */
580 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
581 }
582
583 return true;
584 }
585
586 static void
587 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
588 {
589 unsigned i;
590
591 assert(r);
592
593 for (i = 0; i < 5; ++i)
594 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
595 }
596
597 static bool
598 init_shaders(struct vl_mpeg12_mc_renderer *r)
599 {
600 assert(r);
601
602 r->i_vs = create_vert_shader(r, 0, 0);
603 r->i_fs = create_intra_frag_shader(r);
604
605 r->p_vs[0] = create_vert_shader(r, 1, 1);
606 r->p_vs[1] = create_vert_shader(r, 1, 2);
607 r->p_fs[0] = create_frame_pred_frag_shader(r);
608 r->p_fs[1] = create_field_pred_frag_shader(r);
609
610 r->b_vs[0] = create_vert_shader(r, 2, 1);
611 r->b_vs[1] = create_vert_shader(r, 2, 2);
612 r->b_fs[0] = create_frame_bi_pred_frag_shader(r);
613 r->b_fs[1] = create_field_bi_pred_frag_shader(r);
614
615 return
616 r->i_vs != NULL &&
617 r->i_fs != NULL &&
618 r->p_vs[0] != NULL &&
619 r->p_vs[1] != NULL &&
620 r->p_fs[0] != NULL &&
621 r->p_fs[1] != NULL &&
622 r->b_vs[0] != NULL &&
623 r->b_vs[1] != NULL &&
624 r->b_fs[0] != NULL &&
625 r->b_fs[1] != NULL;
626 }
627
628 static void
629 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
630 {
631 assert(r);
632
633 r->pipe->delete_vs_state(r->pipe, r->i_vs);
634 r->pipe->delete_fs_state(r->pipe, r->i_fs);
635 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
636 r->pipe->delete_vs_state(r->pipe, r->p_vs[1]);
637 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
638 r->pipe->delete_fs_state(r->pipe, r->p_fs[1]);
639 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
640 r->pipe->delete_vs_state(r->pipe, r->b_vs[1]);
641 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
642 r->pipe->delete_fs_state(r->pipe, r->b_fs[1]);
643 }
644
645 static bool
646 init_buffers(struct vl_mpeg12_mc_renderer *r)
647 {
648 struct pipe_resource template;
649 struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
650 struct pipe_sampler_view sampler_view;
651
652 const unsigned mbw =
653 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
654 const unsigned mbh =
655 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
656
657 unsigned i;
658
659 assert(r);
660
661 r->macroblocks_per_batch =
662 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
663 r->num_macroblocks = 0;
664 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
665
666 memset(&template, 0, sizeof(struct pipe_resource));
667 template.target = PIPE_TEXTURE_2D;
668 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
669 template.format = PIPE_FORMAT_R16_SNORM;
670 template.last_level = 0;
671 template.width0 = r->pot_buffers ?
672 util_next_power_of_two(r->picture_width) : r->picture_width;
673 template.height0 = r->pot_buffers ?
674 util_next_power_of_two(r->picture_height) : r->picture_height;
675 template.depth0 = 1;
676 template.usage = PIPE_USAGE_DYNAMIC;
677 template.bind = PIPE_BIND_SAMPLER_VIEW;
678 template.flags = 0;
679
680 r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
681
682 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
683 template.width0 = r->pot_buffers ?
684 util_next_power_of_two(r->picture_width / 2) :
685 r->picture_width / 2;
686 template.height0 = r->pot_buffers ?
687 util_next_power_of_two(r->picture_height / 2) :
688 r->picture_height / 2;
689 }
690 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
691 template.height0 = r->pot_buffers ?
692 util_next_power_of_two(r->picture_height / 2) :
693 r->picture_height / 2;
694
695 r->textures.individual.cb =
696 r->pipe->screen->resource_create(r->pipe->screen, &template);
697 r->textures.individual.cr =
698 r->pipe->screen->resource_create(r->pipe->screen, &template);
699
700 for (i = 0; i < 3; ++i) {
701 u_sampler_view_default_template(&sampler_view,
702 r->textures.all[i],
703 r->textures.all[i]->format);
704 r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
705 }
706
707 r->vertex_bufs.individual.rect.stride = sizeof(struct vertex2f);
708 r->vertex_bufs.individual.rect.max_index = 4 * r->macroblocks_per_batch - 1;
709 r->vertex_bufs.individual.rect.buffer_offset = 0;
710 r->vertex_bufs.individual.rect.buffer = pipe_buffer_create
711 (
712 r->pipe->screen,
713 PIPE_BIND_VERTEX_BUFFER,
714 sizeof(struct vertex2f) * 4 * r->macroblocks_per_batch
715 );
716
717 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vert_stream_0);
718 r->vertex_bufs.individual.ycbcr.max_index = 4 * r->macroblocks_per_batch - 1;
719 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
720 /* XXX: Create with usage DYNAMIC or STREAM */
721 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
722 (
723 r->pipe->screen,
724 PIPE_BIND_VERTEX_BUFFER,
725 sizeof(struct vert_stream_0) * 4 * r->macroblocks_per_batch
726 );
727
728 for (i = 0; i < 2; ++i) {
729 r->vertex_bufs.individual.ref[i].stride = sizeof(struct vertex2f) * 2;
730 r->vertex_bufs.individual.ref[i].max_index = 4 * r->macroblocks_per_batch - 1;
731 r->vertex_bufs.individual.ref[i].buffer_offset = 0;
732 /* XXX: Create with usage DYNAMIC or STREAM */
733 r->vertex_bufs.individual.ref[i].buffer = pipe_buffer_create
734 (
735 r->pipe->screen,
736 PIPE_BIND_VERTEX_BUFFER,
737 sizeof(struct vertex2f) * 2 * 4 * r->macroblocks_per_batch
738 );
739 }
740
741 memset(&vertex_elems, 0, sizeof(vertex_elems));
742
743 /* Rectangle element */
744 vertex_elems[VS_I_RECT].src_offset = 0;
745 vertex_elems[VS_I_RECT].instance_divisor = 0;
746 vertex_elems[VS_I_RECT].vertex_buffer_index = 0;
747 vertex_elems[VS_I_RECT].src_format = PIPE_FORMAT_R32G32_FLOAT;
748
749 /* Position element */
750 vertex_elems[VS_I_VPOS].src_offset = 0;
751 vertex_elems[VS_I_VPOS].instance_divisor = 0;
752 vertex_elems[VS_I_VPOS].vertex_buffer_index = 1;
753 vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
754
755 /* progressive=1.0f interlaced=0.0f */
756 vertex_elems[VS_I_INTERLACED].src_offset = sizeof(struct vertex2f);
757 vertex_elems[VS_I_INTERLACED].instance_divisor = 0;
758 vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 1;
759 vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
760
761 /* First ref surface top field texcoord element */
762 vertex_elems[VS_I_MV0].src_offset = 0;
763 vertex_elems[VS_I_MV0].instance_divisor = 0;
764 vertex_elems[VS_I_MV0].vertex_buffer_index = 2;
765 vertex_elems[VS_I_MV0].src_format = PIPE_FORMAT_R32G32_FLOAT;
766
767 /* First ref surface bottom field texcoord element */
768 vertex_elems[VS_I_MV1].src_offset = sizeof(struct vertex2f);
769 vertex_elems[VS_I_MV1].instance_divisor = 0;
770 vertex_elems[VS_I_MV1].vertex_buffer_index = 2;
771 vertex_elems[VS_I_MV1].src_format = PIPE_FORMAT_R32G32_FLOAT;
772
773 /* Second ref surface top field texcoord element */
774 vertex_elems[VS_I_MV2].src_offset = 0;
775 vertex_elems[VS_I_MV2].instance_divisor = 0;
776 vertex_elems[VS_I_MV2].vertex_buffer_index = 3;
777 vertex_elems[VS_I_MV2].src_format = PIPE_FORMAT_R32G32_FLOAT;
778
779 /* Second ref surface bottom field texcoord element */
780 vertex_elems[VS_I_MV3].src_offset = sizeof(struct vertex2f);
781 vertex_elems[VS_I_MV3].instance_divisor = 0;
782 vertex_elems[VS_I_MV3].vertex_buffer_index = 3;
783 vertex_elems[VS_I_MV3].src_format = PIPE_FORMAT_R32G32_FLOAT;
784
785 r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 3, vertex_elems);
786 r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 5, vertex_elems);
787 r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 7, vertex_elems);
788
789 r->vs_const_buf = pipe_buffer_create
790 (
791 r->pipe->screen,
792 PIPE_BIND_CONSTANT_BUFFER,
793 sizeof(struct vertex_shader_consts)
794 );
795
796 return true;
797 }
798
799 static void
800 init_const_buffers(struct vl_mpeg12_mc_renderer *r)
801 {
802 struct pipe_transfer *buf_transfer;
803 struct vertex2f *rect;
804 unsigned i;
805
806 rect = pipe_buffer_map
807 (
808 r->pipe,
809 r->vertex_bufs.individual.rect.buffer,
810 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
811 &buf_transfer
812 );
813
814 for ( i = 0; i < r->macroblocks_per_batch; ++i)
815 memcpy(rect + i * 4, &const_quad, sizeof(const_quad));
816
817 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.rect.buffer, buf_transfer);
818 }
819
820 static void
821 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
822 {
823 unsigned i;
824
825 assert(r);
826
827 pipe_resource_reference(&r->vs_const_buf, NULL);
828
829 for (i = 0; i < 3; ++i) {
830 pipe_sampler_view_reference(&r->sampler_views.all[i], NULL);
831 r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state.all[i]);
832 pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
833 pipe_resource_reference(&r->textures.all[i], NULL);
834 }
835
836 FREE(r->macroblock_buf);
837 }
838
839 static enum MACROBLOCK_TYPE
840 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
841 {
842 assert(mb);
843
844 switch (mb->mb_type) {
845 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
846 return MACROBLOCK_TYPE_INTRA;
847 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
848 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
849 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
850 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
851 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
852 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
853 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
854 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
855 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
856 default:
857 assert(0);
858 }
859
860 /* Unreachable */
861 return -1;
862 }
863
864 void
865 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
866 struct pipe_mpeg12_macroblock *mb, unsigned pos,
867 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
868 {
869 struct vertex2f mo_vec[2];
870
871 unsigned i;
872
873 assert(r);
874 assert(mb);
875 assert(ycbcr_vb);
876 assert(pos < r->macroblocks_per_batch);
877
878 mo_vec[1].x = 0;
879 mo_vec[1].y = 0;
880
881 switch (mb->mb_type) {
882 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
883 {
884 struct vertex2f *vb;
885
886 assert(ref_vb && ref_vb[1]);
887
888 vb = ref_vb[1] + pos * 2 * 4;
889
890 mo_vec[0].x = mb->pmv[0][1][0];
891 mo_vec[0].y = mb->pmv[0][1][1];
892
893 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
894 for (i = 0; i < 4 * 2; i += 2) {
895 vb[i].x = mo_vec[0].x;
896 vb[i].y = mo_vec[0].y;
897 }
898 }
899 else {
900 mo_vec[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
901
902 mo_vec[1].x = mb->pmv[1][1][0];
903 mo_vec[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
904
905 if(mb->mvfs[0][1]) mo_vec[0].y += 2;
906 if(!mb->mvfs[1][1]) mo_vec[1].y -= 2;
907
908 for (i = 0; i < 4 * 2; i += 2) {
909 vb[i].x = mo_vec[0].x;
910 vb[i].y = mo_vec[0].y;
911 vb[i + 1].x = mo_vec[1].x;
912 vb[i + 1].y = mo_vec[1].y;
913 }
914 }
915
916 /* fall-through */
917 }
918 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
919 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
920 {
921 struct vertex2f *vb;
922
923 assert(ref_vb && ref_vb[0]);
924
925 vb = ref_vb[0] + pos * 2 * 4;
926
927 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
928 mo_vec[0].x = mb->pmv[0][1][0];
929 mo_vec[0].y = mb->pmv[0][1][1];
930
931 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
932 mo_vec[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
933
934 mo_vec[1].x = mb->pmv[1][1][0];
935 mo_vec[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
936
937 if(mb->mvfs[0][1]) mo_vec[0].y += 2;
938 if(!mb->mvfs[1][1]) mo_vec[1].y -= 2;
939 }
940 }
941 else {
942 mo_vec[0].x = mb->pmv[0][0][0];
943 mo_vec[0].y = mb->pmv[0][0][1];
944
945 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
946 mo_vec[0].y = mb->pmv[0][0][1] - (mb->pmv[0][0][1] % 4);
947
948 mo_vec[1].x = mb->pmv[1][0][0];
949 mo_vec[1].y = mb->pmv[1][0][1] - (mb->pmv[1][0][1] % 4);
950
951 if(mb->mvfs[0][0]) mo_vec[0].y += 2;
952 if(!mb->mvfs[1][0]) mo_vec[1].y -= 2;
953 }
954 }
955
956 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
957 for (i = 0; i < 4 * 2; i += 2) {
958 vb[i].x = mo_vec[0].x;
959 vb[i].y = mo_vec[0].y;
960 }
961 }
962 else {
963 for (i = 0; i < 4 * 2; i += 2) {
964 vb[i].x = mo_vec[0].x;
965 vb[i].y = mo_vec[0].y;
966 vb[i + 1].x = mo_vec[1].x;
967 vb[i + 1].y = mo_vec[1].y;
968 }
969 }
970
971 /* fall-through */
972 }
973 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
974 {
975 struct vert_stream_0 *vb = ycbcr_vb + pos * 4;
976 struct vert_stream_0 v;
977
978 v.pos.x = mb->mbx;
979 v.pos.y = mb->mby;
980
981 v.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
982
983 for ( i = 0; i < 4; ++i )
984 memcpy(vb + i, &v, sizeof(v));
985
986 break;
987 }
988 default:
989 assert(0);
990 }
991 }
992
993 static void
994 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
995 unsigned *num_macroblocks)
996 {
997 unsigned offset[NUM_MACROBLOCK_TYPES];
998 struct vert_stream_0 *ycbcr_vb;
999 struct vertex2f *ref_vb[2];
1000 struct pipe_transfer *buf_transfer[3];
1001 unsigned i;
1002
1003 assert(r);
1004 assert(num_macroblocks);
1005
1006 for (i = 0; i < r->num_macroblocks; ++i) {
1007 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1008 ++num_macroblocks[mb_type];
1009 }
1010
1011 offset[0] = 0;
1012
1013 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
1014 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
1015
1016 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
1017 (
1018 r->pipe,
1019 r->vertex_bufs.individual.ycbcr.buffer,
1020 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1021 &buf_transfer[0]
1022 );
1023
1024 for (i = 0; i < 2; ++i)
1025 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
1026 (
1027 r->pipe,
1028 r->vertex_bufs.individual.ref[i].buffer,
1029 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1030 &buf_transfer[i + 1]
1031 );
1032
1033 for (i = 0; i < r->num_macroblocks; ++i) {
1034 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1035
1036 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
1037 ycbcr_vb, ref_vb);
1038
1039 ++offset[mb_type];
1040 }
1041
1042 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
1043 for (i = 0; i < 2; ++i)
1044 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
1045 }
1046
1047 static struct pipe_sampler_view
1048 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
1049 {
1050 struct pipe_sampler_view *sampler_view;
1051 assert(r);
1052 assert(surface);
1053
1054 sampler_view = (struct pipe_sampler_view*)util_keymap_lookup(r->texview_map, &surface);
1055 if (!sampler_view) {
1056 struct pipe_sampler_view templat;
1057 boolean added_to_map;
1058
1059 u_sampler_view_default_template(&templat, surface->texture,
1060 surface->texture->format);
1061 sampler_view = r->pipe->create_sampler_view(r->pipe, surface->texture,
1062 &templat);
1063 if (!sampler_view)
1064 return NULL;
1065
1066 added_to_map = util_keymap_insert(r->texview_map, &surface,
1067 sampler_view, r->pipe);
1068 assert(added_to_map);
1069 }
1070
1071 return sampler_view;
1072 }
1073
1074 static void
1075 flush(struct vl_mpeg12_mc_renderer *r)
1076 {
1077 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1078 unsigned vb_start = 0;
1079
1080 assert(r);
1081 assert(r->num_macroblocks == r->macroblocks_per_batch);
1082
1083 vl_idct_flush(&r->idct_y);
1084 vl_idct_flush(&r->idct_cr);
1085 vl_idct_flush(&r->idct_cb);
1086
1087 gen_macroblock_stream(r, num_macroblocks);
1088
1089 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, r->vs_const_buf);
1090 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1091 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1092
1093 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1094 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1095 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
1096 r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
1097 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1098 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1099 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1100
1101 util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
1102 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 4);
1103 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 4;
1104 }
1105
1106 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1107 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1108 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1109 r->textures.individual.ref[0] = r->past->texture;
1110 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1111 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1112 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1113 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1114 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1115
1116 util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
1117 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 4);
1118 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 4;
1119 }
1120
1121 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0) {
1122 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1123 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1124 r->textures.individual.ref[0] = r->past->texture;
1125 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1126 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1127 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1128 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1129 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1130
1131 util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
1132 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 4);
1133 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 4;
1134 }
1135
1136 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1137 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1138 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1139 r->textures.individual.ref[0] = r->future->texture;
1140 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1141 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1142 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1143 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1144 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1145
1146 util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
1147 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 4);
1148 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 4;
1149 }
1150
1151 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0) {
1152 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1153 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1154 r->textures.individual.ref[0] = r->future->texture;
1155 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1156 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1157 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1158 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1159 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1160
1161 util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
1162 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 4);
1163 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 4;
1164 }
1165
1166 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1167 r->pipe->set_vertex_buffers(r->pipe, 4, r->vertex_bufs.all);
1168 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1169 r->textures.individual.ref[0] = r->past->texture;
1170 r->textures.individual.ref[1] = r->future->texture;
1171 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1172 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1173 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1174 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1175 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1176 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1177
1178 util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
1179 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 4);
1180 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 4;
1181 }
1182
1183 if (num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0) {
1184 r->pipe->set_vertex_buffers(r->pipe, 4, r->vertex_bufs.all);
1185 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1186 r->textures.individual.ref[0] = r->past->texture;
1187 r->textures.individual.ref[1] = r->future->texture;
1188 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1189 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1190 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1191 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1192 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1193 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1194
1195 util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
1196 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 4);
1197 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 4;
1198 }
1199
1200 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1201
1202 r->num_macroblocks = 0;
1203 }
1204
1205 static void
1206 update_render_target(struct vl_mpeg12_mc_renderer *r)
1207 {
1208 struct pipe_transfer *buf_transfer;
1209 struct vertex_shader_consts *vs_consts;
1210
1211 vs_consts = pipe_buffer_map
1212 (
1213 r->pipe, r->vs_const_buf,
1214 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1215 &buf_transfer
1216 );
1217
1218 vs_consts->norm.x = 1.0f / r->surface->width;
1219 vs_consts->norm.y = 1.0f / r->surface->height;
1220
1221 pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
1222
1223 r->fb_state.cbufs[0] = r->surface;
1224 }
1225
1226 static void
1227 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1228 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1229 {
1230 unsigned tb = 0;
1231 unsigned x, y;
1232
1233 assert(r);
1234 assert(blocks);
1235
1236 for (y = 0; y < 2; ++y) {
1237 for (x = 0; x < 2; ++x, ++tb) {
1238 bool eb = !(cbp & (1 << (5 - tb)));
1239 vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, eb ? NULL : blocks);
1240 blocks += eb ? 0 : BLOCK_WIDTH * BLOCK_HEIGHT;
1241 }
1242 }
1243
1244 /* TODO: Implement 422, 444 */
1245 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1246
1247 for (tb = 0; tb < 2; ++tb) {
1248 bool eb = !(cbp & (1 << (1 - tb)));
1249 if(tb == 0)
1250 vl_idct_add_block(&r->idct_cb, mbx, mby, eb ? NULL : blocks);
1251 else
1252 vl_idct_add_block(&r->idct_cr, mbx, mby, eb ? NULL : blocks);
1253 blocks += eb ? 0 : BLOCK_WIDTH * BLOCK_HEIGHT;
1254 }
1255 }
1256
1257 static void
1258 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1259 struct pipe_mpeg12_macroblock *mb)
1260 {
1261 assert(r);
1262 assert(mb);
1263 assert(mb->blocks);
1264 assert(r->num_macroblocks < r->macroblocks_per_batch);
1265
1266 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1267 sizeof(struct pipe_mpeg12_macroblock));
1268
1269 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1270
1271 ++r->num_macroblocks;
1272 }
1273
/**
 * Keymap delete callback: drop the reference held on a cached sampler
 * view.
 *
 * map, key and user are only sanity-checked; data is the sampler view
 * stored for the entry.
 */
static void
texview_map_delete(const struct keymap *map,
                   const void *key, void *data,
                   void *user)
{
   struct pipe_sampler_view *view = data;

   assert(map);
   assert(key);
   assert(data);
   assert(user);

   pipe_sampler_view_reference(&view, NULL);
}
1288
1289 bool
1290 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1291 struct pipe_context *pipe,
1292 unsigned picture_width,
1293 unsigned picture_height,
1294 enum pipe_video_chroma_format chroma_format,
1295 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1296 bool pot_buffers)
1297 {
1298 assert(renderer);
1299 assert(pipe);
1300 /* TODO: Implement other policies */
1301 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1302 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1303 assert(pot_buffers);
1304
1305 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1306
1307 renderer->pipe = pipe;
1308 renderer->picture_width = picture_width;
1309 renderer->picture_height = picture_height;
1310 renderer->chroma_format = chroma_format;
1311 renderer->bufmode = bufmode;
1312 renderer->pot_buffers = pot_buffers;
1313
1314 renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
1315 texview_map_delete);
1316 if (!renderer->texview_map)
1317 return false;
1318
1319 if (!init_pipe_state(renderer))
1320 goto error_pipe_state;
1321
1322 if (!init_shaders(renderer))
1323 goto error_shaders;
1324
1325 if (!init_buffers(renderer))
1326 goto error_buffers;
1327
1328 init_const_buffers(renderer);
1329
1330 renderer->surface = NULL;
1331 renderer->past = NULL;
1332 renderer->future = NULL;
1333 renderer->num_macroblocks = 0;
1334
1335 if(!vl_idct_init(&renderer->idct_y, pipe, renderer->textures.individual.y))
1336 goto error_idct_y;
1337
1338 if(!vl_idct_init(&renderer->idct_cr, pipe, renderer->textures.individual.cr))
1339 goto error_idct_cr;
1340
1341 if(!vl_idct_init(&renderer->idct_cb, pipe, renderer->textures.individual.cb))
1342 goto error_idct_cb;
1343
1344 return true;
1345
1346 error_idct_cb:
1347 vl_idct_cleanup(&renderer->idct_cr);
1348
1349 error_idct_cr:
1350 vl_idct_cleanup(&renderer->idct_y);
1351
1352 error_idct_y:
1353 cleanup_buffers(renderer);
1354
1355 error_buffers:
1356 cleanup_shaders(renderer);
1357
1358 error_shaders:
1359 cleanup_pipe_state(renderer);
1360
1361 error_pipe_state:
1362 util_delete_keymap(renderer->texview_map, renderer->pipe);
1363 return false;
1364 }
1365
1366 void
1367 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1368 {
1369 assert(renderer);
1370
1371 vl_idct_cleanup(&renderer->idct_y);
1372 vl_idct_cleanup(&renderer->idct_cr);
1373 vl_idct_cleanup(&renderer->idct_cb);
1374
1375 util_delete_keymap(renderer->texview_map, renderer->pipe);
1376 cleanup_pipe_state(renderer);
1377 cleanup_shaders(renderer);
1378 cleanup_buffers(renderer);
1379
1380 pipe_surface_reference(&renderer->surface, NULL);
1381 pipe_surface_reference(&renderer->past, NULL);
1382 pipe_surface_reference(&renderer->future, NULL);
1383 }
1384
1385 void
1386 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1387 *renderer,
1388 struct pipe_surface *surface,
1389 struct pipe_surface *past,
1390 struct pipe_surface *future,
1391 unsigned num_macroblocks,
1392 struct pipe_mpeg12_macroblock
1393 *mpeg12_macroblocks,
1394 struct pipe_fence_handle **fence)
1395 {
1396 bool new_surface = false;
1397
1398 assert(renderer);
1399 assert(surface);
1400 assert(num_macroblocks);
1401 assert(mpeg12_macroblocks);
1402
1403 if (renderer->surface) {
1404 if (surface != renderer->surface) {
1405 if (renderer->num_macroblocks > 0) {
1406 flush(renderer);
1407 }
1408
1409 new_surface = true;
1410 }
1411
1412 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1413 assert(surface != renderer->surface || renderer->past == past);
1414 assert(surface != renderer->surface || renderer->future == future);
1415 }
1416 else
1417 new_surface = true;
1418
1419 if (new_surface) {
1420 pipe_surface_reference(&renderer->surface, surface);
1421 pipe_surface_reference(&renderer->past, past);
1422 pipe_surface_reference(&renderer->future, future);
1423 renderer->fence = fence;
1424 update_render_target(renderer);
1425 }
1426
1427 while (num_macroblocks) {
1428 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1429 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1430 unsigned i;
1431
1432 for (i = 0; i < num_to_submit; ++i) {
1433 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1434 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1435 }
1436
1437 num_macroblocks -= num_to_submit;
1438
1439 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1440 flush(renderer);
1441 /* Next time we get this surface it may have new ref frames */
1442 pipe_surface_reference(&renderer->surface, NULL);
1443 pipe_surface_reference(&renderer->past, NULL);
1444 pipe_surface_reference(&renderer->future, NULL);
1445 }
1446 }
1447 }