[g3dvl] move vertex normalisation into vertex shader
[mesa.git] src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include "util/u_draw.h"
30 #include <assert.h>
31 #include <pipe/p_context.h>
32 #include <util/u_inlines.h>
33 #include <util/u_format.h>
34 #include <util/u_math.h>
35 #include <util/u_memory.h>
36 #include <util/u_keymap.h>
37 #include <util/u_sampler.h>
38 #include <tgsi/tgsi_ureg.h>
39
40 #define DEFAULT_BUF_ALIGNMENT 1
41 #define MACROBLOCK_WIDTH 16
42 #define MACROBLOCK_HEIGHT 16
43 #define BLOCK_WIDTH 8
44 #define BLOCK_HEIGHT 8
45 #define ZERO_BLOCK_NIL -1.0f
46 #define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
47 #define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
48
49 struct vertex_shader_consts
50 {
51 struct vertex4f norm;
52 };
53
54 struct fragment_shader_consts
55 {
56 struct vertex4f multiplier;
57 struct vertex4f div;
58 };
59
60 struct vert_stream_0
61 {
62 struct vertex2f pos;
63 struct vertex2f luma_tc;
64 struct vertex2f cb_tc;
65 struct vertex2f cr_tc;
66 };
67
68 enum MACROBLOCK_TYPE
69 {
70 MACROBLOCK_TYPE_INTRA,
71 MACROBLOCK_TYPE_FWD_FRAME_PRED,
72 MACROBLOCK_TYPE_FWD_FIELD_PRED,
73 MACROBLOCK_TYPE_BKWD_FRAME_PRED,
74 MACROBLOCK_TYPE_BKWD_FIELD_PRED,
75 MACROBLOCK_TYPE_BI_FRAME_PRED,
76 MACROBLOCK_TYPE_BI_FIELD_PRED,
77
78 NUM_MACROBLOCK_TYPES
79 };
80
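/*
 * Builds the vertex shader shared by all macroblock types. ref_frames selects
 * how many reference surfaces are read (0 = intra, 1 = P, 2 = B) and
 * mv_per_frame how many motion vectors are used per reference (1 = frame
 * prediction, 2 = field prediction). Positions, texcoords and motion vectors
 * arrive in unnormalised units; the shader scales them by the "norm" constant,
 * so the vertex buffers no longer have to be normalised on the CPU.
 */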
81 static void *
82 create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
83 {
84 struct ureg_program *shader;
85 struct ureg_src norm;
86 struct ureg_src vpos, vtex[3], vmv[4];
87 struct ureg_dst temp;
88 struct ureg_dst o_vpos, o_vtex[3], o_vmv[4];
89 unsigned i, j, count;
90
91 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
92 if (!shader)
93 return NULL;
94
95 norm = ureg_DECL_constant(shader, 0);
96 temp = ureg_DECL_temporary(shader);
97
98 vpos = ureg_DECL_vs_input(shader, 0);
99 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
100
101 for (i = 0; i < 3; ++i) {
102 vtex[i] = ureg_DECL_vs_input(shader, 1 + i);
103 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1 + i);
104 }
105
106 count = 0;
107 for (i = 0; i < ref_frames; ++i) {
108 for (j = 0; j < 2; ++j) {
109 if (j < mv_per_frame) {
110 vmv[count] = ureg_DECL_vs_input(shader, 4 + i * 2 + j);
111 o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
112 count++;
113 }
114 /* workaround for r600g */
115 else if (ref_frames == 2)
116 ureg_DECL_vs_input(shader, 4 + i * 2 + j);
117 }
118 }
119
120 /*
121 * o_vpos = vpos * norm
122 * o_vtex[0..2] = vtex[0..2] * norm
123 * o_vmv[0..count] = o_vpos + vmv[0..count] * 0.5 // Apply motion vector
124 */
125 ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, norm);
126 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
127 for (i = 0; i < 3; ++i) {
128 ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), vtex[i], norm);
129 ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_ZW), vtex[i]);
130 }
131 for (i = 0; i < count; ++i) {
132 ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vmv[i],
133 ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
134 ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(temp), norm, ureg_src(o_vpos));
135 ureg_MOV(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), vmv[i]);
136 }
137
138 ureg_release_temporary(shader, temp);
139 ureg_END(shader);
140
141 return ureg_create_shader_and_destroy(shader, r->pipe);
142 }
143
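/*
 * Fragment shader for intra macroblocks: samples the Y, Cb and Cr block
 * textures and rescales the result by SCALE_FACTOR_16_TO_9 to undo the
 * SNORM normalisation of the 16 bit block data.
 */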
144 static void *
145 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
146 {
147 struct ureg_program *shader;
148 struct ureg_src tc[3];
149 struct ureg_src sampler[3];
150 struct ureg_dst texel, temp;
151 struct ureg_dst fragment;
152 unsigned i;
153
154 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
155 if (!shader)
156 return NULL;
157
158 for (i = 0; i < 3; ++i) {
159 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
160 sampler[i] = ureg_DECL_sampler(shader, i);
161 }
162 texel = ureg_DECL_temporary(shader);
163 temp = ureg_DECL_temporary(shader);
164 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
165
166 /*
167 * texel.r = tex(tc[0], sampler[0])
168 * texel.g = tex(tc[1], sampler[1])
169 * texel.b = tex(tc[2], sampler[2])
170 * fragment = texel * scale
171 */
172 for (i = 0; i < 3; ++i) {
173 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
174 ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
175 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
176 }
177 ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
178
179 ureg_release_temporary(shader, texel);
180 ureg_release_temporary(shader, temp);
181 ureg_END(shader);
182
183 return ureg_create_shader_and_destroy(shader, r->pipe);
184 }
185
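/*
 * Fragment shader for frame-predicted macroblocks: like the intra shader,
 * but additionally samples the reference surface at the motion-compensated
 * texcoord (tc[3]) and adds it to the rescaled block data.
 */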
186 static void *
187 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
188 {
189 struct ureg_program *shader;
190 struct ureg_src tc[4];
191 struct ureg_src sampler[4];
192 struct ureg_dst texel, ref;
193 struct ureg_dst fragment;
194 unsigned i;
195
196 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
197 if (!shader)
198 return NULL;
199
200 for (i = 0; i < 4; ++i) {
201 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
202 sampler[i] = ureg_DECL_sampler(shader, i);
203 }
204 texel = ureg_DECL_temporary(shader);
205 ref = ureg_DECL_temporary(shader);
206 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
207
208 /*
209 * texel.r = tex(tc[0], sampler[0])
210 * texel.g = tex(tc[1], sampler[1])
211 * texel.b = tex(tc[2], sampler[2])
212 * ref = tex(tc[3], sampler[3])
213 * fragment = texel * scale + ref
214 */
215 for (i = 0; i < 3; ++i) {
216 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
217 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
218 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
219 }
220 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
221 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
222
223 ureg_release_temporary(shader, texel);
224 ureg_release_temporary(shader, ref);
225 ureg_END(shader);
226
227 return ureg_create_shader_and_destroy(shader, r->pipe);
228 }
229
230 static void *
231 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
232 {
233 struct ureg_program *shader;
234 struct ureg_src tc[5];
235 struct ureg_src sampler[4];
236 struct ureg_dst texel, ref, tmp;
237 struct ureg_dst fragment;
238 unsigned i;
239
240 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
241 if (!shader)
242 return NULL;
243
244 for (i = 0; i < 5; ++i)
245 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
246 for (i = 0; i < 4; ++i)
247 sampler[i] = ureg_DECL_sampler(shader, i);
248
249 texel = ureg_DECL_temporary(shader);
250 ref = ureg_DECL_temporary(shader);
251 tmp = ureg_DECL_temporary(shader);
252 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
253
254 /*
255 * texel.r = tex(tc[0], sampler[0])
256 * texel.g = tex(tc[1], sampler[1])
257 * texel.b = tex(tc[2], sampler[2])
258 * ref = tex(tc[3], sampler[3])
259 * fragment = texel * scale + ref
260 */
261 for (i = 0; i < 3; ++i) {
262 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
263 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
264 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
265 }
266
267 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[4], sampler[3]);
268 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
269
270 ureg_release_temporary(shader, tmp);
271 ureg_release_temporary(shader, texel);
272 ureg_release_temporary(shader, ref);
273 ureg_END(shader);
274
275 return ureg_create_shader_and_destroy(shader, r->pipe);
276 }
277
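/*
 * Fragment shader for bidirectionally frame-predicted macroblocks: samples
 * both reference surfaces at their motion-compensated texcoords, averages
 * them with LRP at 0.5 and adds the rescaled block data.
 */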
278 static void *
279 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
280 {
281 struct ureg_program *shader;
282 struct ureg_src tc[5];
283 struct ureg_src sampler[5];
284 struct ureg_dst texel, ref[2];
285 struct ureg_dst fragment;
286 unsigned i;
287
288 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
289 if (!shader)
290 return NULL;
291
292 for (i = 0; i < 5; ++i) {
293 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
294 sampler[i] = ureg_DECL_sampler(shader, i);
295 }
296 texel = ureg_DECL_temporary(shader);
297 ref[0] = ureg_DECL_temporary(shader);
298 ref[1] = ureg_DECL_temporary(shader);
299 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
300
301 /*
302 * texel.r = tex(tc[0], sampler[0])
303 * texel.g = tex(tc[1], sampler[1])
304 * texel.b = tex(tc[2], sampler[2])
305 * ref[0..1] = tex(tc[3..4], sampler[3..4])
306 * ref[0] = lerp(ref[0], ref[1], 0.5)
307 * fragment = texel * scale + ref[0]
308 */
309 for (i = 0; i < 3; ++i) {
310 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
311 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
312 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
313 }
314 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
315 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
316 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
317
318 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
319
320 ureg_release_temporary(shader, texel);
321 ureg_release_temporary(shader, ref[0]);
322 ureg_release_temporary(shader, ref[1]);
323 ureg_END(shader);
324
325 return ureg_create_shader_and_destroy(shader, r->pipe);
326 }
327
328 static void *
329 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
330 {
331 struct ureg_program *shader;
332 struct ureg_src tc[5];
333 struct ureg_src sampler[5];
334 struct ureg_dst texel, ref[2];
335 struct ureg_dst fragment;
336 unsigned i;
337
338 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
339 if (!shader)
340 return NULL;
341
342 for (i = 0; i < 5; ++i) {
343 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
344 sampler[i] = ureg_DECL_sampler(shader, i);
345 }
346 texel = ureg_DECL_temporary(shader);
347 ref[0] = ureg_DECL_temporary(shader);
348 ref[1] = ureg_DECL_temporary(shader);
349 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
350
351 /*
352 * texel.r = tex(tc[0], sampler[0])
353 * texel.g = tex(tc[1], sampler[1])
354 * texel.b = tex(tc[2], sampler[2])
355 * ref[0..1] = tex(tc[3..4], sampler[3..4])
356 * ref[0] = lerp(ref[0], ref[1], 0.5)
357 * fragment = texel * scale + ref[0]
358 */
359 for (i = 0; i < 3; ++i) {
360 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
361 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
362 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
363 }
364 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
365 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
366 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
367
368 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
369
370 ureg_release_temporary(shader, texel);
371 ureg_release_temporary(shader, ref[0]);
372 ureg_release_temporary(shader, ref[1]);
373 ureg_END(shader);
374
375 return ureg_create_shader_and_destroy(shader, r->pipe);
376 }
377
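/*
 * Maps the three block textures so the CPU can copy macroblock data into them
 * while a batch is being assembled; xfer_buffers_unmap releases the transfers
 * again before the batch is drawn.
 */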
378 static void
379 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
380 {
381 unsigned i;
382
383 assert(r);
384
385 for (i = 0; i < 3; ++i) {
386 struct pipe_box rect =
387 {
388 0, 0, 0,
389 r->textures.all[i]->width0,
390 r->textures.all[i]->height0,
391 1
392 };
393
394 r->tex_transfer[i] = r->pipe->get_transfer
395 (
396 r->pipe, r->textures.all[i],
397 u_subresource(0, 0),
398 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
399 &rect
400 );
401
402 r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]);
403 }
404 }
405
406 static void
407 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
408 {
409 unsigned i;
410
411 assert(r);
412
413 for (i = 0; i < 3; ++i) {
414 r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]);
415 r->pipe->transfer_destroy(r->pipe, r->tex_transfer[i]);
416 }
417 }
418
419 static bool
420 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
421 {
422 struct pipe_sampler_state sampler;
423 unsigned filters[5];
424 unsigned i;
425
426 assert(r);
427
428 r->viewport.scale[0] = r->pot_buffers ?
429 util_next_power_of_two(r->picture_width) : r->picture_width;
430 r->viewport.scale[1] = r->pot_buffers ?
431 util_next_power_of_two(r->picture_height) : r->picture_height;
432 r->viewport.scale[2] = 1;
433 r->viewport.scale[3] = 1;
434 r->viewport.translate[0] = 0;
435 r->viewport.translate[1] = 0;
436 r->viewport.translate[2] = 0;
437 r->viewport.translate[3] = 0;
438
439 r->fb_state.width = r->pot_buffers ?
440 util_next_power_of_two(r->picture_width) : r->picture_width;
441 r->fb_state.height = r->pot_buffers ?
442 util_next_power_of_two(r->picture_height) : r->picture_height;
443 r->fb_state.nr_cbufs = 1;
444 r->fb_state.zsbuf = NULL;
445
446 /* Luma filter */
447 filters[0] = PIPE_TEX_FILTER_NEAREST;
448 /* Chroma filters */
449 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
450 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
451 filters[1] = PIPE_TEX_FILTER_NEAREST;
452 filters[2] = PIPE_TEX_FILTER_NEAREST;
453 }
454 else {
455 filters[1] = PIPE_TEX_FILTER_LINEAR;
456 filters[2] = PIPE_TEX_FILTER_LINEAR;
457 }
458 /* Fwd, bkwd ref filters */
459 filters[3] = PIPE_TEX_FILTER_LINEAR;
460 filters[4] = PIPE_TEX_FILTER_LINEAR;
461
462 for (i = 0; i < 5; ++i) {
463 memset(&sampler, 0, sizeof(sampler));
464 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
465 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
466 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
467 sampler.min_img_filter = filters[i];
468 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
469 sampler.mag_img_filter = filters[i];
470 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
471 sampler.compare_func = PIPE_FUNC_ALWAYS;
472 sampler.normalized_coords = 1;
473 /*sampler.shadow_ambient = ; */
474 /*sampler.lod_bias = ; */
475 sampler.min_lod = 0;
476 /*sampler.max_lod = ; */
477 /*sampler.border_color[i] = ; */
478 /*sampler.max_anisotropy = ; */
479 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
480 }
481
482 return true;
483 }
484
485 static void
486 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
487 {
488 unsigned i;
489
490 assert(r);
491
492 for (i = 0; i < 5; ++i)
493 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
494 }
495
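/*
 * Creates one vertex/fragment shader pair per prediction mode: intra,
 * frame/field prediction (shared by forward and backward macroblocks) and
 * frame/field bidirectional prediction.
 */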
496 static bool
497 init_shaders(struct vl_mpeg12_mc_renderer *r)
498 {
499 assert(r);
500
501 r->i_vs = create_vert_shader(r, 0, 0); assert(r->i_vs);
502 r->i_fs = create_intra_frag_shader(r); assert(r->i_fs);
503
504 r->p_vs[0] = create_vert_shader(r, 1, 1); assert(r->p_vs[0]);
505 r->p_vs[1] = create_vert_shader(r, 1, 2); assert(r->p_vs[1]);
506 r->p_fs[0] = create_frame_pred_frag_shader(r); assert(r->p_fs[0]);
507 r->p_fs[1] = create_field_pred_frag_shader(r); assert(r->p_fs[1]);
508
509 r->b_vs[0] = create_vert_shader(r, 2, 1); assert(r->b_vs[0]);
510 r->b_vs[1] = create_vert_shader(r, 2, 2); assert(r->b_vs[1]);
511 r->b_fs[0] = create_frame_bi_pred_frag_shader(r); assert(r->b_fs[0]);
512 r->b_fs[1] = create_field_bi_pred_frag_shader(r); assert(r->b_fs[1]);
513
514 return true;
515 }
516
517 static void
518 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
519 {
520 assert(r);
521
522 r->pipe->delete_vs_state(r->pipe, r->i_vs);
523 r->pipe->delete_fs_state(r->pipe, r->i_fs);
524 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
525 r->pipe->delete_vs_state(r->pipe, r->p_vs[1]);
526 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
527 r->pipe->delete_fs_state(r->pipe, r->p_fs[1]);
528 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
529 r->pipe->delete_vs_state(r->pipe, r->b_vs[1]);
530 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
531 r->pipe->delete_fs_state(r->pipe, r->b_fs[1]);
532 }
533
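/*
 * Allocates the per-picture resources: the R16_SNORM textures holding the
 * short block data for Y, Cb and Cr, one vertex buffer per stream
 * (positions/texcoords plus two motion vector streams) sized for 24 vertices
 * per macroblock, the vertex element layouts for I/P/B macroblocks and the
 * vertex shader constant buffer.
 */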
534 static bool
535 init_buffers(struct vl_mpeg12_mc_renderer *r)
536 {
537 struct pipe_resource template;
538 struct pipe_vertex_element vertex_elems[8];
539 struct pipe_sampler_view sampler_view;
540
541 const unsigned mbw =
542 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
543 const unsigned mbh =
544 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
545
546 unsigned i;
547
548 assert(r);
549
550 r->macroblocks_per_batch =
551 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
552 r->num_macroblocks = 0;
553 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
554
555 memset(&template, 0, sizeof(struct pipe_resource));
556 template.target = PIPE_TEXTURE_2D;
557 /* TODO: Accommodate HW that can't do this, and also cases where this isn't precise enough */
558 template.format = PIPE_FORMAT_R16_SNORM;
559 template.last_level = 0;
560 template.width0 = r->pot_buffers ?
561 util_next_power_of_two(r->picture_width) : r->picture_width;
562 template.height0 = r->pot_buffers ?
563 util_next_power_of_two(r->picture_height) : r->picture_height;
564 template.depth0 = 1;
565 template.usage = PIPE_USAGE_DYNAMIC;
566 template.bind = PIPE_BIND_SAMPLER_VIEW;
567 template.flags = 0;
568
569 r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
570
571 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
572 template.width0 = r->pot_buffers ?
573 util_next_power_of_two(r->picture_width / 2) :
574 r->picture_width / 2;
575 template.height0 = r->pot_buffers ?
576 util_next_power_of_two(r->picture_height / 2) :
577 r->picture_height / 2;
578 }
579 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
580 template.height0 = r->pot_buffers ?
581 util_next_power_of_two(r->picture_height / 2) :
582 r->picture_height / 2;
583
584 r->textures.individual.cb =
585 r->pipe->screen->resource_create(r->pipe->screen, &template);
586 r->textures.individual.cr =
587 r->pipe->screen->resource_create(r->pipe->screen, &template);
588
589 for (i = 0; i < 3; ++i) {
590 u_sampler_view_default_template(&sampler_view,
591 r->textures.all[i],
592 r->textures.all[i]->format);
593 r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
594 }
595
596 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
597 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
598 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
599 /* XXX: Create with usage DYNAMIC or STREAM */
600 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
601 (
602 r->pipe->screen,
603 PIPE_BIND_VERTEX_BUFFER,
604 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
605 );
606
607 for (i = 1; i < 3; ++i) {
608 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
609 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
610 r->vertex_bufs.all[i].buffer_offset = 0;
611 /* XXX: Create with usage DYNAMIC or STREAM */
612 r->vertex_bufs.all[i].buffer = pipe_buffer_create
613 (
614 r->pipe->screen,
615 PIPE_BIND_VERTEX_BUFFER,
616 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
617 );
618 }
619
620 memset(&vertex_elems, 0, sizeof(vertex_elems));
621
622 /* Position element */
623 vertex_elems[0].src_offset = 0;
624 vertex_elems[0].instance_divisor = 0;
625 vertex_elems[0].vertex_buffer_index = 0;
626 vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
627
628 /* Luma, texcoord element */
629 vertex_elems[1].src_offset = sizeof(struct vertex2f);
630 vertex_elems[1].instance_divisor = 0;
631 vertex_elems[1].vertex_buffer_index = 0;
632 vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
633
634 /* Chroma Cb texcoord element */
635 vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
636 vertex_elems[2].instance_divisor = 0;
637 vertex_elems[2].vertex_buffer_index = 0;
638 vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
639
640 /* Chroma Cr texcoord element */
641 vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
642 vertex_elems[3].instance_divisor = 0;
643 vertex_elems[3].vertex_buffer_index = 0;
644 vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
645
646 /* First ref surface top field texcoord element */
647 vertex_elems[4].src_offset = 0;
648 vertex_elems[4].instance_divisor = 0;
649 vertex_elems[4].vertex_buffer_index = 1;
650 vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
651
652 /* First ref surface bottom field texcoord element */
653 vertex_elems[5].src_offset = sizeof(struct vertex2f);
654 vertex_elems[5].instance_divisor = 0;
655 vertex_elems[5].vertex_buffer_index = 1;
656 vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
657
658 /* Second ref surface top field texcoord element */
659 vertex_elems[6].src_offset = 0;
660 vertex_elems[6].instance_divisor = 0;
661 vertex_elems[6].vertex_buffer_index = 2;
662 vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
663
664 /* Second ref surface bottom field texcoord element */
665 vertex_elems[7].src_offset = sizeof(struct vertex2f);
666 vertex_elems[7].instance_divisor = 0;
667 vertex_elems[7].vertex_buffer_index = 2;
668 vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
669
670 r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
671 r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
672 r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
673
674 r->vs_const_buf = pipe_buffer_create
675 (
676 r->pipe->screen,
677 PIPE_BIND_CONSTANT_BUFFER,
678 sizeof(struct vertex_shader_consts)
679 );
680
681 return true;
682 }
683
684 static void
685 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
686 {
687 unsigned i;
688
689 assert(r);
690
691 pipe_resource_reference(&r->vs_const_buf, NULL);
692
693 for (i = 0; i < 3; ++i) {
694 pipe_sampler_view_reference(&r->sampler_views.all[i], NULL);
695 r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state.all[i]);
696 pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
697 pipe_resource_reference(&r->textures.all[i], NULL);
698 }
699
700 FREE(r->macroblock_buf);
701 }
702
703 static enum MACROBLOCK_TYPE
704 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
705 {
706 assert(mb);
707
708 switch (mb->mb_type) {
709 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
710 return MACROBLOCK_TYPE_INTRA;
711 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
712 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
713 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
714 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
715 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
716 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
717 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
718 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
719 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
720 default:
721 assert(0);
722 }
723
724 /* Unreachable */
725 return -1;
726 }
727
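/*
 * Emits the six vertices (two triangles) covering one 8x8 block together with
 * the matching luma/chroma texcoords. When zero blocks are in use, channels
 * whose CBP bit is clear get texcoords pointing at the shared zero block
 * instead of the macroblock's own area.
 */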
728 static void
729 gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
730 const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
731 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
732 bool use_zeroblocks, struct vertex2f *zero_blocks)
733 {
734 unsigned cbp = mb->cbp;
735 unsigned mbx = mb->mbx;
736 unsigned mby = mb->mby;
737
738 struct vertex2f v;
739
740 assert(vb);
741 assert(unit && half && offset);
742 assert(zero_blocks || !use_zeroblocks);
743
744 /* Generate vertices for two triangles covering a block */
745 v.x = mbx * unit->x + offset->x;
746 v.y = mby * unit->y + offset->y;
747
748 vb[0].pos.x = v.x;
749 vb[0].pos.y = v.y;
750 vb[1].pos.x = v.x;
751 vb[1].pos.y = v.y + half->y;
752 vb[2].pos.x = v.x + half->x;
753 vb[2].pos.y = v.y;
754 vb[3].pos.x = v.x + half->x;
755 vb[3].pos.y = v.y;
756 vb[4].pos.x = v.x;
757 vb[4].pos.y = v.y + half->y;
758 vb[5].pos.x = v.x + half->x;
759 vb[5].pos.y = v.y + half->y;
760
761 /* Generate texcoords for the triangles. They point to the correct area of the luma/chroma texture,
762 or, when zero blocks are in use and the corresponding CBP bit isn't set (i.e. no data is coded
763 for this channel in this block), to the shared zero block. */
764
765 if (!use_zeroblocks || cbp & luma_mask || mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
766 v.x = mbx * unit->x + offset->x;
767 v.y = mby * unit->y + offset->y;
768 }
769 else {
770 v.x = zero_blocks[0].x;
771 v.y = zero_blocks[0].y;
772 }
773
774 vb[0].luma_tc.x = v.x;
775 vb[0].luma_tc.y = v.y;
776 vb[1].luma_tc.x = v.x;
777 vb[1].luma_tc.y = v.y + half->y;
778 vb[2].luma_tc.x = v.x + half->x;
779 vb[2].luma_tc.y = v.y;
780 vb[3].luma_tc.x = v.x + half->x;
781 vb[3].luma_tc.y = v.y;
782 vb[4].luma_tc.x = v.x;
783 vb[4].luma_tc.y = v.y + half->y;
784 vb[5].luma_tc.x = v.x + half->x;
785 vb[5].luma_tc.y = v.y + half->y;
786
787 if (!use_zeroblocks || cbp & cb_mask) {
788 v.x = mbx * unit->x + offset->x;
789 v.y = mby * unit->y + offset->y;
790 }
791 else {
792 v.x = zero_blocks[1].x;
793 v.y = zero_blocks[1].y;
794 }
795
796 vb[0].cb_tc.x = v.x;
797 vb[0].cb_tc.y = v.y;
798 vb[1].cb_tc.x = v.x;
799 vb[1].cb_tc.y = v.y + half->y;
800 vb[2].cb_tc.x = v.x + half->x;
801 vb[2].cb_tc.y = v.y;
802 vb[3].cb_tc.x = v.x + half->x;
803 vb[3].cb_tc.y = v.y;
804 vb[4].cb_tc.x = v.x;
805 vb[4].cb_tc.y = v.y + half->y;
806 vb[5].cb_tc.x = v.x + half->x;
807 vb[5].cb_tc.y = v.y + half->y;
808
809 if (!use_zeroblocks || cbp & cr_mask) {
810 v.x = mbx * unit->x + offset->x;
811 v.y = mby * unit->y + offset->y;
812 }
813 else {
814 v.x = zero_blocks[2].x;
815 v.y = zero_blocks[2].y;
816 }
817
818 vb[0].cr_tc.x = v.x;
819 vb[0].cr_tc.y = v.y;
820 vb[1].cr_tc.x = v.x;
821 vb[1].cr_tc.y = v.y + half->y;
822 vb[2].cr_tc.x = v.x + half->x;
823 vb[2].cr_tc.y = v.y;
824 vb[3].cr_tc.x = v.x + half->x;
825 vb[3].cr_tc.y = v.y;
826 vb[4].cr_tc.x = v.x;
827 vb[4].cr_tc.y = v.y + half->y;
828 vb[5].cr_tc.x = v.x + half->x;
829 vb[5].cr_tc.y = v.y + half->y;
830 }
831
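/*
 * Fills the vertex streams for one macroblock at the given batch position.
 * The switch intentionally falls through: B macroblocks first write the
 * second motion vector stream, P and B macroblocks then write the first one,
 * and every type finally hits the intra case, which generates the
 * position/texcoord stream for the macroblock's four 8x8 blocks.
 */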
832 static void
833 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
834 struct pipe_mpeg12_macroblock *mb, unsigned pos,
835 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
836 {
837 struct vertex2f mo_vec[2];
838
839 unsigned i;
840
841 assert(r);
842 assert(mb);
843 assert(ycbcr_vb);
844 assert(pos < r->macroblocks_per_batch);
845
846 mo_vec[1].x = 0;
847 mo_vec[1].y = 0;
848
849 switch (mb->mb_type) {
850 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
851 {
852 struct vertex2f *vb;
853
854 assert(ref_vb && ref_vb[1]);
855
856 vb = ref_vb[1] + pos * 2 * 24;
857
858 mo_vec[0].x = mb->pmv[0][1][0];
859 mo_vec[0].y = mb->pmv[0][1][1];
860
861 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
862 for (i = 0; i < 24 * 2; i += 2) {
863 vb[i].x = mo_vec[0].x;
864 vb[i].y = mo_vec[0].y;
865 }
866 }
867 else {
868 mo_vec[1].x = mb->pmv[1][1][0];
869 mo_vec[1].y = mb->pmv[1][1][1];
870
871 for (i = 0; i < 24 * 2; i += 2) {
872 vb[i].x = mo_vec[0].x;
873 vb[i].y = mo_vec[0].y;
874 vb[i + 1].x = mo_vec[1].x;
875 vb[i + 1].y = mo_vec[1].y;
876 }
877 }
878
879 /* fall-through */
880 }
881 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
882 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
883 {
884 struct vertex2f *vb;
885
886 assert(ref_vb && ref_vb[0]);
887
888 vb = ref_vb[0] + pos * 2 * 24;
889
890 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
891 mo_vec[0].x = mb->pmv[0][1][0];
892 mo_vec[0].y = mb->pmv[0][1][1];
893
894 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
895 mo_vec[1].x = mb->pmv[1][1][0];
896 mo_vec[1].y = mb->pmv[1][1][1];
897 }
898 }
899 else {
900 mo_vec[0].x = mb->pmv[0][0][0];
901 mo_vec[0].y = mb->pmv[0][0][1];
902
903 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
904 mo_vec[1].x = mb->pmv[1][0][0];
905 mo_vec[1].y = mb->pmv[1][0][1];
906 }
907 }
908
909 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
910 for (i = 0; i < 24 * 2; i += 2) {
911 vb[i].x = mo_vec[0].x;
912 vb[i].y = mo_vec[0].y;
913 }
914 }
915 else {
916 for (i = 0; i < 24 * 2; i += 2) {
917 vb[i].x = mo_vec[0].x;
918 vb[i].y = mo_vec[0].y;
919 vb[i + 1].x = mo_vec[1].x;
920 vb[i + 1].y = mo_vec[1].y;
921 }
922 }
923
924 /* fall-through */
925 }
926 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
927 {
928 const struct vertex2f unit =
929 {
930 MACROBLOCK_WIDTH,
931 MACROBLOCK_HEIGHT
932 };
933 const struct vertex2f half =
934 {
935 (MACROBLOCK_WIDTH / 2),
936 (MACROBLOCK_HEIGHT / 2)
937 };
938 const struct vertex2f offsets[2][2] =
939 {
940 {
941 {0, 0}, {0, half.y}
942 },
943 {
944 {half.x, 0}, {half.x, half.y}
945 }
946 };
947 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
948
949 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
950
951 gen_block_verts(vb, mb,
952 &unit, &half, &offsets[0][0],
953 32, 2, 1, use_zb, r->zero_block);
954
955 gen_block_verts(vb + 6, mb,
956 &unit, &half, &offsets[1][0],
957 16, 2, 1, use_zb, r->zero_block);
958
959 gen_block_verts(vb + 12, mb,
960 &unit, &half, &offsets[0][1],
961 8, 2, 1, use_zb, r->zero_block);
962
963 gen_block_verts(vb + 18, mb,
964 &unit, &half, &offsets[1][1],
965 4, 2, 1, use_zb, r->zero_block);
966
967 break;
968 }
969 default:
970 assert(0);
971 }
972 }
973
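/*
 * Sorts the batched macroblocks by type while writing them into the mapped
 * vertex buffers, so flush() can draw each macroblock type as one contiguous
 * vertex range.
 */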
974 static void
975 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
976 unsigned *num_macroblocks)
977 {
978 unsigned offset[NUM_MACROBLOCK_TYPES];
979 struct vert_stream_0 *ycbcr_vb;
980 struct vertex2f *ref_vb[2];
981 struct pipe_transfer *buf_transfer[3];
982 unsigned i;
983
984 assert(r);
985 assert(num_macroblocks);
986
987 for (i = 0; i < r->num_macroblocks; ++i) {
988 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
989 ++num_macroblocks[mb_type];
990 }
991
992 offset[0] = 0;
993
994 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
995 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
996
997 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
998 (
999 r->pipe,
1000 r->vertex_bufs.individual.ycbcr.buffer,
1001 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1002 &buf_transfer[0]
1003 );
1004
1005 for (i = 0; i < 2; ++i)
1006 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
1007 (
1008 r->pipe,
1009 r->vertex_bufs.individual.ref[i].buffer,
1010 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1011 &buf_transfer[i + 1]
1012 );
1013
1014 for (i = 0; i < r->num_macroblocks; ++i) {
1015 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1016
1017 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
1018 ycbcr_vb, ref_vb);
1019
1020 ++offset[mb_type];
1021 }
1022
1023 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
1024 for (i = 0; i < 2; ++i)
1025 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
1026 }
1027
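/*
 * Returns a sampler view for the given reference surface, caching the views
 * in texview_map so repeated use of the same surface doesn't create a new
 * view every frame.
 */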
1028 static struct pipe_sampler_view
1029 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
1030 {
1031 struct pipe_sampler_view *sampler_view;
1032 assert(r);
1033 assert(surface);
1034
1035 sampler_view = (struct pipe_sampler_view*)util_keymap_lookup(r->texview_map, &surface);
1036 if (!sampler_view) {
1037 struct pipe_sampler_view templat;
1038 boolean added_to_map;
1039
1040 u_sampler_view_default_template(&templat, surface->texture,
1041 surface->texture->format);
1042 sampler_view = r->pipe->create_sampler_view(r->pipe, surface->texture,
1043 &templat);
1044 if (!sampler_view)
1045 return NULL;
1046
1047 added_to_map = util_keymap_insert(r->texview_map, &surface,
1048 sampler_view, r->pipe);
1049 assert(added_to_map);
1050 }
1051
1052 return sampler_view;
1053 }
1054
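/*
 * Renders one full batch: generates the sorted vertex streams, uploads the
 * normalisation constants, then issues one draw per macroblock type with the
 * matching shaders, reference textures and sampler count.
 */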
1055 static void
1056 flush(struct vl_mpeg12_mc_renderer *r)
1057 {
1058 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1059 unsigned vb_start = 0;
1060 struct vertex_shader_consts *vs_consts;
1061 struct pipe_transfer *buf_transfer;
1062 unsigned i;
1063
1064 assert(r);
1065 assert(r->num_macroblocks == r->macroblocks_per_batch);
1066
1067 gen_macroblock_stream(r, num_macroblocks);
1068
1069 r->fb_state.cbufs[0] = r->surface;
1070
1071 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1072 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1073
1074 vs_consts = pipe_buffer_map
1075 (
1076 r->pipe, r->vs_const_buf,
1077 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1078 &buf_transfer
1079 );
1080
1081 vs_consts->norm.x = 1.0f / r->surface->width;
1082 vs_consts->norm.y = 1.0f / r->surface->height;
1083
1084 pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
1085
1086 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1087 r->vs_const_buf);
1088
1089 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1090 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1091 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
1092 r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
1093 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1094 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1095 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1096
1097 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1098 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1099 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1100 }
1101
1102 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1103 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1104 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1105 r->textures.individual.ref[0] = r->past->texture;
1106 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1107 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1108 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1109 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1110 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1111
1112 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1113 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1114 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1115 }
1116
1117 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0) {
1118 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1119 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1120 r->textures.individual.ref[0] = r->past->texture;
1121 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1122 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1123 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1124 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1125 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1126
1127 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1128 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1129 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1130 }
1131
1132 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1133 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1134 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1135 r->textures.individual.ref[0] = r->future->texture;
1136 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1137 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1138 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1139 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1140 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1141
1142 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1143 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1144 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1145 }
1146
1147 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0) {
1148 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1149 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1150 r->textures.individual.ref[0] = r->future->texture;
1151 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1152 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1153 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1154 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1155 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1156
1157 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1158 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1159 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1160 }
1161
1162 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1163 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1164 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1165 r->textures.individual.ref[0] = r->past->texture;
1166 r->textures.individual.ref[1] = r->future->texture;
1167 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1168 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1169 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1170 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1171 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1172 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1173
1174 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1175 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1176 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1177 }
1178
1179 if (num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0) {
1180 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1181 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1182 r->textures.individual.ref[0] = r->past->texture;
1183 r->textures.individual.ref[1] = r->future->texture;
1184 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1185 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1186 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1187 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1188 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1189 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1190
1191 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1192 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1193 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1194 }
1195
1196 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1197
1198 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1199 for (i = 0; i < 3; ++i)
1200 r->zero_block[i].x = ZERO_BLOCK_NIL;
1201
1202 r->num_macroblocks = 0;
1203 }
1204
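/*
 * Helpers that copy or zero one 8x8 block of shorts inside the mapped
 * textures. The field variants write every other line (double pitch) so the
 * two fields of a field-coded macroblock interleave correctly.
 */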
1205 static void
1206 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1207 {
1208 unsigned y;
1209
1210 assert(src);
1211 assert(dst);
1212
1213 for (y = 0; y < BLOCK_HEIGHT; ++y)
1214 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1215 }
1216
1217 static void
1218 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1219 {
1220 unsigned y;
1221
1222 assert(src);
1223 assert(dst);
1224
1225 for (y = 0; y < BLOCK_HEIGHT; ++y)
1226 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1227 }
1228
1229 static void
1230 fill_frame_zero_block(short *dst, unsigned dst_pitch)
1231 {
1232 unsigned y;
1233
1234 assert(dst);
1235
1236 for (y = 0; y < BLOCK_HEIGHT; ++y)
1237 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1238 }
1239
1240 static void
1241 fill_field_zero_block(short *dst, unsigned dst_pitch)
1242 {
1243 unsigned y;
1244
1245 assert(dst);
1246
1247 for (y = 0; y < BLOCK_HEIGHT; ++y)
1248 memset(dst + y * dst_pitch * 2, 0, BLOCK_WIDTH * 2);
1249 }
1250
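/*
 * Copies a macroblock's coded blocks into the mapped Y/Cb/Cr textures at the
 * macroblock's position. Blocks whose CBP bit is clear are either zero-filled
 * or, with XFER_ONE, redirected to a single shared zero block whose location
 * is remembered in zero_block[].
 */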
1251 static void
1252 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1253 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1254 {
1255 unsigned tex_pitch;
1256 short *texels;
1257 unsigned tb = 0, sb = 0;
1258 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1259 unsigned x, y;
1260
1261 assert(r);
1262 assert(blocks);
1263
1264 tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->resource->format);
1265 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1266
1267 for (y = 0; y < 2; ++y) {
1268 for (x = 0; x < 2; ++x, ++tb) {
1269 if ((cbp >> (5 - tb)) & 1) {
1270 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1271 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1272 texels + y * tex_pitch * BLOCK_HEIGHT +
1273 x * BLOCK_WIDTH, tex_pitch);
1274 }
1275 else {
1276 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1277 texels + y * tex_pitch + x * BLOCK_WIDTH,
1278 tex_pitch);
1279 }
1280
1281 ++sb;
1282 }
1283 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1284 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1285
1286 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1287 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1288
1289 fill_frame_zero_block(texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH, tex_pitch);
1290 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1291 r->zero_block[0].x = (mbpx + x * 8);
1292 r->zero_block[0].y = (mbpy + y * 8);
1293 }
1294 }
1295 }
1296 else {
1297
1298 fill_field_zero_block(texels + y * tex_pitch + x * BLOCK_WIDTH, tex_pitch);
1299 }
1300 }
1301 }
1302 }
1303
1304 /* TODO: Implement 422, 444 */
1305 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1306
1307 mbpx /= 2;
1308 mbpy /= 2;
1309
1310 for (tb = 0; tb < 2; ++tb) {
1311 tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->resource->format);
1312 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1313
1314 if ((cbp >> (1 - tb)) & 1) {
1315 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1316 ++sb;
1317 }
1318 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1319 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1320 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1321 fill_frame_zero_block(texels, tex_pitch);
1322 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1323 r->zero_block[tb + 1].x = (mbpx << 1);
1324 r->zero_block[tb + 1].y = (mbpy << 1);
1325 }
1326 }
1327 }
1328 }
1329 }
1330
1331 static void
1332 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1333 struct pipe_mpeg12_macroblock *mb)
1334 {
1335 assert(r);
1336 assert(mb);
1337 assert(mb->blocks);
1338 assert(r->num_macroblocks < r->macroblocks_per_batch);
1339
1340 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1341 sizeof(struct pipe_mpeg12_macroblock));
1342
1343 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1344
1345 ++r->num_macroblocks;
1346 }
1347
1348 static void
1349 texview_map_delete(const struct keymap *map,
1350 const void *key, void *data,
1351 void *user)
1352 {
1353 struct pipe_sampler_view *sv = (struct pipe_sampler_view*)data;
1354
1355 assert(map);
1356 assert(key);
1357 assert(data);
1358 assert(user);
1359
1360 pipe_sampler_view_reference(&sv, NULL);
1361 }
1362
1363 bool
1364 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1365 struct pipe_context *pipe,
1366 unsigned picture_width,
1367 unsigned picture_height,
1368 enum pipe_video_chroma_format chroma_format,
1369 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1370 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1371 bool pot_buffers)
1372 {
1373 unsigned i;
1374
1375 assert(renderer);
1376 assert(pipe);
1377 /* TODO: Implement other policies */
1378 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1379 /* TODO: Implement this */
1380 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1381 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1382 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1383 assert(pot_buffers);
1384
1385 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1386
1387 renderer->pipe = pipe;
1388 renderer->picture_width = picture_width;
1389 renderer->picture_height = picture_height;
1390 renderer->chroma_format = chroma_format;
1391 renderer->bufmode = bufmode;
1392 renderer->eb_handling = eb_handling;
1393 renderer->pot_buffers = pot_buffers;
1394
1395 renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
1396 texview_map_delete);
1397 if (!renderer->texview_map)
1398 return false;
1399
1400 if (!init_pipe_state(renderer)) {
1401 util_delete_keymap(renderer->texview_map, renderer->pipe);
1402 return false;
1403 }
1404 if (!init_shaders(renderer)) {
1405 util_delete_keymap(renderer->texview_map, renderer->pipe);
1406 cleanup_pipe_state(renderer);
1407 return false;
1408 }
1409 if (!init_buffers(renderer)) {
1410 util_delete_keymap(renderer->texview_map, renderer->pipe);
1411 cleanup_shaders(renderer);
1412 cleanup_pipe_state(renderer);
1413 return false;
1414 }
1415
1416 renderer->surface = NULL;
1417 renderer->past = NULL;
1418 renderer->future = NULL;
1419 for (i = 0; i < 3; ++i)
1420 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1421 renderer->num_macroblocks = 0;
1422
1423 xfer_buffers_map(renderer);
1424
1425 return true;
1426 }
1427
1428 void
1429 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1430 {
1431 assert(renderer);
1432
1433 xfer_buffers_unmap(renderer);
1434
1435 util_delete_keymap(renderer->texview_map, renderer->pipe);
1436 cleanup_pipe_state(renderer);
1437 cleanup_shaders(renderer);
1438 cleanup_buffers(renderer);
1439
1440 pipe_surface_reference(&renderer->surface, NULL);
1441 pipe_surface_reference(&renderer->past, NULL);
1442 pipe_surface_reference(&renderer->future, NULL);
1443 }
1444
1445 void
1446 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1447 *renderer,
1448 struct pipe_surface *surface,
1449 struct pipe_surface *past,
1450 struct pipe_surface *future,
1451 unsigned num_macroblocks,
1452 struct pipe_mpeg12_macroblock
1453 *mpeg12_macroblocks,
1454 struct pipe_fence_handle **fence)
1455 {
1456 bool new_surface = false;
1457
1458 assert(renderer);
1459 assert(surface);
1460 assert(num_macroblocks);
1461 assert(mpeg12_macroblocks);
1462
1463 if (renderer->surface) {
1464 if (surface != renderer->surface) {
1465 if (renderer->num_macroblocks > 0) {
1466 xfer_buffers_unmap(renderer);
1467 flush(renderer);
1468 }
1469
1470 new_surface = true;
1471 }
1472
1473 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1474 assert(surface != renderer->surface || renderer->past == past);
1475 assert(surface != renderer->surface || renderer->future == future);
1476 }
1477 else
1478 new_surface = true;
1479
1480 if (new_surface) {
1481 pipe_surface_reference(&renderer->surface, surface);
1482 pipe_surface_reference(&renderer->past, past);
1483 pipe_surface_reference(&renderer->future, future);
1484 renderer->fence = fence;
1485 }
1486
1487 while (num_macroblocks) {
1488 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1489 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1490 unsigned i;
1491
1492 for (i = 0; i < num_to_submit; ++i) {
1493 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1494 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1495 }
1496
1497 num_macroblocks -= num_to_submit;
1498
1499 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1500 xfer_buffers_unmap(renderer);
1501 flush(renderer);
1502 xfer_buffers_map(renderer);
1503 /* Next time we get this surface it may have new ref frames */
1504 pipe_surface_reference(&renderer->surface, NULL);
1505 pipe_surface_reference(&renderer->past, NULL);
1506 pipe_surface_reference(&renderer->future, NULL);
1507 }
1508 }
1509 }