[g3dvl] simplify shaders and fix bugs
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include "util/u_draw.h"
30 #include <assert.h>
31 #include <pipe/p_context.h>
32 #include <util/u_inlines.h>
33 #include <util/u_format.h>
34 #include <util/u_math.h>
35 #include <util/u_memory.h>
36 #include <util/u_keymap.h>
37 #include <util/u_sampler.h>
38 #include <tgsi/tgsi_ureg.h>
39
#define DEFAULT_BUF_ALIGNMENT 1

/* Macroblock and block dimensions, in picture samples. */
#define MACROBLOCK_WIDTH 16
#define MACROBLOCK_HEIGHT 16
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8

/*
 * Sentinel stored in the x coordinate of a zero block position that has not
 * been assigned; ZERO_BLOCK_IS_NIL() tests for it.  The value is
 * parenthesized so the macro expands safely inside any expression.
 */
#define ZERO_BLOCK_NIL (-1.0f)
#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)

/* Factor the fragment shaders multiply the sampled Y/Cb/Cr texels by. */
#define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
48
49 struct vertex_shader_consts
50 {
51 struct vertex4f norm;
52 };
53
54 struct fragment_shader_consts
55 {
56 struct vertex4f multiplier;
57 struct vertex4f div;
58 };
59
60 struct vert_stream_0
61 {
62 struct vertex2f pos;
63 struct vertex2f luma_tc;
64 struct vertex2f cb_tc;
65 struct vertex2f cr_tc;
66 };
67
/*
 * Renderer-internal macroblock classes: the prediction direction combined
 * with frame or field motion.  The values are used as array indices, so they
 * must stay dense and start at zero.
 */
enum MACROBLOCK_TYPE
{
   MACROBLOCK_TYPE_INTRA = 0,
   MACROBLOCK_TYPE_FWD_FRAME_PRED,
   MACROBLOCK_TYPE_FWD_FIELD_PRED,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED,
   MACROBLOCK_TYPE_BI_FRAME_PRED,
   MACROBLOCK_TYPE_BI_FIELD_PRED,

   /* Number of classes above; not a class itself. */
   NUM_MACROBLOCK_TYPES
};
80
81 static void *
82 create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
83 {
84 struct ureg_program *shader;
85 struct ureg_src norm, mbs;
86 struct ureg_src vpos, vtex[3], vmv[4];
87 struct ureg_dst temp;
88 struct ureg_dst o_vpos, o_vtex[3], o_vmv[4], o_line;
89 unsigned i, j, count;
90
91 shader = ureg_create(TGSI_PROCESSOR_VERTEX);
92 if (!shader)
93 return NULL;
94
95 norm = ureg_DECL_constant(shader, 0);
96 mbs = ureg_imm2f(shader, MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT);
97 temp = ureg_DECL_temporary(shader);
98
99 vpos = ureg_DECL_vs_input(shader, 0);
100 o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
101
102 for (i = 0; i < 3; ++i) {
103 vtex[i] = ureg_DECL_vs_input(shader, 1 + i);
104 o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1 + i);
105 }
106
107 count=0;
108 for (i = 0; i < ref_frames; ++i) {
109 for (j = 0; j < 2; ++j) {
110 if(j < mv_per_frame) {
111 vmv[count] = ureg_DECL_vs_input(shader, 4 + i * 2 + j);
112 o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
113 count++;
114 }
115 /* workaround for r600g */
116 else if(ref_frames == 2)
117 ureg_DECL_vs_input(shader, 4 + i * 2 + j);
118 }
119 }
120
121 o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
122
123 /*
124 * o_vpos = vpos * norm * mbs
125 * o_vtex[0..2] = vtex[0..2] * norm * mbs
126 * o_vmv[0..count] = o_vpos + vmv[0..4] * 0.5 // Apply motion vector
127 * o_frc = fract(vpos) * mbs
128 */
129 ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vpos, mbs);
130 ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(temp), norm);
131 ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
132 for (i = 0; i < 3; ++i) {
133 ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vtex[i], mbs);
134 ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(temp), norm);
135 }
136 for (i = 0; i < count; ++i) {
137 ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vmv[i],
138 ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
139 ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(temp), norm, ureg_src(o_vpos));
140 }
141 if (mv_per_frame == 2) {
142 ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f));
143 ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vpos, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
144 }
145
146 ureg_release_temporary(shader, temp);
147 ureg_END(shader);
148
149 return ureg_create_shader_and_destroy(shader, r->pipe);
150 }
151
152 static void *
153 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
154 {
155 struct ureg_program *shader;
156 struct ureg_src tc[3];
157 struct ureg_src sampler[3];
158 struct ureg_dst texel, temp;
159 struct ureg_dst fragment;
160 unsigned i;
161
162 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
163 if (!shader)
164 return NULL;
165
166 for (i = 0; i < 3; ++i) {
167 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
168 sampler[i] = ureg_DECL_sampler(shader, i);
169 }
170 texel = ureg_DECL_temporary(shader);
171 temp = ureg_DECL_temporary(shader);
172 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
173
174 /*
175 * texel.r = tex(tc[0], sampler[0])
176 * texel.g = tex(tc[1], sampler[1])
177 * texel.b = tex(tc[2], sampler[2])
178 * fragment = texel * scale
179 */
180 for (i = 0; i < 3; ++i) {
181 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
182 ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
183 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
184 }
185 ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
186
187 ureg_release_temporary(shader, texel);
188 ureg_release_temporary(shader, temp);
189 ureg_END(shader);
190
191 return ureg_create_shader_and_destroy(shader, r->pipe);
192 }
193
194 static void *
195 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
196 {
197 struct ureg_program *shader;
198 struct ureg_src tc[4];
199 struct ureg_src sampler[4];
200 struct ureg_dst texel, ref;
201 struct ureg_dst fragment;
202 unsigned i;
203
204 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
205 if (!shader)
206 return NULL;
207
208 for (i = 0; i < 4; ++i) {
209 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
210 sampler[i] = ureg_DECL_sampler(shader, i);
211 }
212 texel = ureg_DECL_temporary(shader);
213 ref = ureg_DECL_temporary(shader);
214 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
215
216 /*
217 * texel.r = tex(tc[0], sampler[0])
218 * texel.g = tex(tc[1], sampler[1])
219 * texel.b = tex(tc[2], sampler[2])
220 * ref = tex(tc[3], sampler[3])
221 * fragment = texel * scale + ref
222 */
223 for (i = 0; i < 3; ++i) {
224 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
225 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
226 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
227 }
228 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
229 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
230
231 ureg_release_temporary(shader, texel);
232 ureg_release_temporary(shader, ref);
233 ureg_END(shader);
234
235 return ureg_create_shader_and_destroy(shader, r->pipe);
236 }
237
238 static void *
239 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
240 {
241 struct ureg_program *shader;
242 struct ureg_src tc[5], line;
243 struct ureg_src sampler[4];
244 struct ureg_dst texel, ref, tmp;
245 struct ureg_dst fragment;
246 unsigned i, label;
247
248 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
249 if (!shader)
250 return NULL;
251
252 for (i = 0; i < 5; ++i)
253 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
254 line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 6, TGSI_INTERPOLATE_LINEAR);
255 for (i = 0; i < 4; ++i)
256 sampler[i] = ureg_DECL_sampler(shader, i);
257
258 texel = ureg_DECL_temporary(shader);
259 ref = ureg_DECL_temporary(shader);
260 tmp = ureg_DECL_temporary(shader);
261 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
262
263 /*
264 * texel.r = tex(tc[0], sampler[0])
265 * texel.g = tex(tc[1], sampler[1])
266 * texel.b = tex(tc[2], sampler[2])
267 * if(fract & 1)
268 * ref = tex(tc[3], sampler[3])
269 * else
270 * ref = tex(tc[4], sampler[3])
271 * fragment = texel * scale + ref
272 */
273 for (i = 0; i < 3; ++i) {
274 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
275 ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
276 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
277 }
278
279 ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
280 ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
281 ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
282 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[4], sampler[3]);
283 ureg_ELSE(shader, &label);
284 ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
285 ureg_ENDIF(shader);
286
287 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
288
289 ureg_release_temporary(shader, tmp);
290 ureg_release_temporary(shader, texel);
291 ureg_release_temporary(shader, ref);
292 ureg_END(shader);
293
294 return ureg_create_shader_and_destroy(shader, r->pipe);
295 }
296
297 static void *
298 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
299 {
300 struct ureg_program *shader;
301 struct ureg_src tc[5];
302 struct ureg_src sampler[5];
303 struct ureg_dst texel, ref[2], tmp;
304 struct ureg_dst fragment;
305 unsigned i;
306
307 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
308 if (!shader)
309 return NULL;
310
311 for (i = 0; i < 5; ++i) {
312 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
313 sampler[i] = ureg_DECL_sampler(shader, i);
314 }
315 texel = ureg_DECL_temporary(shader);
316 ref[0] = ureg_DECL_temporary(shader);
317 ref[1] = ureg_DECL_temporary(shader);
318 tmp = ureg_DECL_temporary(shader);
319 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
320
321 /*
322 * texel.r = tex(tc[0], sampler[0])
323 * texel.g = tex(tc[1], sampler[1])
324 * texel.b = tex(tc[2], sampler[2])
325 * ref[0..1 = tex(tc[3..4], sampler[3..4])
326 * ref[0] = lerp(ref[0], ref[1], 0.5)
327 * fragment = texel * scale + ref[0]
328 */
329 for (i = 0; i < 3; ++i) {
330 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
331 ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
332 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
333 }
334 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
335 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
336 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
337
338 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
339
340 ureg_release_temporary(shader, tmp);
341 ureg_release_temporary(shader, texel);
342 ureg_release_temporary(shader, ref[0]);
343 ureg_release_temporary(shader, ref[1]);
344 ureg_END(shader);
345
346 return ureg_create_shader_and_destroy(shader, r->pipe);
347 }
348
349 static void *
350 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
351 {
352 struct ureg_program *shader;
353 struct ureg_src tc[7], line;
354 struct ureg_src sampler[5];
355 struct ureg_dst texel, ref[2], tmp;
356 struct ureg_dst fragment;
357 unsigned i, label;
358
359 shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
360 if (!shader)
361 return NULL;
362
363 for (i = 0; i < 7; ++i)
364 tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
365 line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 8, TGSI_INTERPOLATE_LINEAR);
366 for (i = 0; i < 5; ++i)
367 sampler[i] = ureg_DECL_sampler(shader, i);
368
369 texel = ureg_DECL_temporary(shader);
370 ref[0] = ureg_DECL_temporary(shader);
371 ref[1] = ureg_DECL_temporary(shader);
372 tmp = ureg_DECL_temporary(shader);
373 fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
374
375 /*
376 * texel.r = tex(tc[0], sampler[0])
377 * texel.g = tex(tc[1], sampler[1])
378 * texel.b = tex(tc[2], sampler[2])
379 * ref[0..1 = tex(tc[3..4], sampler[3..4])
380 * ref[0] = lerp(ref[0], ref[1], 0.5)
381 * fragment = texel * scale + ref[0]
382 */
383 for (i = 0; i < 3; ++i) {
384 /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
385 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
386 ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
387 }
388
389 ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
390 ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
391 ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
392 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[4], sampler[3]);
393 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[6], sampler[4]);
394 ureg_ELSE(shader, &label);
395 ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
396 ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[5], sampler[4]);
397 ureg_ENDIF(shader);
398
399 ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
400
401 ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
402
403 ureg_release_temporary(shader, tmp);
404 ureg_release_temporary(shader, texel);
405 ureg_release_temporary(shader, ref[0]);
406 ureg_release_temporary(shader, ref[1]);
407 ureg_END(shader);
408
409 return ureg_create_shader_and_destroy(shader, r->pipe);
410 }
411
412 static void
413 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
414 {
415 unsigned i;
416
417 assert(r);
418
419 for (i = 0; i < 3; ++i) {
420 struct pipe_box rect =
421 {
422 0, 0, 0,
423 r->textures.all[i]->width0,
424 r->textures.all[i]->height0,
425 1
426 };
427
428 r->tex_transfer[i] = r->pipe->get_transfer
429 (
430 r->pipe, r->textures.all[i],
431 u_subresource(0, 0),
432 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
433 &rect
434 );
435
436 r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]);
437 }
438 }
439
440 static void
441 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
442 {
443 unsigned i;
444
445 assert(r);
446
447 for (i = 0; i < 3; ++i) {
448 r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]);
449 r->pipe->transfer_destroy(r->pipe, r->tex_transfer[i]);
450 }
451 }
452
453 static bool
454 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
455 {
456 struct pipe_sampler_state sampler;
457 unsigned filters[5];
458 unsigned i;
459
460 assert(r);
461
462 r->viewport.scale[0] = r->pot_buffers ?
463 util_next_power_of_two(r->picture_width) : r->picture_width;
464 r->viewport.scale[1] = r->pot_buffers ?
465 util_next_power_of_two(r->picture_height) : r->picture_height;
466 r->viewport.scale[2] = 1;
467 r->viewport.scale[3] = 1;
468 r->viewport.translate[0] = 0;
469 r->viewport.translate[1] = 0;
470 r->viewport.translate[2] = 0;
471 r->viewport.translate[3] = 0;
472
473 r->fb_state.width = r->pot_buffers ?
474 util_next_power_of_two(r->picture_width) : r->picture_width;
475 r->fb_state.height = r->pot_buffers ?
476 util_next_power_of_two(r->picture_height) : r->picture_height;
477 r->fb_state.nr_cbufs = 1;
478 r->fb_state.zsbuf = NULL;
479
480 /* Luma filter */
481 filters[0] = PIPE_TEX_FILTER_NEAREST;
482 /* Chroma filters */
483 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
484 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
485 filters[1] = PIPE_TEX_FILTER_NEAREST;
486 filters[2] = PIPE_TEX_FILTER_NEAREST;
487 }
488 else {
489 filters[1] = PIPE_TEX_FILTER_LINEAR;
490 filters[2] = PIPE_TEX_FILTER_LINEAR;
491 }
492 /* Fwd, bkwd ref filters */
493 filters[3] = PIPE_TEX_FILTER_LINEAR;
494 filters[4] = PIPE_TEX_FILTER_LINEAR;
495
496 for (i = 0; i < 5; ++i) {
497 memset(&sampler, 0, sizeof(sampler));
498 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
499 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
500 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
501 sampler.min_img_filter = filters[i];
502 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
503 sampler.mag_img_filter = filters[i];
504 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
505 sampler.compare_func = PIPE_FUNC_ALWAYS;
506 sampler.normalized_coords = 1;
507 /*sampler.shadow_ambient = ; */
508 /*sampler.lod_bias = ; */
509 sampler.min_lod = 0;
510 /*sampler.max_lod = ; */
511 /*sampler.border_color[i] = ; */
512 /*sampler.max_anisotropy = ; */
513 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
514 }
515
516 return true;
517 }
518
519 static void
520 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
521 {
522 unsigned i;
523
524 assert(r);
525
526 for (i = 0; i < 5; ++i)
527 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
528 }
529
530 static bool
531 init_shaders(struct vl_mpeg12_mc_renderer *r)
532 {
533 assert(r);
534
535 assert(r->i_vs = create_vert_shader(r, 0, 0));
536 assert(r->i_fs = create_intra_frag_shader(r));
537
538 assert(r->p_vs[0] = create_vert_shader(r, 1, 1));
539 assert(r->p_vs[1] = create_vert_shader(r, 1, 2));
540 assert(r->p_fs[0] = create_frame_pred_frag_shader(r));
541 assert(r->p_fs[1] = create_field_pred_frag_shader(r));
542
543 assert(r->b_vs[0] = create_vert_shader(r, 2, 1));
544 assert(r->b_vs[1] = create_vert_shader(r, 2, 2));
545 assert(r->b_fs[0] = create_frame_bi_pred_frag_shader(r));
546 assert(r->b_fs[1] = create_field_bi_pred_frag_shader(r));
547
548 return true;
549 }
550
551 static void
552 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
553 {
554 assert(r);
555
556 r->pipe->delete_vs_state(r->pipe, r->i_vs);
557 r->pipe->delete_fs_state(r->pipe, r->i_fs);
558 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
559 r->pipe->delete_vs_state(r->pipe, r->p_vs[1]);
560 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
561 r->pipe->delete_fs_state(r->pipe, r->p_fs[1]);
562 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
563 r->pipe->delete_vs_state(r->pipe, r->b_vs[1]);
564 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
565 r->pipe->delete_fs_state(r->pipe, r->b_fs[1]);
566 }
567
568 static bool
569 init_buffers(struct vl_mpeg12_mc_renderer *r)
570 {
571 struct pipe_resource template;
572 struct pipe_vertex_element vertex_elems[8];
573 struct pipe_sampler_view sampler_view;
574
575 const unsigned mbw =
576 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
577 const unsigned mbh =
578 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
579
580 unsigned i;
581
582 assert(r);
583
584 r->macroblocks_per_batch =
585 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
586 r->num_macroblocks = 0;
587 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
588
589 memset(&template, 0, sizeof(struct pipe_resource));
590 template.target = PIPE_TEXTURE_2D;
591 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
592 template.format = PIPE_FORMAT_R16_SNORM;
593 template.last_level = 0;
594 template.width0 = r->pot_buffers ?
595 util_next_power_of_two(r->picture_width) : r->picture_width;
596 template.height0 = r->pot_buffers ?
597 util_next_power_of_two(r->picture_height) : r->picture_height;
598 template.depth0 = 1;
599 template.usage = PIPE_USAGE_DYNAMIC;
600 template.bind = PIPE_BIND_SAMPLER_VIEW;
601 template.flags = 0;
602
603 r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
604
605 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
606 template.width0 = r->pot_buffers ?
607 util_next_power_of_two(r->picture_width / 2) :
608 r->picture_width / 2;
609 template.height0 = r->pot_buffers ?
610 util_next_power_of_two(r->picture_height / 2) :
611 r->picture_height / 2;
612 }
613 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
614 template.height0 = r->pot_buffers ?
615 util_next_power_of_two(r->picture_height / 2) :
616 r->picture_height / 2;
617
618 r->textures.individual.cb =
619 r->pipe->screen->resource_create(r->pipe->screen, &template);
620 r->textures.individual.cr =
621 r->pipe->screen->resource_create(r->pipe->screen, &template);
622
623 for (i = 0; i < 3; ++i) {
624 u_sampler_view_default_template(&sampler_view,
625 r->textures.all[i],
626 r->textures.all[i]->format);
627 r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
628 }
629
630 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
631 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
632 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
633 /* XXX: Create with usage DYNAMIC or STREAM */
634 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
635 (
636 r->pipe->screen,
637 PIPE_BIND_VERTEX_BUFFER,
638 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
639 );
640
641 for (i = 1; i < 3; ++i) {
642 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
643 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
644 r->vertex_bufs.all[i].buffer_offset = 0;
645 /* XXX: Create with usage DYNAMIC or STREAM */
646 r->vertex_bufs.all[i].buffer = pipe_buffer_create
647 (
648 r->pipe->screen,
649 PIPE_BIND_VERTEX_BUFFER,
650 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
651 );
652 }
653
654 memset(&vertex_elems, 0, sizeof(vertex_elems));
655
656 /* Position element */
657 vertex_elems[0].src_offset = 0;
658 vertex_elems[0].instance_divisor = 0;
659 vertex_elems[0].vertex_buffer_index = 0;
660 vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
661
662 /* Luma, texcoord element */
663 vertex_elems[1].src_offset = sizeof(struct vertex2f);
664 vertex_elems[1].instance_divisor = 0;
665 vertex_elems[1].vertex_buffer_index = 0;
666 vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
667
668 /* Chroma Cr texcoord element */
669 vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
670 vertex_elems[2].instance_divisor = 0;
671 vertex_elems[2].vertex_buffer_index = 0;
672 vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
673
674 /* Chroma Cb texcoord element */
675 vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
676 vertex_elems[3].instance_divisor = 0;
677 vertex_elems[3].vertex_buffer_index = 0;
678 vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
679
680 /* First ref surface top field texcoord element */
681 vertex_elems[4].src_offset = 0;
682 vertex_elems[4].instance_divisor = 0;
683 vertex_elems[4].vertex_buffer_index = 1;
684 vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
685
686 /* First ref surface bottom field texcoord element */
687 vertex_elems[5].src_offset = sizeof(struct vertex2f);
688 vertex_elems[5].instance_divisor = 0;
689 vertex_elems[5].vertex_buffer_index = 1;
690 vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
691
692 /* Second ref surface top field texcoord element */
693 vertex_elems[6].src_offset = 0;
694 vertex_elems[6].instance_divisor = 0;
695 vertex_elems[6].vertex_buffer_index = 2;
696 vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
697
698 /* Second ref surface bottom field texcoord element */
699 vertex_elems[7].src_offset = sizeof(struct vertex2f);
700 vertex_elems[7].instance_divisor = 0;
701 vertex_elems[7].vertex_buffer_index = 2;
702 vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
703
704 r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
705 r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
706 r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
707
708 r->vs_const_buf = pipe_buffer_create
709 (
710 r->pipe->screen,
711 PIPE_BIND_CONSTANT_BUFFER,
712 sizeof(struct vertex_shader_consts)
713 );
714
715 return true;
716 }
717
718 static void
719 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
720 {
721 unsigned i;
722
723 assert(r);
724
725 pipe_resource_reference(&r->vs_const_buf, NULL);
726
727 for (i = 0; i < 3; ++i) {
728 pipe_sampler_view_reference(&r->sampler_views.all[i], NULL);
729 r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state.all[i]);
730 pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
731 pipe_resource_reference(&r->textures.all[i], NULL);
732 }
733
734 FREE(r->macroblock_buf);
735 }
736
737 static enum MACROBLOCK_TYPE
738 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
739 {
740 assert(mb);
741
742 switch (mb->mb_type) {
743 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
744 return MACROBLOCK_TYPE_INTRA;
745 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
746 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
747 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
748 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
749 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
750 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
751 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
752 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
753 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
754 default:
755 assert(0);
756 }
757
758 /* Unreachable */
759 return -1;
760 }
761
762 static void
763 gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
764 const struct vertex2f *offset,
765 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
766 bool use_zeroblocks, struct vertex2f *zero_blocks)
767 {
768 unsigned cbp = mb->cbp;
769 unsigned mbx = mb->mbx;
770 unsigned mby = mb->mby;
771
772 const struct vertex2f half =
773 {
774 0.5f,
775 0.5f
776 };
777 struct vertex2f v;
778
779 assert(vb);
780 assert(offset);
781 assert(zero_blocks || !use_zeroblocks);
782
783 /* Generate vertices for two triangles covering a block */
784 v.x = mbx + offset->x;
785 v.y = mby + offset->y;
786
787 vb[0].pos.x = v.x;
788 vb[0].pos.y = v.y;
789 vb[1].pos.x = v.x;
790 vb[1].pos.y = v.y + half.y;
791 vb[2].pos.x = v.x + half.x;
792 vb[2].pos.y = v.y;
793 vb[3].pos.x = v.x + half.x;
794 vb[3].pos.y = v.y;
795 vb[4].pos.x = v.x;
796 vb[4].pos.y = v.y + half.y;
797 vb[5].pos.x = v.x + half.x;
798 vb[5].pos.y = v.y + half.y;
799
800 /* Generate texcoords for the triangles, either pointing to the correct area on the luma/chroma texture
801 or if zero blocks are being used, to the zero block if the appropriate CBP bits aren't set (i.e. no data
802 for this channel is defined for this block) */
803
804 if (!use_zeroblocks || cbp & luma_mask || mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
805 v.x = mbx + offset->x;
806 v.y = mby + offset->y;
807 }
808 else {
809 v.x = zero_blocks[0].x;
810 v.y = zero_blocks[0].y;
811 }
812
813 vb[0].luma_tc.x = v.x;
814 vb[0].luma_tc.y = v.y;
815 vb[1].luma_tc.x = v.x;
816 vb[1].luma_tc.y = v.y + half.y;
817 vb[2].luma_tc.x = v.x + half.x;
818 vb[2].luma_tc.y = v.y;
819 vb[3].luma_tc.x = v.x + half.x;
820 vb[3].luma_tc.y = v.y;
821 vb[4].luma_tc.x = v.x;
822 vb[4].luma_tc.y = v.y + half.y;
823 vb[5].luma_tc.x = v.x + half.x;
824 vb[5].luma_tc.y = v.y + half.y;
825
826 if (!use_zeroblocks || cbp & cb_mask) {
827 v.x = mbx + offset->x;
828 v.y = mby + offset->y;
829 }
830 else {
831 v.x = zero_blocks[1].x;
832 v.y = zero_blocks[1].y;
833 }
834
835 vb[0].cb_tc.x = v.x;
836 vb[0].cb_tc.y = v.y;
837 vb[1].cb_tc.x = v.x;
838 vb[1].cb_tc.y = v.y + half.y;
839 vb[2].cb_tc.x = v.x + half.x;
840 vb[2].cb_tc.y = v.y;
841 vb[3].cb_tc.x = v.x + half.x;
842 vb[3].cb_tc.y = v.y;
843 vb[4].cb_tc.x = v.x;
844 vb[4].cb_tc.y = v.y + half.y;
845 vb[5].cb_tc.x = v.x + half.x;
846 vb[5].cb_tc.y = v.y + half.y;
847
848 if (!use_zeroblocks || cbp & cr_mask) {
849 v.x = mbx + offset->x;
850 v.y = mby + offset->y;
851 }
852 else {
853 v.x = zero_blocks[2].x;
854 v.y = zero_blocks[2].y;
855 }
856
857 vb[0].cr_tc.x = v.x;
858 vb[0].cr_tc.y = v.y;
859 vb[1].cr_tc.x = v.x;
860 vb[1].cr_tc.y = v.y + half.y;
861 vb[2].cr_tc.x = v.x + half.x;
862 vb[2].cr_tc.y = v.y;
863 vb[3].cr_tc.x = v.x + half.x;
864 vb[3].cr_tc.y = v.y;
865 vb[4].cr_tc.x = v.x;
866 vb[4].cr_tc.y = v.y + half.y;
867 vb[5].cr_tc.x = v.x + half.x;
868 vb[5].cr_tc.y = v.y + half.y;
869 }
870
871 static void
872 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
873 struct pipe_mpeg12_macroblock *mb, unsigned pos,
874 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
875 {
876 struct vertex2f mo_vec[2];
877
878 unsigned i;
879
880 assert(r);
881 assert(mb);
882 assert(ycbcr_vb);
883 assert(pos < r->macroblocks_per_batch);
884
885 mo_vec[1].x = 0;
886 mo_vec[1].y = 0;
887
888 switch (mb->mb_type) {
889 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
890 {
891 struct vertex2f *vb;
892
893 assert(ref_vb && ref_vb[1]);
894
895 vb = ref_vb[1] + pos * 2 * 24;
896
897 mo_vec[0].x = mb->pmv[0][1][0];
898 mo_vec[0].y = mb->pmv[0][1][1];
899
900 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
901 for (i = 0; i < 24 * 2; i += 2) {
902 vb[i].x = mo_vec[0].x;
903 vb[i].y = mo_vec[0].y;
904 }
905 }
906 else {
907 mo_vec[1].x = mb->pmv[1][1][0];
908 mo_vec[1].y = mb->pmv[1][1][1];
909
910 for (i = 0; i < 24 * 2; i += 2) {
911 vb[i].x = mo_vec[0].x;
912 vb[i].y = mo_vec[0].y;
913 vb[i + 1].x = mo_vec[1].x;
914 vb[i + 1].y = mo_vec[1].y;
915 }
916 }
917
918 /* fall-through */
919 }
920 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
921 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
922 {
923 struct vertex2f *vb;
924
925 assert(ref_vb && ref_vb[0]);
926
927 vb = ref_vb[0] + pos * 2 * 24;
928
929 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
930 mo_vec[0].x = mb->pmv[0][1][0];
931 mo_vec[0].y = mb->pmv[0][1][1];
932
933 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
934 mo_vec[1].x = mb->pmv[1][1][0];
935 mo_vec[1].y = mb->pmv[1][1][1];
936 }
937 }
938 else {
939 mo_vec[0].x = mb->pmv[0][0][0];
940 mo_vec[0].y = mb->pmv[0][0][1];
941
942 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
943 mo_vec[1].x = mb->pmv[1][0][0];
944 mo_vec[1].y = mb->pmv[1][0][1];
945 }
946 }
947
948 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
949 for (i = 0; i < 24 * 2; i += 2) {
950 vb[i].x = mo_vec[0].x;
951 vb[i].y = mo_vec[0].y;
952 }
953 }
954 else {
955 for (i = 0; i < 24 * 2; i += 2) {
956 vb[i].x = mo_vec[0].x;
957 vb[i].y = mo_vec[0].y;
958 vb[i + 1].x = mo_vec[1].x;
959 vb[i + 1].y = mo_vec[1].y;
960 }
961 }
962
963 /* fall-through */
964 }
965 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
966 {
967 const struct vertex2f offsets[2][2] =
968 {
969 {
970 {0, 0}, {0, 0.5f}
971 },
972 {
973 {0.5f, 0}, {0.5f, 0.5f}
974 }
975 };
976 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
977
978 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
979
980 gen_block_verts(vb, mb, &offsets[0][0],
981 32, 2, 1, use_zb, r->zero_block);
982
983 gen_block_verts(vb + 6, mb, &offsets[1][0],
984 16, 2, 1, use_zb, r->zero_block);
985
986 gen_block_verts(vb + 12, mb, &offsets[0][1],
987 8, 2, 1, use_zb, r->zero_block);
988
989 gen_block_verts(vb + 18, mb, &offsets[1][1],
990 4, 2, 1, use_zb, r->zero_block);
991
992 break;
993 }
994 default:
995 assert(0);
996 }
997 }
998
999 static void
1000 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
1001 unsigned *num_macroblocks)
1002 {
1003 unsigned offset[NUM_MACROBLOCK_TYPES];
1004 struct vert_stream_0 *ycbcr_vb;
1005 struct vertex2f *ref_vb[2];
1006 struct pipe_transfer *buf_transfer[3];
1007 unsigned i;
1008
1009 assert(r);
1010 assert(num_macroblocks);
1011
1012 for (i = 0; i < r->num_macroblocks; ++i) {
1013 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1014 ++num_macroblocks[mb_type];
1015 }
1016
1017 offset[0] = 0;
1018
1019 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
1020 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
1021
1022 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
1023 (
1024 r->pipe,
1025 r->vertex_bufs.individual.ycbcr.buffer,
1026 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1027 &buf_transfer[0]
1028 );
1029
1030 for (i = 0; i < 2; ++i)
1031 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
1032 (
1033 r->pipe,
1034 r->vertex_bufs.individual.ref[i].buffer,
1035 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1036 &buf_transfer[i + 1]
1037 );
1038
1039 for (i = 0; i < r->num_macroblocks; ++i) {
1040 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1041
1042 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
1043 ycbcr_vb, ref_vb);
1044
1045 ++offset[mb_type];
1046 }
1047
1048 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
1049 for (i = 0; i < 2; ++i)
1050 pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
1051 }
1052
1053 static struct pipe_sampler_view
1054 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
1055 {
1056 struct pipe_sampler_view *sampler_view;
1057 assert(r);
1058 assert(surface);
1059
1060 sampler_view = (struct pipe_sampler_view*)util_keymap_lookup(r->texview_map, &surface);
1061 if (!sampler_view) {
1062 struct pipe_sampler_view templat;
1063 boolean added_to_map;
1064
1065 u_sampler_view_default_template(&templat, surface->texture,
1066 surface->texture->format);
1067 sampler_view = r->pipe->create_sampler_view(r->pipe, surface->texture,
1068 &templat);
1069 if (!sampler_view)
1070 return NULL;
1071
1072 added_to_map = util_keymap_insert(r->texview_map, &surface,
1073 sampler_view, r->pipe);
1074 assert(added_to_map);
1075 }
1076
1077 return sampler_view;
1078 }
1079
1080 static void
1081 flush(struct vl_mpeg12_mc_renderer *r)
1082 {
1083 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1084 unsigned vb_start = 0;
1085 struct vertex_shader_consts *vs_consts;
1086 struct pipe_transfer *buf_transfer;
1087 unsigned i;
1088
1089 assert(r);
1090 assert(r->num_macroblocks == r->macroblocks_per_batch);
1091
1092 gen_macroblock_stream(r, num_macroblocks);
1093
1094 r->fb_state.cbufs[0] = r->surface;
1095
1096 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1097 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1098
1099 vs_consts = pipe_buffer_map
1100 (
1101 r->pipe, r->vs_const_buf,
1102 PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
1103 &buf_transfer
1104 );
1105
1106 vs_consts->norm.x = 1.0f / r->surface->width;
1107 vs_consts->norm.y = 1.0f / r->surface->height;
1108
1109 pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
1110
1111 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1112 r->vs_const_buf);
1113
1114 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1115 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1116 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
1117 r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
1118 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1119 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1120 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1121
1122 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1123 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1124 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1125 }
1126
1127 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1128 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1129 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1130 r->textures.individual.ref[0] = r->past->texture;
1131 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1132 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1133 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1134 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1135 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1136
1137 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1138 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1139 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1140 }
1141
1142 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0) {
1143 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1144 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1145 r->textures.individual.ref[0] = r->past->texture;
1146 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1147 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1148 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1149 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1150 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1151
1152 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1153 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1154 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1155 }
1156
1157 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1158 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1159 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1160 r->textures.individual.ref[0] = r->future->texture;
1161 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1162 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1163 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1164 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1165 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1166
1167 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1168 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1169 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1170 }
1171
1172 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0) {
1173 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1174 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
1175 r->textures.individual.ref[0] = r->future->texture;
1176 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
1177 r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
1178 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1179 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1180 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1181
1182 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1183 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1184 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1185 }
1186
1187 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1188 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1189 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1190 r->textures.individual.ref[0] = r->past->texture;
1191 r->textures.individual.ref[1] = r->future->texture;
1192 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1193 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1194 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1195 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1196 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1197 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1198
1199 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1200 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1201 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1202 }
1203
1204 if (num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0) {
1205 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1206 r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
1207 r->textures.individual.ref[0] = r->past->texture;
1208 r->textures.individual.ref[1] = r->future->texture;
1209 r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
1210 r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
1211 r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
1212 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1213 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1214 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1215
1216 util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1217 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1218 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1219 }
1220
1221 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1222
1223 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1224 for (i = 0; i < 3; ++i)
1225 r->zero_block[i].x = ZERO_BLOCK_NIL;
1226
1227 r->num_macroblocks = 0;
1228 }
1229
1230 static void
1231 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1232 {
1233 unsigned y;
1234
1235 assert(src);
1236 assert(dst);
1237
1238 for (y = 0; y < BLOCK_HEIGHT; ++y)
1239 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1240 }
1241
1242 static void
1243 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1244 {
1245 unsigned y;
1246
1247 assert(src);
1248 assert(dst);
1249
1250 for (y = 0; y < BLOCK_HEIGHT; ++y)
1251 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1252 }
1253
1254 static void
1255 fill_frame_zero_block(short *dst, unsigned dst_pitch)
1256 {
1257 unsigned y;
1258
1259 assert(dst);
1260
1261 for (y = 0; y < BLOCK_HEIGHT; ++y)
1262 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1263 }
1264
1265 static void
1266 fill_field_zero_block(short *dst, unsigned dst_pitch)
1267 {
1268 unsigned y;
1269
1270 assert(dst);
1271
1272 for (y = 0; y < BLOCK_HEIGHT; ++y)
1273 memset(dst + y * dst_pitch * 2, 0, BLOCK_WIDTH * 2);
1274 }
1275
1276 static void
1277 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1278 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1279 {
1280 unsigned tex_pitch;
1281 short *texels;
1282 unsigned tb = 0, sb = 0;
1283 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1284 unsigned x, y;
1285
1286 assert(r);
1287 assert(blocks);
1288
1289 tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->resource->format);
1290 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1291
1292 for (y = 0; y < 2; ++y) {
1293 for (x = 0; x < 2; ++x, ++tb) {
1294 if ((cbp >> (5 - tb)) & 1) {
1295 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1296 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1297 texels + y * tex_pitch * BLOCK_HEIGHT +
1298 x * BLOCK_WIDTH, tex_pitch);
1299 }
1300 else {
1301 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1302 texels + y * tex_pitch + x * BLOCK_WIDTH,
1303 tex_pitch);
1304 }
1305
1306 ++sb;
1307 }
1308 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1309 if(dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1310
1311 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1312 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1313
1314 fill_frame_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1315 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1316 r->zero_block[0].x = (mbx + x * 0.5f);
1317 r->zero_block[0].y = (mby + y * 0.5f);
1318 }
1319 }
1320 }
1321 else {
1322
1323 fill_field_zero_block(texels + y * tex_pitch + x * BLOCK_WIDTH, tex_pitch);
1324 }
1325 }
1326 }
1327 }
1328
1329 /* TODO: Implement 422, 444 */
1330 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1331
1332 mbpx /= 2;
1333 mbpy /= 2;
1334
1335 for (tb = 0; tb < 2; ++tb) {
1336 tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->resource->format);
1337 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1338
1339 if ((cbp >> (1 - tb)) & 1) {
1340 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1341 ++sb;
1342 }
1343 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1344 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1345 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1346 fill_frame_zero_block(texels, tex_pitch);
1347 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1348 r->zero_block[tb + 1].x = mbx;
1349 r->zero_block[tb + 1].y = mby;
1350 }
1351 }
1352 }
1353 }
1354 }
1355
1356 static void
1357 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1358 struct pipe_mpeg12_macroblock *mb)
1359 {
1360 assert(r);
1361 assert(mb);
1362 assert(mb->blocks);
1363 assert(r->num_macroblocks < r->macroblocks_per_batch);
1364
1365 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1366 sizeof(struct pipe_mpeg12_macroblock));
1367
1368 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1369
1370 ++r->num_macroblocks;
1371 }
1372
/*
 * Keymap delete callback: drop the reference held on a cached sampler view.
 *
 * map, key and user are only checked for being non-NULL; data is the
 * sampler view stored for the entry.
 */
static void
texview_map_delete(const struct keymap *map,
                   const void *key, void *data,
                   void *user)
{
   struct pipe_sampler_view *view;

   assert(map);
   assert(key);
   assert(data);
   assert(user);

   view = (struct pipe_sampler_view*)data;
   pipe_sampler_view_reference(&view, NULL);
}
1387
1388 bool
1389 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1390 struct pipe_context *pipe,
1391 unsigned picture_width,
1392 unsigned picture_height,
1393 enum pipe_video_chroma_format chroma_format,
1394 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1395 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1396 bool pot_buffers)
1397 {
1398 unsigned i;
1399
1400 assert(renderer);
1401 assert(pipe);
1402 /* TODO: Implement other policies */
1403 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1404 /* TODO: Implement this */
1405 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1406 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1407 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1408 assert(pot_buffers);
1409
1410 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1411
1412 renderer->pipe = pipe;
1413 renderer->picture_width = picture_width;
1414 renderer->picture_height = picture_height;
1415 renderer->chroma_format = chroma_format;
1416 renderer->bufmode = bufmode;
1417 renderer->eb_handling = eb_handling;
1418 renderer->pot_buffers = pot_buffers;
1419
1420 renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
1421 texview_map_delete);
1422 if (!renderer->texview_map)
1423 return false;
1424
1425 if (!init_pipe_state(renderer)) {
1426 util_delete_keymap(renderer->texview_map, renderer->pipe);
1427 return false;
1428 }
1429 if (!init_shaders(renderer)) {
1430 util_delete_keymap(renderer->texview_map, renderer->pipe);
1431 cleanup_pipe_state(renderer);
1432 return false;
1433 }
1434 if (!init_buffers(renderer)) {
1435 util_delete_keymap(renderer->texview_map, renderer->pipe);
1436 cleanup_shaders(renderer);
1437 cleanup_pipe_state(renderer);
1438 return false;
1439 }
1440
1441 renderer->surface = NULL;
1442 renderer->past = NULL;
1443 renderer->future = NULL;
1444 for (i = 0; i < 3; ++i)
1445 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1446 renderer->num_macroblocks = 0;
1447
1448 xfer_buffers_map(renderer);
1449
1450 return true;
1451 }
1452
1453 void
1454 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1455 {
1456 assert(renderer);
1457
1458 xfer_buffers_unmap(renderer);
1459
1460 util_delete_keymap(renderer->texview_map, renderer->pipe);
1461 cleanup_pipe_state(renderer);
1462 cleanup_shaders(renderer);
1463 cleanup_buffers(renderer);
1464
1465 pipe_surface_reference(&renderer->surface, NULL);
1466 pipe_surface_reference(&renderer->past, NULL);
1467 pipe_surface_reference(&renderer->future, NULL);
1468 }
1469
1470 void
1471 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1472 *renderer,
1473 struct pipe_surface *surface,
1474 struct pipe_surface *past,
1475 struct pipe_surface *future,
1476 unsigned num_macroblocks,
1477 struct pipe_mpeg12_macroblock
1478 *mpeg12_macroblocks,
1479 struct pipe_fence_handle **fence)
1480 {
1481 bool new_surface = false;
1482
1483 assert(renderer);
1484 assert(surface);
1485 assert(num_macroblocks);
1486 assert(mpeg12_macroblocks);
1487
1488 if (renderer->surface) {
1489 if (surface != renderer->surface) {
1490 if (renderer->num_macroblocks > 0) {
1491 xfer_buffers_unmap(renderer);
1492 flush(renderer);
1493 }
1494
1495 new_surface = true;
1496 }
1497
1498 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1499 assert(surface != renderer->surface || renderer->past == past);
1500 assert(surface != renderer->surface || renderer->future == future);
1501 }
1502 else
1503 new_surface = true;
1504
1505 if (new_surface) {
1506 pipe_surface_reference(&renderer->surface, surface);
1507 pipe_surface_reference(&renderer->past, past);
1508 pipe_surface_reference(&renderer->future, future);
1509 renderer->fence = fence;
1510 }
1511
1512 while (num_macroblocks) {
1513 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1514 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1515 unsigned i;
1516
1517 for (i = 0; i < num_to_submit; ++i) {
1518 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1519 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1520 }
1521
1522 num_macroblocks -= num_to_submit;
1523
1524 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1525 xfer_buffers_unmap(renderer);
1526 flush(renderer);
1527 xfer_buffers_map(renderer);
1528 /* Next time we get this surface it may have new ref frames */
1529 pipe_surface_reference(&renderer->surface, NULL);
1530 pipe_surface_reference(&renderer->past, NULL);
1531 pipe_surface_reference(&renderer->future, NULL);
1532 }
1533 }
1534 }