ab196c21f8782fa82db79f1ae18a6e6dc7f4a511
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include <assert.h>
30 #include <pipe/p_context.h>
31 #include <pipe/p_inlines.h>
32 #include <util/u_format.h>
33 #include <util/u_math.h>
34 #include <util/u_memory.h>
35 #include <tgsi/tgsi_parse.h>
36 #include <tgsi/tgsi_build.h>
37 #include "vl_shader_build.h"
38
/* Alignment argument handed to every pipe_buffer_create() call in this file. */
#define DEFAULT_BUF_ALIGNMENT   1

/* Macroblock and block dimensions, in pixels. */
#define MACROBLOCK_WIDTH        16
#define MACROBLOCK_HEIGHT       16
#define BLOCK_WIDTH             8
#define BLOCK_HEIGHT            8

/*
 * Sentinel for "no zero block": ZERO_BLOCK_IS_NIL() treats any value whose
 * .x member is negative as nil, and ZERO_BLOCK_NIL is such a value.
 */
#define ZERO_BLOCK_NIL          -1.0f
#define ZERO_BLOCK_IS_NIL(zb)   ((zb).x < 0.0f)
46
/* Two-component float vector; used for positions and texcoords in vertex streams. */
struct vertex2f
{
   float x;
   float y;
};
51
/* Four-component float vector; the element type of the shader constant blocks. */
struct vertex4f
{
   float x;
   float y;
   float z;
   float w;
};
56
57 struct vertex_shader_consts
58 {
59 struct vertex4f denorm;
60 };
61
62 struct fragment_shader_consts
63 {
64 struct vertex4f multiplier;
65 struct vertex4f div;
66 };
67
68 /*
69 * Muliplier renormalizes block samples from 16 bits to 12 bits.
70 * Divider is used when calculating Y % 2 for choosing top or bottom
71 * field for P or B macroblocks.
72 * TODO: Use immediates.
73 */
74 static const struct fragment_shader_consts fs_consts = {
75 {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
76 {0.5f, 2.0f, 0.0f, 0.0f}
77 };
78
79 struct vert_stream_0
80 {
81 struct vertex2f pos;
82 struct vertex2f luma_tc;
83 struct vertex2f cb_tc;
84 struct vertex2f cr_tc;
85 };
86
/*
 * Internal macroblock classification: intra, or a combination of prediction
 * direction (forward, backward, bidirectional) and motion type (frame, field).
 * NUM_MACROBLOCK_TYPES is the number of real entries and must stay last.
 */
enum MACROBLOCK_TYPE
{
   MACROBLOCK_TYPE_INTRA = 0,
   MACROBLOCK_TYPE_FWD_FRAME_PRED,
   MACROBLOCK_TYPE_FWD_FIELD_PRED,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED,
   MACROBLOCK_TYPE_BI_FRAME_PRED,
   MACROBLOCK_TYPE_BI_FIELD_PRED,

   NUM_MACROBLOCK_TYPES
};
99
100 static void
101 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
102 {
103 const unsigned max_tokens = 50;
104
105 struct pipe_shader_state vs;
106 struct tgsi_token *tokens;
107 struct tgsi_header *header;
108
109 struct tgsi_full_declaration decl;
110 struct tgsi_full_instruction inst;
111
112 unsigned ti;
113
114 unsigned i;
115
116 assert(r);
117
118 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
119 header = (struct tgsi_header *) &tokens[0];
120 *header = tgsi_build_header();
121 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
122
123 ti = 2;
124
125 /*
126 * decl i0 ; Vertex pos
127 * decl i1 ; Luma texcoords
128 * decl i2 ; Chroma Cb texcoords
129 * decl i3 ; Chroma Cr texcoords
130 */
131 for (i = 0; i < 4; i++) {
132 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
133 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
134 }
135
136 /*
137 * decl o0 ; Vertex pos
138 * decl o1 ; Luma texcoords
139 * decl o2 ; Chroma Cb texcoords
140 * decl o3 ; Chroma Cr texcoords
141 */
142 for (i = 0; i < 4; i++) {
143 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
144 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
145 }
146
147 /*
148 * mov o0, i0 ; Move input vertex pos to output
149 * mov o1, i1 ; Move input luma texcoords to output
150 * mov o2, i2 ; Move input chroma Cb texcoords to output
151 * mov o3, i3 ; Move input chroma Cr texcoords to output
152 */
153 for (i = 0; i < 4; ++i) {
154 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
155 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
156 }
157
158 /* end */
159 inst = vl_end();
160 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
161
162 assert(ti <= max_tokens);
163
164 vs.tokens = tokens;
165 r->i_vs = r->pipe->create_vs_state(r->pipe, &vs);
166 free(tokens);
167 }
168
169 static void
170 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
171 {
172 const unsigned max_tokens = 100;
173
174 struct pipe_shader_state fs;
175 struct tgsi_token *tokens;
176 struct tgsi_header *header;
177
178 struct tgsi_full_declaration decl;
179 struct tgsi_full_instruction inst;
180
181 unsigned ti;
182
183 unsigned i;
184
185 assert(r);
186
187 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
188 header = (struct tgsi_header *) &tokens[0];
189 *header = tgsi_build_header();
190 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
191
192 ti = 2;
193
194 /*
195 * decl i0 ; Luma texcoords
196 * decl i1 ; Chroma Cb texcoords
197 * decl i2 ; Chroma Cr texcoords
198 */
199 for (i = 0; i < 3; ++i) {
200 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
201 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
202 }
203
204 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
205 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
206 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
207
208 /* decl o0 ; Fragment color */
209 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
210 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
211
212 /* decl t0, t1 */
213 decl = vl_decl_temps(0, 1);
214 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
215
216 /*
217 * decl s0 ; Sampler for luma texture
218 * decl s1 ; Sampler for chroma Cb texture
219 * decl s2 ; Sampler for chroma Cr texture
220 */
221 for (i = 0; i < 3; ++i) {
222 decl = vl_decl_samplers(i, i);
223 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
224 }
225
226 /*
227 * tex2d t1, i0, s0 ; Read texel from luma texture
228 * mov t0.x, t1.x ; Move luma sample into .x component
229 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
230 * mov t0.y, t1.x ; Move Cb sample into .y component
231 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
232 * mov t0.z, t1.x ; Move Cr sample into .z component
233 */
234 for (i = 0; i < 3; ++i) {
235 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
236 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
237
238 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
239 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
240 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
241 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
242 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
243 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
244 }
245
246 /* mul o0, t0, c0 ; Rescale texel to correct range */
247 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
248 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
249
250 /* end */
251 inst = vl_end();
252 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
253
254 assert(ti <= max_tokens);
255
256 fs.tokens = tokens;
257 r->i_fs = r->pipe->create_fs_state(r->pipe, &fs);
258 free(tokens);
259 }
260
261 static void
262 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
263 {
264 const unsigned max_tokens = 100;
265
266 struct pipe_shader_state vs;
267 struct tgsi_token *tokens;
268 struct tgsi_header *header;
269
270 struct tgsi_full_declaration decl;
271 struct tgsi_full_instruction inst;
272
273 unsigned ti;
274
275 unsigned i;
276
277 assert(r);
278
279 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
280 header = (struct tgsi_header *) &tokens[0];
281 *header = tgsi_build_header();
282 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
283
284 ti = 2;
285
286 /*
287 * decl i0 ; Vertex pos
288 * decl i1 ; Luma texcoords
289 * decl i2 ; Chroma Cb texcoords
290 * decl i3 ; Chroma Cr texcoords
291 * decl i4 ; Ref surface top field texcoords
292 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
293 */
294 for (i = 0; i < 6; i++) {
295 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
296 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
297 }
298
299 /*
300 * decl o0 ; Vertex pos
301 * decl o1 ; Luma texcoords
302 * decl o2 ; Chroma Cb texcoords
303 * decl o3 ; Chroma Cr texcoords
304 * decl o4 ; Ref macroblock texcoords
305 */
306 for (i = 0; i < 5; i++) {
307 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
308 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
309 }
310
311 /*
312 * mov o0, i0 ; Move input vertex pos to output
313 * mov o1, i1 ; Move input luma texcoords to output
314 * mov o2, i2 ; Move input chroma Cb texcoords to output
315 * mov o3, i3 ; Move input chroma Cr texcoords to output
316 */
317 for (i = 0; i < 4; ++i) {
318 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
319 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
320 }
321
322 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
323 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
324 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
325
326 /* end */
327 inst = vl_end();
328 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
329
330 assert(ti <= max_tokens);
331
332 vs.tokens = tokens;
333 r->p_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
334 free(tokens);
335 }
336
/*
 * Placeholder for the field-predicted, single-reference vertex shader.
 * Not implemented: trips an assertion if it is ever called.
 */
static void
create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void) r;   /* nothing to build yet */

   assert(false);
}
342
343 static void
344 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
345 {
346 const unsigned max_tokens = 100;
347
348 struct pipe_shader_state fs;
349 struct tgsi_token *tokens;
350 struct tgsi_header *header;
351
352 struct tgsi_full_declaration decl;
353 struct tgsi_full_instruction inst;
354
355 unsigned ti;
356
357 unsigned i;
358
359 assert(r);
360
361 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
362 header = (struct tgsi_header *) &tokens[0];
363 *header = tgsi_build_header();
364 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
365
366 ti = 2;
367
368 /*
369 * decl i0 ; Luma texcoords
370 * decl i1 ; Chroma Cb texcoords
371 * decl i2 ; Chroma Cr texcoords
372 * decl i3 ; Ref macroblock texcoords
373 */
374 for (i = 0; i < 4; ++i) {
375 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
376 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
377 }
378
379 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
380 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
381 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
382
383 /* decl o0 ; Fragment color */
384 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
385 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
386
387 /* decl t0, t1 */
388 decl = vl_decl_temps(0, 1);
389 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
390
391 /*
392 * decl s0 ; Sampler for luma texture
393 * decl s1 ; Sampler for chroma Cb texture
394 * decl s2 ; Sampler for chroma Cr texture
395 * decl s3 ; Sampler for ref surface texture
396 */
397 for (i = 0; i < 4; ++i) {
398 decl = vl_decl_samplers(i, i);
399 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
400 }
401
402 /*
403 * tex2d t1, i0, s0 ; Read texel from luma texture
404 * mov t0.x, t1.x ; Move luma sample into .x component
405 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
406 * mov t0.y, t1.x ; Move Cb sample into .y component
407 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
408 * mov t0.z, t1.x ; Move Cr sample into .z component
409 */
410 for (i = 0; i < 3; ++i) {
411 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
412 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
413
414 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
415 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
416 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
417 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
418 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
419 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
420 }
421
422 /* mul t0, t0, c0 ; Rescale texel to correct range */
423 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
424 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
425
426 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
427 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
428 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
429
430 /* add o0, t0, t1 ; Add ref and differential to form final output */
431 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
432 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
433
434 /* end */
435 inst = vl_end();
436 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
437
438 assert(ti <= max_tokens);
439
440 fs.tokens = tokens;
441 r->p_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
442 free(tokens);
443 }
444
/*
 * Placeholder for the field-predicted, single-reference fragment shader.
 * Not implemented: trips an assertion if it is ever called.
 */
static void
create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void) r;   /* nothing to build yet */

   assert(false);
}
450
451 static void
452 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
453 {
454 const unsigned max_tokens = 100;
455
456 struct pipe_shader_state vs;
457 struct tgsi_token *tokens;
458 struct tgsi_header *header;
459
460 struct tgsi_full_declaration decl;
461 struct tgsi_full_instruction inst;
462
463 unsigned ti;
464
465 unsigned i;
466
467 assert(r);
468
469 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
470 header = (struct tgsi_header *) &tokens[0];
471 *header = tgsi_build_header();
472 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
473
474 ti = 2;
475
476 /*
477 * decl i0 ; Vertex pos
478 * decl i1 ; Luma texcoords
479 * decl i2 ; Chroma Cb texcoords
480 * decl i3 ; Chroma Cr texcoords
481 * decl i4 ; First ref macroblock top field texcoords
482 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
483 * decl i6 ; Second ref macroblock top field texcoords
484 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
485 */
486 for (i = 0; i < 8; i++) {
487 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
488 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
489 }
490
491 /*
492 * decl o0 ; Vertex pos
493 * decl o1 ; Luma texcoords
494 * decl o2 ; Chroma Cb texcoords
495 * decl o3 ; Chroma Cr texcoords
496 * decl o4 ; First ref macroblock texcoords
497 * decl o5 ; Second ref macroblock texcoords
498 */
499 for (i = 0; i < 6; i++) {
500 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
501 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
502 }
503
504 /*
505 * mov o0, i0 ; Move input vertex pos to output
506 * mov o1, i1 ; Move input luma texcoords to output
507 * mov o2, i2 ; Move input chroma Cb texcoords to output
508 * mov o3, i3 ; Move input chroma Cr texcoords to output
509 */
510 for (i = 0; i < 4; ++i) {
511 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
512 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
513 }
514
515 /*
516 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
517 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
518 */
519 for (i = 0; i < 2; ++i) {
520 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
521 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
522 }
523
524 /* end */
525 inst = vl_end();
526 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
527
528 assert(ti <= max_tokens);
529
530 vs.tokens = tokens;
531 r->b_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
532 free(tokens);
533 }
534
/*
 * Placeholder for the field-predicted, bidirectional vertex shader.
 * Not implemented: trips an assertion if it is ever called.
 */
static void
create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void) r;   /* nothing to build yet */

   assert(false);
}
540
541 static void
542 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
543 {
544 const unsigned max_tokens = 100;
545
546 struct pipe_shader_state fs;
547 struct tgsi_token *tokens;
548 struct tgsi_header *header;
549
550 struct tgsi_full_declaration decl;
551 struct tgsi_full_instruction inst;
552
553 unsigned ti;
554
555 unsigned i;
556
557 assert(r);
558
559 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
560 header = (struct tgsi_header *) &tokens[0];
561 *header = tgsi_build_header();
562 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
563
564 ti = 2;
565
566 /*
567 * decl i0 ; Luma texcoords
568 * decl i1 ; Chroma Cb texcoords
569 * decl i2 ; Chroma Cr texcoords
570 * decl i3 ; First ref macroblock texcoords
571 * decl i4 ; Second ref macroblock texcoords
572 */
573 for (i = 0; i < 5; ++i) {
574 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
575 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
576 }
577
578 /*
579 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
580 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
581 */
582 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
583 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
584
585 /* decl o0 ; Fragment color */
586 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
587 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
588
589 /* decl t0-t2 */
590 decl = vl_decl_temps(0, 2);
591 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
592
593 /*
594 * decl s0 ; Sampler for luma texture
595 * decl s1 ; Sampler for chroma Cb texture
596 * decl s2 ; Sampler for chroma Cr texture
597 * decl s3 ; Sampler for first ref surface texture
598 * decl s4 ; Sampler for second ref surface texture
599 */
600 for (i = 0; i < 5; ++i) {
601 decl = vl_decl_samplers(i, i);
602 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
603 }
604
605 /*
606 * tex2d t1, i0, s0 ; Read texel from luma texture
607 * mov t0.x, t1.x ; Move luma sample into .x component
608 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
609 * mov t0.y, t1.x ; Move Cb sample into .y component
610 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
611 * mov t0.z, t1.x ; Move Cr sample into .z component
612 */
613 for (i = 0; i < 3; ++i) {
614 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
615 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
616
617 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
618 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
619 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
620 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
621 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
622 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
623 }
624
625 /* mul t0, t0, c0 ; Rescale texel to correct range */
626 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
627 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
628
629 /*
630 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
631 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
632 */
633 for (i = 0; i < 2; ++i) {
634 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
635 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
636 }
637
638 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
639 inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
640 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
641 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
642 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
643 inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
644 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
645
646 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
647 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
648 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
649
650 /* end */
651 inst = vl_end();
652 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
653
654 assert(ti <= max_tokens);
655
656 fs.tokens = tokens;
657 r->b_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
658 free(tokens);
659 }
660
/*
 * Placeholder for the field-predicted, bidirectional fragment shader.
 * Not implemented: trips an assertion if it is ever called.
 */
static void
create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void) r;   /* nothing to build yet */

   assert(false);
}
666
667 static void
668 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
669 {
670 unsigned i;
671
672 assert(r);
673
674 for (i = 0; i < 3; ++i) {
675 r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
676 (
677 r->pipe->screen, r->textures.all[i],
678 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
679 r->textures.all[i]->width0, r->textures.all[i]->height0
680 );
681
682 r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
683 }
684 }
685
686 static void
687 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
688 {
689 unsigned i;
690
691 assert(r);
692
693 for (i = 0; i < 3; ++i) {
694 r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
695 r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
696 }
697 }
698
699 static bool
700 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
701 {
702 struct pipe_sampler_state sampler;
703 unsigned filters[5];
704 unsigned i;
705
706 assert(r);
707
708 r->viewport.scale[0] = r->pot_buffers ?
709 util_next_power_of_two(r->picture_width) : r->picture_width;
710 r->viewport.scale[1] = r->pot_buffers ?
711 util_next_power_of_two(r->picture_height) : r->picture_height;
712 r->viewport.scale[2] = 1;
713 r->viewport.scale[3] = 1;
714 r->viewport.translate[0] = 0;
715 r->viewport.translate[1] = 0;
716 r->viewport.translate[2] = 0;
717 r->viewport.translate[3] = 0;
718
719 r->scissor.maxx = r->pot_buffers ?
720 util_next_power_of_two(r->picture_width) : r->picture_width;
721 r->scissor.maxy = r->pot_buffers ?
722 util_next_power_of_two(r->picture_height) : r->picture_height;
723
724 r->fb_state.width = r->pot_buffers ?
725 util_next_power_of_two(r->picture_width) : r->picture_width;
726 r->fb_state.height = r->pot_buffers ?
727 util_next_power_of_two(r->picture_height) : r->picture_height;
728 r->fb_state.nr_cbufs = 1;
729 r->fb_state.zsbuf = NULL;
730
731 /* Luma filter */
732 filters[0] = PIPE_TEX_FILTER_NEAREST;
733 /* Chroma filters */
734 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
735 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
736 filters[1] = PIPE_TEX_FILTER_NEAREST;
737 filters[2] = PIPE_TEX_FILTER_NEAREST;
738 }
739 else {
740 filters[1] = PIPE_TEX_FILTER_LINEAR;
741 filters[2] = PIPE_TEX_FILTER_LINEAR;
742 }
743 /* Fwd, bkwd ref filters */
744 filters[3] = PIPE_TEX_FILTER_LINEAR;
745 filters[4] = PIPE_TEX_FILTER_LINEAR;
746
747 for (i = 0; i < 5; ++i) {
748 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
749 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
750 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
751 sampler.min_img_filter = filters[i];
752 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
753 sampler.mag_img_filter = filters[i];
754 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
755 sampler.compare_func = PIPE_FUNC_ALWAYS;
756 sampler.normalized_coords = 1;
757 /*sampler.prefilter = ; */
758 /*sampler.shadow_ambient = ; */
759 /*sampler.lod_bias = ; */
760 sampler.min_lod = 0;
761 /*sampler.max_lod = ; */
762 /*sampler.border_color[i] = ; */
763 /*sampler.max_anisotropy = ; */
764 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
765 }
766
767 return true;
768 }
769
770 static void
771 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
772 {
773 unsigned i;
774
775 assert(r);
776
777 for (i = 0; i < 5; ++i)
778 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
779 }
780
781 static bool
782 init_shaders(struct vl_mpeg12_mc_renderer *r)
783 {
784 assert(r);
785
786 create_intra_vert_shader(r);
787 create_intra_frag_shader(r);
788 create_frame_pred_vert_shader(r);
789 create_frame_pred_frag_shader(r);
790 create_frame_bi_pred_vert_shader(r);
791 create_frame_bi_pred_frag_shader(r);
792
793 return true;
794 }
795
796 static void
797 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
798 {
799 assert(r);
800
801 r->pipe->delete_vs_state(r->pipe, r->i_vs);
802 r->pipe->delete_fs_state(r->pipe, r->i_fs);
803 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
804 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
805 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
806 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
807 }
808
809 static bool
810 init_buffers(struct vl_mpeg12_mc_renderer *r)
811 {
812 struct pipe_texture template;
813
814 const unsigned mbw =
815 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
816 const unsigned mbh =
817 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
818
819 unsigned i;
820
821 assert(r);
822
823 r->macroblocks_per_batch =
824 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
825 r->num_macroblocks = 0;
826 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
827
828 memset(&template, 0, sizeof(struct pipe_texture));
829 template.target = PIPE_TEXTURE_2D;
830 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
831 template.format = PIPE_FORMAT_R16_SNORM;
832 template.last_level = 0;
833 template.width0 = r->pot_buffers ?
834 util_next_power_of_two(r->picture_width) : r->picture_width;
835 template.height0 = r->pot_buffers ?
836 util_next_power_of_two(r->picture_height) : r->picture_height;
837 template.depth0 = 1;
838 template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
839
840 r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
841
842 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
843 template.width0 = r->pot_buffers ?
844 util_next_power_of_two(r->picture_width / 2) :
845 r->picture_width / 2;
846 template.height0 = r->pot_buffers ?
847 util_next_power_of_two(r->picture_height / 2) :
848 r->picture_height / 2;
849 }
850 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
851 template.height0 = r->pot_buffers ?
852 util_next_power_of_two(r->picture_height / 2) :
853 r->picture_height / 2;
854
855 r->textures.individual.cb =
856 r->pipe->screen->texture_create(r->pipe->screen, &template);
857 r->textures.individual.cr =
858 r->pipe->screen->texture_create(r->pipe->screen, &template);
859
860 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
861 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
862 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
863 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
864 (
865 r->pipe->screen,
866 DEFAULT_BUF_ALIGNMENT,
867 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
868 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
869 );
870
871 for (i = 1; i < 3; ++i) {
872 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
873 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
874 r->vertex_bufs.all[i].buffer_offset = 0;
875 r->vertex_bufs.all[i].buffer = pipe_buffer_create
876 (
877 r->pipe->screen,
878 DEFAULT_BUF_ALIGNMENT,
879 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
880 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
881 );
882 }
883
884 /* Position element */
885 r->vertex_elems[0].src_offset = 0;
886 r->vertex_elems[0].vertex_buffer_index = 0;
887 r->vertex_elems[0].nr_components = 2;
888 r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
889
890 /* Luma, texcoord element */
891 r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
892 r->vertex_elems[1].vertex_buffer_index = 0;
893 r->vertex_elems[1].nr_components = 2;
894 r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
895
896 /* Chroma Cr texcoord element */
897 r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
898 r->vertex_elems[2].vertex_buffer_index = 0;
899 r->vertex_elems[2].nr_components = 2;
900 r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
901
902 /* Chroma Cb texcoord element */
903 r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
904 r->vertex_elems[3].vertex_buffer_index = 0;
905 r->vertex_elems[3].nr_components = 2;
906 r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
907
908 /* First ref surface top field texcoord element */
909 r->vertex_elems[4].src_offset = 0;
910 r->vertex_elems[4].vertex_buffer_index = 1;
911 r->vertex_elems[4].nr_components = 2;
912 r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
913
914 /* First ref surface bottom field texcoord element */
915 r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
916 r->vertex_elems[5].vertex_buffer_index = 1;
917 r->vertex_elems[5].nr_components = 2;
918 r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
919
920 /* Second ref surface top field texcoord element */
921 r->vertex_elems[6].src_offset = 0;
922 r->vertex_elems[6].vertex_buffer_index = 2;
923 r->vertex_elems[6].nr_components = 2;
924 r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
925
926 /* Second ref surface bottom field texcoord element */
927 r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
928 r->vertex_elems[7].vertex_buffer_index = 2;
929 r->vertex_elems[7].nr_components = 2;
930 r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
931
932 r->vs_const_buf.buffer = pipe_buffer_create
933 (
934 r->pipe->screen,
935 DEFAULT_BUF_ALIGNMENT,
936 PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
937 sizeof(struct vertex_shader_consts)
938 );
939
940 r->fs_const_buf.buffer = pipe_buffer_create
941 (
942 r->pipe->screen,
943 DEFAULT_BUF_ALIGNMENT,
944 PIPE_BUFFER_USAGE_CONSTANT, sizeof(struct fragment_shader_consts)
945 );
946
947 memcpy
948 (
949 pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
950 &fs_consts, sizeof(struct fragment_shader_consts)
951 );
952
953 pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer);
954
955 return true;
956 }
957
958 static void
959 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
960 {
961 unsigned i;
962
963 assert(r);
964
965 pipe_buffer_reference(&r->vs_const_buf.buffer, NULL);
966 pipe_buffer_reference(&r->fs_const_buf.buffer, NULL);
967
968 for (i = 0; i < 3; ++i)
969 pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
970
971 for (i = 0; i < 3; ++i)
972 pipe_texture_reference(&r->textures.all[i], NULL);
973
974 FREE(r->macroblock_buf);
975 }
976
977 static enum MACROBLOCK_TYPE
978 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
979 {
980 assert(mb);
981
982 switch (mb->mb_type) {
983 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
984 return MACROBLOCK_TYPE_INTRA;
985 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
986 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
987 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
988 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
989 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
990 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
991 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
992 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
993 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
994 default:
995 assert(0);
996 }
997
998 /* Unreachable */
999 return -1;
1000 }
1001
/* XXX: One of these days this will have to be killed with fire */

/*
 * Write one two-component attribute (`attr`, a member with .x/.y fields) of
 * the six vertices starting at `vb` so that they describe the axis-aligned
 * rectangle with corner (x0, y0) and extent (hx, hy) as two triangles:
 *
 *    0: (x0, y0)        1: (x0, y0 + hy)   2: (x0 + hx, y0)
 *    3: (x0 + hx, y0)   4: (x0, y0 + hy)   5: (x0 + hx, y0 + hy)
 *
 * Arguments are evaluated several times; pass side-effect free expressions.
 */
#define SET_BLOCK_ATTR(vb, attr, x0, y0, hx, hy) \
   do { \
      (vb)[0].attr.x = (x0);        (vb)[0].attr.y = (y0); \
      (vb)[1].attr.x = (x0);        (vb)[1].attr.y = (y0) + (hy); \
      (vb)[2].attr.x = (x0) + (hx); (vb)[2].attr.y = (y0); \
      (vb)[3].attr.x = (x0) + (hx); (vb)[3].attr.y = (y0); \
      (vb)[4].attr.x = (x0);        (vb)[4].attr.y = (y0) + (hy); \
      (vb)[5].attr.x = (x0) + (hx); (vb)[5].attr.y = (y0) + (hy); \
   } while (0)

/*
 * Generate the six vertices (pos, luma_tc, cb_tc, cr_tc) of one block quad.
 *
 *   vb            first of six consecutive vertices to fill
 *   cbp           coded block pattern bit mask of the macroblock
 *   mbx, mby      macroblock coordinates
 *   unitx, unity  size of one macroblock in the coordinate space used
 *   ofsx, ofsy    offset of this block inside the macroblock
 *   hx, hy        extent of the block
 *   lm, cbm, crm  cbp masks selecting this block's luma, Cb and Cr data
 *   use_zb        when non-zero, a component whose cbp bit is clear samples
 *                 from the shared zero block instead of its own position
 *   zb            zero block origins: [0] luma, [1] Cb, [2] Cr
 *
 * The position always uses the block's own rectangle. Each texcoord uses the
 * same rectangle unless use_zb is set and the corresponding cbp bit is
 * clear, in which case the rectangle starts at the zero block origin.
 *
 * use_zb is parenthesised before negation so that an expression argument
 * (e.g. `a || b`) is negated as a whole.
 */
#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \
   do { \
      SET_BLOCK_ATTR(vb, pos, \
                     (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      \
      if (!(use_zb) || ((cbp) & (lm))) { \
         SET_BLOCK_ATTR(vb, luma_tc, \
                        (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      } \
      else { \
         SET_BLOCK_ATTR(vb, luma_tc, (zb)[0].x, (zb)[0].y, hx, hy); \
      } \
      \
      if (!(use_zb) || ((cbp) & (cbm))) { \
         SET_BLOCK_ATTR(vb, cb_tc, \
                        (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      } \
      else { \
         SET_BLOCK_ATTR(vb, cb_tc, (zb)[1].x, (zb)[1].y, hx, hy); \
      } \
      \
      if (!(use_zb) || ((cbp) & (crm))) { \
         SET_BLOCK_ATTR(vb, cr_tc, \
                        (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      } \
      else { \
         SET_BLOCK_ATTR(vb, cr_tc, (zb)[2].x, (zb)[2].y, hx, hy); \
      } \
   } while (0)
1069
1070 static void
1071 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
1072 struct pipe_mpeg12_macroblock *mb, unsigned pos,
1073 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
1074 {
1075 struct vertex2f mo_vec[2];
1076
1077 unsigned i;
1078
1079 assert(r);
1080 assert(mb);
1081 assert(ycbcr_vb);
1082 assert(pos < r->macroblocks_per_batch);
1083
1084 switch (mb->mb_type) {
1085 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
1086 {
1087 struct vertex2f *vb;
1088
1089 assert(ref_vb && ref_vb[1]);
1090
1091 vb = ref_vb[1] + pos * 2 * 24;
1092
1093 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
1094 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
1095
1096 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
1097 for (i = 0; i < 24 * 2; i += 2) {
1098 vb[i].x = mo_vec[0].x;
1099 vb[i].y = mo_vec[0].y;
1100 }
1101 }
1102 else {
1103 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
1104 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
1105
1106 for (i = 0; i < 24 * 2; i += 2) {
1107 vb[i].x = mo_vec[0].x;
1108 vb[i].y = mo_vec[0].y;
1109 vb[i + 1].x = mo_vec[1].x;
1110 vb[i + 1].y = mo_vec[1].y;
1111 }
1112 }
1113
1114 /* fall-through */
1115 }
1116 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
1117 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
1118 {
1119 struct vertex2f *vb;
1120
1121 assert(ref_vb && ref_vb[0]);
1122
1123 vb = ref_vb[0] + pos * 2 * 24;
1124
1125 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
1126 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
1127 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
1128
1129 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
1130 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
1131 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
1132 }
1133 }
1134 else {
1135 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
1136 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
1137
1138 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
1139 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
1140 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
1141 }
1142 }
1143
1144 if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
1145 for (i = 0; i < 24 * 2; i += 2) {
1146 vb[i].x = mo_vec[0].x;
1147 vb[i].y = mo_vec[0].y;
1148 }
1149 }
1150 else {
1151 for (i = 0; i < 24 * 2; i += 2) {
1152 vb[i].x = mo_vec[0].x;
1153 vb[i].y = mo_vec[0].y;
1154 vb[i + 1].x = mo_vec[1].x;
1155 vb[i + 1].y = mo_vec[1].y;
1156 }
1157 }
1158
1159 /* fall-through */
1160 }
1161 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
1162 {
1163 const struct vertex2f unit =
1164 {
1165 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
1166 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
1167 };
1168 const struct vertex2f half =
1169 {
1170 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
1171 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
1172 };
1173 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
1174
1175 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
1176
1177 SET_BLOCK(vb, mb->cbp, mb->mbx, mb->mby,
1178 unit.x, unit.y, 0, 0, half.x, half.y,
1179 32, 2, 1, use_zb, r->zero_block);
1180
1181 SET_BLOCK(vb + 6, mb->cbp, mb->mbx, mb->mby,
1182 unit.x, unit.y, half.x, 0, half.x, half.y,
1183 16, 2, 1, use_zb, r->zero_block);
1184
1185 SET_BLOCK(vb + 12, mb->cbp, mb->mbx, mb->mby,
1186 unit.x, unit.y, 0, half.y, half.x, half.y,
1187 8, 2, 1, use_zb, r->zero_block);
1188
1189 SET_BLOCK(vb + 18, mb->cbp, mb->mbx, mb->mby,
1190 unit.x, unit.y, half.x, half.y, half.x, half.y,
1191 4, 2, 1, use_zb, r->zero_block);
1192
1193 break;
1194 }
1195 default:
1196 assert(0);
1197 }
1198 }
1199
1200 static void
1201 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
1202 unsigned *num_macroblocks)
1203 {
1204 unsigned offset[NUM_MACROBLOCK_TYPES];
1205 struct vert_stream_0 *ycbcr_vb;
1206 struct vertex2f *ref_vb[2];
1207 unsigned i;
1208
1209 assert(r);
1210 assert(num_macroblocks);
1211
1212 for (i = 0; i < r->num_macroblocks; ++i) {
1213 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1214 ++num_macroblocks[mb_type];
1215 }
1216
1217 offset[0] = 0;
1218
1219 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
1220 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
1221
1222 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
1223 (
1224 r->pipe->screen,
1225 r->vertex_bufs.individual.ycbcr.buffer,
1226 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1227 );
1228
1229 for (i = 0; i < 2; ++i)
1230 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
1231 (
1232 r->pipe->screen,
1233 r->vertex_bufs.individual.ref[i].buffer,
1234 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1235 );
1236
1237 for (i = 0; i < r->num_macroblocks; ++i) {
1238 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1239
1240 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
1241 ycbcr_vb, ref_vb);
1242
1243 ++offset[mb_type];
1244 }
1245
1246 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer);
1247 for (i = 0; i < 2; ++i)
1248 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer);
1249 }
1250
1251 static void
1252 flush(struct vl_mpeg12_mc_renderer *r)
1253 {
1254 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1255 unsigned vb_start = 0;
1256 struct vertex_shader_consts *vs_consts;
1257 unsigned i;
1258
1259 assert(r);
1260 assert(r->num_macroblocks == r->macroblocks_per_batch);
1261
1262 gen_macroblock_stream(r, num_macroblocks);
1263
1264 r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface
1265 (
1266 r->pipe->screen, r->surface,
1267 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
1268 );
1269
1270 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1271 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1272 r->pipe->set_scissor_state(r->pipe, &r->scissor);
1273
1274 vs_consts = pipe_buffer_map
1275 (
1276 r->pipe->screen, r->vs_const_buf.buffer,
1277 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1278 );
1279
1280 vs_consts->denorm.x = r->surface->width0;
1281 vs_consts->denorm.y = r->surface->height0;
1282
1283 pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
1284
1285 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1286 &r->vs_const_buf);
1287 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0,
1288 &r->fs_const_buf);
1289
1290 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1291 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1292 r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
1293 r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
1294 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1295 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1296 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1297
1298 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1299 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1300 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1301 }
1302
1303 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1304 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1305 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1306 r->textures.individual.ref[0] = r->past;
1307 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1308 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1309 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1310 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1311
1312 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1313 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1314 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1315 }
1316
1317 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1318 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1319 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1320 r->textures.individual.ref[0] = r->past;
1321 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1322 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1323 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1324 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1325
1326 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1327 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1328 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1329 }
1330
1331 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1332 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1333 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1334 r->textures.individual.ref[0] = r->future;
1335 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1336 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1337 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1338 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1339
1340 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1341 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1342 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1343 }
1344
1345 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
1346 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1347 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1348 r->textures.individual.ref[0] = r->future;
1349 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1350 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1351 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1352 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1353
1354 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1355 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1356 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1357 }
1358
1359 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1360 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1361 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1362 r->textures.individual.ref[0] = r->past;
1363 r->textures.individual.ref[1] = r->future;
1364 r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
1365 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1366 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1367 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1368
1369 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1370 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1371 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1372 }
1373
1374 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1375 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1376 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1377 r->textures.individual.ref[0] = r->past;
1378 r->textures.individual.ref[1] = r->future;
1379 r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
1380 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1381 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1382 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1383
1384 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1385 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1386 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1387 }
1388
1389 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1390 pipe_surface_reference(&r->fb_state.cbufs[0], NULL);
1391
1392 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1393 for (i = 0; i < 3; ++i)
1394 r->zero_block[i].x = ZERO_BLOCK_NIL;
1395
1396 r->num_macroblocks = 0;
1397 }
1398
1399 static void
1400 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1401 {
1402 unsigned y;
1403
1404 assert(src);
1405 assert(dst);
1406
1407 for (y = 0; y < BLOCK_HEIGHT; ++y)
1408 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1409 }
1410
1411 static void
1412 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1413 {
1414 unsigned y;
1415
1416 assert(src);
1417 assert(dst);
1418
1419 for (y = 0; y < BLOCK_HEIGHT; ++y)
1420 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1421 }
1422
1423 static void
1424 fill_zero_block(short *dst, unsigned dst_pitch)
1425 {
1426 unsigned y;
1427
1428 assert(dst);
1429
1430 for (y = 0; y < BLOCK_HEIGHT; ++y)
1431 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1432 }
1433
1434 static void
1435 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1436 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1437 {
1438 unsigned tex_pitch;
1439 short *texels;
1440 unsigned tb = 0, sb = 0;
1441 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1442 unsigned x, y;
1443
1444 assert(r);
1445 assert(blocks);
1446
1447 tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->texture->format);
1448 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1449
1450 for (y = 0; y < 2; ++y) {
1451 for (x = 0; x < 2; ++x, ++tb) {
1452 if ((cbp >> (5 - tb)) & 1) {
1453 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1454 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1455 texels + y * tex_pitch * BLOCK_WIDTH +
1456 x * BLOCK_WIDTH, tex_pitch);
1457 }
1458 else {
1459 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1460 texels + y * tex_pitch + x * BLOCK_WIDTH,
1461 tex_pitch);
1462 }
1463
1464 ++sb;
1465 }
1466 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1467 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1468 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1469 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1470 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1471 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1472 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1473 }
1474 }
1475 }
1476 }
1477 }
1478
1479 /* TODO: Implement 422, 444 */
1480 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1481
1482 mbpx /= 2;
1483 mbpy /= 2;
1484
1485 for (tb = 0; tb < 2; ++tb) {
1486 tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->texture->format);
1487 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1488
1489 if ((cbp >> (1 - tb)) & 1) {
1490 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1491 ++sb;
1492 }
1493 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1494 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1495 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1496 fill_zero_block(texels, tex_pitch);
1497 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1498 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1499 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1500 }
1501 }
1502 }
1503 }
1504 }
1505
1506 static void
1507 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1508 struct pipe_mpeg12_macroblock *mb)
1509 {
1510 assert(r);
1511 assert(mb);
1512 assert(r->num_macroblocks < r->macroblocks_per_batch);
1513
1514 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1515 sizeof(struct pipe_mpeg12_macroblock));
1516
1517 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1518
1519 ++r->num_macroblocks;
1520 }
1521
1522 bool
1523 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1524 struct pipe_context *pipe,
1525 unsigned picture_width,
1526 unsigned picture_height,
1527 enum pipe_video_chroma_format chroma_format,
1528 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1529 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1530 bool pot_buffers)
1531 {
1532 unsigned i;
1533
1534 assert(renderer);
1535 assert(pipe);
1536 /* TODO: Implement other policies */
1537 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1538 /* TODO: Implement this */
1539 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1540 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1541 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1542 assert(pot_buffers);
1543
1544 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1545
1546 renderer->pipe = pipe;
1547 renderer->picture_width = picture_width;
1548 renderer->picture_height = picture_height;
1549 renderer->chroma_format = chroma_format;
1550 renderer->bufmode = bufmode;
1551 renderer->eb_handling = eb_handling;
1552 renderer->pot_buffers = pot_buffers;
1553
1554 if (!init_pipe_state(renderer))
1555 return false;
1556 if (!init_shaders(renderer)) {
1557 cleanup_pipe_state(renderer);
1558 return false;
1559 }
1560 if (!init_buffers(renderer)) {
1561 cleanup_shaders(renderer);
1562 cleanup_pipe_state(renderer);
1563 return false;
1564 }
1565
1566 renderer->surface = NULL;
1567 renderer->past = NULL;
1568 renderer->future = NULL;
1569 for (i = 0; i < 3; ++i)
1570 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1571 renderer->num_macroblocks = 0;
1572
1573 xfer_buffers_map(renderer);
1574
1575 return true;
1576 }
1577
/**
 * Tear down a renderer: unmap the transfer buffers, then release the pipe
 * state, the shaders and the buffers, in that order.
 */
void
vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
{
   assert(renderer);

   /* The transfer buffers are unmapped before anything is released */
   xfer_buffers_unmap(renderer);

   cleanup_pipe_state(renderer);
   cleanup_shaders(renderer);
   cleanup_buffers(renderer);
}
1589
1590 void
1591 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1592 *renderer,
1593 struct pipe_texture *surface,
1594 struct pipe_texture *past,
1595 struct pipe_texture *future,
1596 unsigned num_macroblocks,
1597 struct pipe_mpeg12_macroblock
1598 *mpeg12_macroblocks,
1599 struct pipe_fence_handle **fence)
1600 {
1601 bool new_surface = false;
1602
1603 assert(renderer);
1604 assert(surface);
1605 assert(num_macroblocks);
1606 assert(mpeg12_macroblocks);
1607
1608 if (renderer->surface) {
1609 if (surface != renderer->surface) {
1610 if (renderer->num_macroblocks > 0) {
1611 xfer_buffers_unmap(renderer);
1612 flush(renderer);
1613 }
1614
1615 new_surface = true;
1616 }
1617
1618 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1619 assert(surface != renderer->surface || renderer->past == past);
1620 assert(surface != renderer->surface || renderer->future == future);
1621 }
1622 else
1623 new_surface = true;
1624
1625 if (new_surface) {
1626 renderer->surface = surface;
1627 renderer->past = past;
1628 renderer->future = future;
1629 renderer->fence = fence;
1630 renderer->surface_tex_inv_size.x = 1.0f / surface->width0;
1631 renderer->surface_tex_inv_size.y = 1.0f / surface->height0;
1632 }
1633
1634 while (num_macroblocks) {
1635 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1636 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1637 unsigned i;
1638
1639 for (i = 0; i < num_to_submit; ++i) {
1640 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1641 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1642 }
1643
1644 num_macroblocks -= num_to_submit;
1645
1646 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1647 xfer_buffers_unmap(renderer);
1648 flush(renderer);
1649 xfer_buffers_map(renderer);
1650 /* Next time we get this surface it may have new ref frames */
1651 renderer->surface = NULL;
1652 }
1653 }
1654 }