/* mesa.git 7e73c5ced9fdd3ec7990a00a34153534b2312716 */
/* src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c */
1 #include "vl_mpeg12_mc_renderer.h"
2 #include <assert.h>
3 #include <pipe/p_context.h>
4 #include <pipe/p_inlines.h>
5 #include <util/u_math.h>
6 #include <util/u_memory.h>
7 #include <tgsi/tgsi_parse.h>
8 #include <tgsi/tgsi_build.h>
9 #include "vl_shader_build.h"
10
11 #define DEFAULT_BUF_ALIGNMENT 1
12 #define MACROBLOCK_WIDTH 16
13 #define MACROBLOCK_HEIGHT 16
14 #define BLOCK_WIDTH 8
15 #define BLOCK_HEIGHT 8
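/*
 * Zero-block aliasing: with EMPTY_BLOCK_XFER_ONE, a single all-zero 8x8 block
 * is written to each texture at most once per batch; zero_block[] caches its
 * texcoords so later empty blocks can simply point their texcoords at it.
 * ZERO_BLOCK_NIL marks the cached coordinate as not yet allocated.
 */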
16 #define ZERO_BLOCK_NIL -1.0f
17 #define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
18
19 struct vertex2f
20 {
21 float x, y;
22 };
23
24 struct vertex4f
25 {
26 float x, y, z, w;
27 };
28
29 struct vertex_shader_consts
30 {
31 struct vertex4f denorm;
32 };
33
34 struct fragment_shader_consts
35 {
36 struct vertex4f multiplier;
37 struct vertex4f div;
38 };
39
40 /*
41  * Multiplier renormalizes block samples from the 16-bit snorm range to the 9-bit snorm range.
42 * Divider is used when calculating Y % 2 for choosing top or bottom
43 * field for P or B macroblocks.
44 * TODO: Use immediates.
45 */
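/*
 * Example: a value v stored in an R16_SNORM texel samples as v / 32767, and
 * multiplying by 32767 / 255 yields v / 255, i.e. v re-expressed in the
 * 9-bit snorm range.  The div constants (0.5, 2.0) appear intended for
 * computing Y mod 2 as frc(y * 0.5) * 2 once field prediction is implemented.
 */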
46 static const struct fragment_shader_consts fs_consts = {
47 {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
48 {0.5f, 2.0f, 0.0f, 0.0f}
49 };
50
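/*
 * Layout of vertex stream 0: interleaved position plus luma/Cb/Cr texcoords
 * per vertex, matching vertex_elems[0..3] set up in init_buffers().
 */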
51 struct vert_stream_0
52 {
53 struct vertex2f pos;
54 struct vertex2f luma_tc;
55 struct vertex2f cb_tc;
56 struct vertex2f cr_tc;
57 };
58
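/*
 * Macroblocks are bucketed by these types so each batch can be drawn as one
 * contiguous vertex range per type, with the matching shader pair bound in
 * flush().  The field-predicted variants are declared but not yet implemented.
 */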
59 enum MACROBLOCK_TYPE
60 {
61 MACROBLOCK_TYPE_INTRA,
62 MACROBLOCK_TYPE_FWD_FRAME_PRED,
63 MACROBLOCK_TYPE_FWD_FIELD_PRED,
64 MACROBLOCK_TYPE_BKWD_FRAME_PRED,
65 MACROBLOCK_TYPE_BKWD_FIELD_PRED,
66 MACROBLOCK_TYPE_BI_FRAME_PRED,
67 MACROBLOCK_TYPE_BI_FIELD_PRED,
68
69 NUM_MACROBLOCK_TYPES
70 };
71
72 static void
73 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
74 {
75 const unsigned max_tokens = 50;
76
77 struct pipe_shader_state vs;
78 struct tgsi_token *tokens;
79 struct tgsi_header *header;
80
81 struct tgsi_full_declaration decl;
82 struct tgsi_full_instruction inst;
83
84 unsigned ti;
85
86 assert(r);
87
88 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
89 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
90 header = (struct tgsi_header *) &tokens[1];
91 *header = tgsi_build_header();
92 *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
93
94 ti = 3;
95
96 /*
97 * decl i0 ; Vertex pos
98 * decl i1 ; Luma texcoords
99 * decl i2 ; Chroma Cb texcoords
100 * decl i3 ; Chroma Cr texcoords
101 */
102 for (unsigned i = 0; i < 4; i++)
103 {
104 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
105 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
106 }
107
108 /*
109 * decl o0 ; Vertex pos
110 * decl o1 ; Luma texcoords
111 * decl o2 ; Chroma Cb texcoords
112 * decl o3 ; Chroma Cr texcoords
113 */
114 for (unsigned i = 0; i < 4; i++)
115 {
116 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
117 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
118 }
119
120 /*
121 * mov o0, i0 ; Move input vertex pos to output
122 * mov o1, i1 ; Move input luma texcoords to output
123 * mov o2, i2 ; Move input chroma Cb texcoords to output
124 * mov o3, i3 ; Move input chroma Cr texcoords to output
125 */
126 for (unsigned i = 0; i < 4; ++i)
127 {
128 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
129 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
130 }
131
132 /* end */
133 inst = vl_end();
134 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
135
136 assert(ti <= max_tokens);
137
138 vs.tokens = tokens;
139 r->i_vs = r->pipe->create_vs_state(r->pipe, &vs);
140 free(tokens);
141 }
142
143 static void
144 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
145 {
146 const unsigned max_tokens = 100;
147
148 struct pipe_shader_state fs;
149 struct tgsi_token *tokens;
150 struct tgsi_header *header;
151
152 struct tgsi_full_declaration decl;
153 struct tgsi_full_instruction inst;
154
155 unsigned ti;
156
157 assert(r);
158
159 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
160 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
161 header = (struct tgsi_header *) &tokens[1];
162 *header = tgsi_build_header();
163 *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
164
165 ti = 3;
166
167 /*
168 * decl i0 ; Luma texcoords
169 * decl i1 ; Chroma Cb texcoords
170 * decl i2 ; Chroma Cr texcoords
171 */
172 for (unsigned i = 0; i < 3; ++i)
173 {
174 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
175 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
176 }
177
178 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
179 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
180 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
181
182 /* decl o0 ; Fragment color */
183 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
184 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
185
186 /* decl t0, t1 */
187 decl = vl_decl_temps(0, 1);
188 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
189
190 /*
191 * decl s0 ; Sampler for luma texture
192 * decl s1 ; Sampler for chroma Cb texture
193 * decl s2 ; Sampler for chroma Cr texture
194 */
195 for (unsigned i = 0; i < 3; ++i)
196 {
197 decl = vl_decl_samplers(i, i);
198 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
199 }
200
201 /*
202 * tex2d t1, i0, s0 ; Read texel from luma texture
203 * mov t0.x, t1.x ; Move luma sample into .x component
204 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
205 * mov t0.y, t1.x ; Move Cb sample into .y component
206 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
207 * mov t0.z, t1.x ; Move Cr sample into .z component
208 */
209 for (unsigned i = 0; i < 3; ++i)
210 {
211 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
212 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
213
214 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
215 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
216 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
217 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
218 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
219 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
220 }
221
222 /* mul o0, t0, c0 ; Rescale texel to correct range */
223 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
224 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
225
226 /* end */
227 inst = vl_end();
228 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
229
230 assert(ti <= max_tokens);
231
232 fs.tokens = tokens;
233 r->i_fs = r->pipe->create_fs_state(r->pipe, &fs);
234 free(tokens);
235 }
236
237 static void
238 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
239 {
240 const unsigned max_tokens = 100;
241
242 struct pipe_shader_state vs;
243 struct tgsi_token *tokens;
244 struct tgsi_header *header;
245
246 struct tgsi_full_declaration decl;
247 struct tgsi_full_instruction inst;
248
249 unsigned ti;
250
251 assert(r);
252
253 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
254 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
255 header = (struct tgsi_header *) &tokens[1];
256 *header = tgsi_build_header();
257 *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
258
259 ti = 3;
260
261 /*
262 * decl i0 ; Vertex pos
263 * decl i1 ; Luma texcoords
264 * decl i2 ; Chroma Cb texcoords
265 * decl i3 ; Chroma Cr texcoords
266 * decl i4 ; Ref surface top field texcoords
267 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
268 */
269 for (unsigned i = 0; i < 6; i++)
270 {
271 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
272 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
273 }
274
275 /*
276 * decl o0 ; Vertex pos
277 * decl o1 ; Luma texcoords
278 * decl o2 ; Chroma Cb texcoords
279 * decl o3 ; Chroma Cr texcoords
280 * decl o4 ; Ref macroblock texcoords
281 */
282 for (unsigned i = 0; i < 5; i++)
283 {
284 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
285 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
286 }
287
288 /*
289 * mov o0, i0 ; Move input vertex pos to output
290 * mov o1, i1 ; Move input luma texcoords to output
291 * mov o2, i2 ; Move input chroma Cb texcoords to output
292 * mov o3, i3 ; Move input chroma Cr texcoords to output
293 */
294 for (unsigned i = 0; i < 4; ++i)
295 {
296 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
297 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
298 }
299
300 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
301 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
302 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
303
304 /* end */
305 inst = vl_end();
306 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
307
308 assert(ti <= max_tokens);
309
310 vs.tokens = tokens;
311 r->p_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
312 free(tokens);
313 }
314
315 static void
316 create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
317 {
318 assert(false);
319 }
320
321 static void
322 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
323 {
324 const unsigned max_tokens = 100;
325
326 struct pipe_shader_state fs;
327 struct tgsi_token *tokens;
328 struct tgsi_header *header;
329
330 struct tgsi_full_declaration decl;
331 struct tgsi_full_instruction inst;
332
333 unsigned ti;
334
335 assert(r);
336
337 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
338 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
339 header = (struct tgsi_header *) &tokens[1];
340 *header = tgsi_build_header();
341 *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
342
343 ti = 3;
344
345 /*
346 * decl i0 ; Luma texcoords
347 * decl i1 ; Chroma Cb texcoords
348 * decl i2 ; Chroma Cr texcoords
349 * decl i3 ; Ref macroblock texcoords
350 */
351 for (unsigned i = 0; i < 4; ++i)
352 {
353 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
354 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
355 }
356
357 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
358 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
359 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
360
361 /* decl o0 ; Fragment color */
362 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
363 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
364
365 /* decl t0, t1 */
366 decl = vl_decl_temps(0, 1);
367 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
368
369 /*
370 * decl s0 ; Sampler for luma texture
371 * decl s1 ; Sampler for chroma Cb texture
372 * decl s2 ; Sampler for chroma Cr texture
373 * decl s3 ; Sampler for ref surface texture
374 */
375 for (unsigned i = 0; i < 4; ++i)
376 {
377 decl = vl_decl_samplers(i, i);
378 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
379 }
380
381 /*
382 * tex2d t1, i0, s0 ; Read texel from luma texture
383 * mov t0.x, t1.x ; Move luma sample into .x component
384 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
385 * mov t0.y, t1.x ; Move Cb sample into .y component
386 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
387 * mov t0.z, t1.x ; Move Cr sample into .z component
388 */
389 for (unsigned i = 0; i < 3; ++i)
390 {
391 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
392 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
393
394 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
395 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
396 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
397 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
398 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
399 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
400 }
401
402 /* mul t0, t0, c0 ; Rescale texel to correct range */
403 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
404 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
405
406 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
407 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
408 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
409
410 /* add o0, t0, t1 ; Add ref and differential to form final output */
411 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
412 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
413
414 /* end */
415 inst = vl_end();
416 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
417
418 assert(ti <= max_tokens);
419
420 fs.tokens = tokens;
421 r->p_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
422 free(tokens);
423 }
424
425 static void
426 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
427 {
428 assert(false);
429 }
430
431 static void
432 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
433 {
434 const unsigned max_tokens = 100;
435
436 struct pipe_shader_state vs;
437 struct tgsi_token *tokens;
438 struct tgsi_header *header;
439
440 struct tgsi_full_declaration decl;
441 struct tgsi_full_instruction inst;
442
443 unsigned ti;
444
445 assert(r);
446
447 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
448 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
449 header = (struct tgsi_header *) &tokens[1];
450 *header = tgsi_build_header();
451 *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
452
453 ti = 3;
454
455 /*
456 * decl i0 ; Vertex pos
457 * decl i1 ; Luma texcoords
458 * decl i2 ; Chroma Cb texcoords
459 * decl i3 ; Chroma Cr texcoords
460 * decl i4 ; First ref macroblock top field texcoords
461 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
462 * decl i6 ; Second ref macroblock top field texcoords
463 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
464 */
465 for (unsigned i = 0; i < 8; i++)
466 {
467 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
468 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
469 }
470
471 /*
472 * decl o0 ; Vertex pos
473 * decl o1 ; Luma texcoords
474 * decl o2 ; Chroma Cb texcoords
475 * decl o3 ; Chroma Cr texcoords
476 * decl o4 ; First ref macroblock texcoords
477 * decl o5 ; Second ref macroblock texcoords
478 */
479 for (unsigned i = 0; i < 6; i++)
480 {
481 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
482 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
483 }
484
485 /*
486 * mov o0, i0 ; Move input vertex pos to output
487 * mov o1, i1 ; Move input luma texcoords to output
488 * mov o2, i2 ; Move input chroma Cb texcoords to output
489 * mov o3, i3 ; Move input chroma Cr texcoords to output
490 */
491 for (unsigned i = 0; i < 4; ++i)
492 {
493 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
494 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
495 }
496
497 /*
498 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
499 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
500 */
501 for (unsigned i = 0; i < 2; ++i)
502 {
503 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
504 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
505 }
506
507 /* end */
508 inst = vl_end();
509 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
510
511 assert(ti <= max_tokens);
512
513 vs.tokens = tokens;
514 r->b_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
515 free(tokens);
516 }
517
518 static void
519 create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
520 {
521 assert(false);
522 }
523
524 static void
525 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
526 {
527 const unsigned max_tokens = 100;
528
529 struct pipe_shader_state fs;
530 struct tgsi_token *tokens;
531 struct tgsi_header *header;
532
533 struct tgsi_full_declaration decl;
534 struct tgsi_full_instruction inst;
535
536 unsigned ti;
537
538 assert(r);
539
540 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
541 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
542 header = (struct tgsi_header *) &tokens[1];
543 *header = tgsi_build_header();
544 *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
545
546 ti = 3;
547
548 /*
549 * decl i0 ; Luma texcoords
550 * decl i1 ; Chroma Cb texcoords
551 * decl i2 ; Chroma Cr texcoords
552 * decl i3 ; First ref macroblock texcoords
553 * decl i4 ; Second ref macroblock texcoords
554 */
555 for (unsigned i = 0; i < 5; ++i)
556 {
557 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
558 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
559 }
560
561 /*
562 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
563 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
564 */
565 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
566 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
567
568 /* decl o0 ; Fragment color */
569 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
570 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
571
572 /* decl t0-t2 */
573 decl = vl_decl_temps(0, 2);
574 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
575
576 /*
577 * decl s0 ; Sampler for luma texture
578 * decl s1 ; Sampler for chroma Cb texture
579 * decl s2 ; Sampler for chroma Cr texture
580 * decl s3 ; Sampler for first ref surface texture
581 * decl s4 ; Sampler for second ref surface texture
582 */
583 for (unsigned i = 0; i < 5; ++i)
584 {
585 decl = vl_decl_samplers(i, i);
586 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
587 }
588
589 /*
590 * tex2d t1, i0, s0 ; Read texel from luma texture
591 * mov t0.x, t1.x ; Move luma sample into .x component
592 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
593 * mov t0.y, t1.x ; Move Cb sample into .y component
594 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
595 * mov t0.z, t1.x ; Move Cr sample into .z component
596 */
597 for (unsigned i = 0; i < 3; ++i)
598 {
599 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
600 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
601
602 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
603 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
604 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
605 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
606 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
607 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
608 }
609
610 /* mul t0, t0, c0 ; Rescale texel to correct range */
611 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
612 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
613
614 /*
615 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
616 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
617 */
618 for (unsigned i = 0; i < 2; ++i)
619 {
620 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
621 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
622 }
623
624 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
625 inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
626 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
627 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
628 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
629 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
630 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
631
632 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
633 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
634 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
635
636 /* end */
637 inst = vl_end();
638 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
639
640 assert(ti <= max_tokens);
641
642 fs.tokens = tokens;
643 r->b_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
644 free(tokens);
645 }
646
647 static void
648 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
649 {
650 assert(false);
651 }
652
653 static void
654 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
655 {
656 assert(r);
657
658 for (unsigned i = 0; i < 3; ++i)
659 {
660 r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
661 (
662 r->pipe->screen, r->textures.all[i],
663 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
664 r->textures.all[i]->width[0], r->textures.all[i]->height[0]
665 );
666
667 r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
668 }
669 }
670
671 static void
672 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
673 {
674 assert(r);
675
676 for (unsigned i = 0; i < 3; ++i)
677 {
678 r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
679 r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
680 }
681 }
682
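/*
 * Chroma samplers use nearest filtering when chroma is 4:4:4 (no upsampling
 * needed) or when empty blocks are aliased to the shared zero block,
 * presumably because bilinear filtering would then blend in unrelated texels
 * at block edges; otherwise chroma, like the reference frames, is filtered
 * linearly.
 */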
683 static bool
684 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
685 {
686 struct pipe_sampler_state sampler;
687 unsigned filters[5];
688
689 assert(r);
690
691 r->viewport.scale[0] = r->pot_buffers ?
692 util_next_power_of_two(r->picture_width) : r->picture_width;
693 r->viewport.scale[1] = r->pot_buffers ?
694 util_next_power_of_two(r->picture_height) : r->picture_height;
695 r->viewport.scale[2] = 1;
696 r->viewport.scale[3] = 1;
697 r->viewport.translate[0] = 0;
698 r->viewport.translate[1] = 0;
699 r->viewport.translate[2] = 0;
700 r->viewport.translate[3] = 0;
701
702 r->fb_state.width = r->pot_buffers ?
703 util_next_power_of_two(r->picture_width) : r->picture_width;
704 r->fb_state.height = r->pot_buffers ?
705 util_next_power_of_two(r->picture_height) : r->picture_height;
706 r->fb_state.nr_cbufs = 1;
707 r->fb_state.zsbuf = NULL;
708
709 /* Luma filter */
710 filters[0] = PIPE_TEX_FILTER_NEAREST;
711 /* Chroma filters */
712 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
713 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
714 {
715 filters[1] = PIPE_TEX_FILTER_NEAREST;
716 filters[2] = PIPE_TEX_FILTER_NEAREST;
717 }
718 else
719 {
720 filters[1] = PIPE_TEX_FILTER_LINEAR;
721 filters[2] = PIPE_TEX_FILTER_LINEAR;
722 }
723 /* Fwd, bkwd ref filters */
724 filters[3] = PIPE_TEX_FILTER_LINEAR;
725 filters[4] = PIPE_TEX_FILTER_LINEAR;
726
727 for (unsigned i = 0; i < 5; ++i)
728 {
729 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
730 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
731 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
732 sampler.min_img_filter = filters[i];
733 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
734 sampler.mag_img_filter = filters[i];
735 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
736 sampler.compare_func = PIPE_FUNC_ALWAYS;
737 sampler.normalized_coords = 1;
738 /*sampler.prefilter = ; */
739 /*sampler.shadow_ambient = ; */
740 /*sampler.lod_bias = ; */
741 sampler.min_lod = 0;
742 /*sampler.max_lod = ; */
743 /*sampler.border_color[i] = ; */
744 /*sampler.max_anisotropy = ; */
745 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
746 }
747
748 return true;
749 }
750
751 static void
752 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
753 {
754 assert(r);
755
756 for (unsigned i = 0; i < 5; ++i)
757 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
758 }
759
760 static bool
761 init_shaders(struct vl_mpeg12_mc_renderer *r)
762 {
763 assert(r);
764
765 create_intra_vert_shader(r);
766 create_intra_frag_shader(r);
767 create_frame_pred_vert_shader(r);
768 create_frame_pred_frag_shader(r);
769 create_frame_bi_pred_vert_shader(r);
770 create_frame_bi_pred_frag_shader(r);
771
772 return true;
773 }
774
775 static void
776 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
777 {
778 assert(r);
779
780 r->pipe->delete_vs_state(r->pipe, r->i_vs);
781 r->pipe->delete_fs_state(r->pipe, r->i_fs);
782 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
783 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
784 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
785 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
786 }
787
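/*
 * init_buffers allocates one R16_SNORM texture per plane (sized per chroma
 * format), three vertex buffers (stream 0: interleaved pos + luma/Cb/Cr
 * texcoords, streams 1 and 2: per-reference motion vectors), the vertex
 * element layout, and the VS/FS constant buffers.  24 vertices are reserved
 * per macroblock in each stream.
 */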
788 static bool
789 init_buffers(struct vl_mpeg12_mc_renderer *r)
790 {
791 struct pipe_texture template;
792
793 const unsigned mbw =
794 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
795 const unsigned mbh =
796 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
797
798 assert(r);
799
800 r->macroblocks_per_batch =
801 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
802 r->num_macroblocks = 0;
803 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
804
805 memset(&template, 0, sizeof(struct pipe_texture));
806 template.target = PIPE_TEXTURE_2D;
807    template.format = PIPE_FORMAT_R16_SNORM; /* TODO: Accommodate HW that can't do this, and cases where this format isn't precise enough */
808    template.last_level = 0;
809    template.width[0] = r->pot_buffers ?
810       util_next_power_of_two(r->picture_width) : r->picture_width;
811    template.height[0] = r->pot_buffers ?
812       util_next_power_of_two(r->picture_height) : r->picture_height;
813    template.depth[0] = 1;
814    pf_get_block(template.format, &template.block);
815    template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
816
817    r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
818 r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
819
820 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420)
821 {
822 template.width[0] = r->pot_buffers ?
823 util_next_power_of_two(r->picture_width / 2) :
824 r->picture_width / 2;
825 template.height[0] = r->pot_buffers ?
826 util_next_power_of_two(r->picture_height / 2) :
827 r->picture_height / 2;
828 }
829 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
830 template.height[0] = r->pot_buffers ?
831 util_next_power_of_two(r->picture_height / 2) :
832 r->picture_height / 2;
833
834 r->textures.individual.cb =
835 r->pipe->screen->texture_create(r->pipe->screen, &template);
836 r->textures.individual.cr =
837 r->pipe->screen->texture_create(r->pipe->screen, &template);
838
839 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
840 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
841 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
842 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
843 (
844 r->pipe->screen,
845 DEFAULT_BUF_ALIGNMENT,
846 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
847 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
848 );
849
850 for (unsigned i = 1; i < 3; ++i)
851 {
852 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
853 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
854 r->vertex_bufs.all[i].buffer_offset = 0;
855 r->vertex_bufs.all[i].buffer = pipe_buffer_create
856 (
857 r->pipe->screen,
858 DEFAULT_BUF_ALIGNMENT,
859 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
860 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
861 );
862 }
863
864 /* Position element */
865 r->vertex_elems[0].src_offset = 0;
866 r->vertex_elems[0].vertex_buffer_index = 0;
867 r->vertex_elems[0].nr_components = 2;
868 r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
869
870    /* Luma texcoord element */
871 r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
872 r->vertex_elems[1].vertex_buffer_index = 0;
873 r->vertex_elems[1].nr_components = 2;
874 r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
875
876    /* Chroma Cb texcoord element */
877 r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
878 r->vertex_elems[2].vertex_buffer_index = 0;
879 r->vertex_elems[2].nr_components = 2;
880 r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
881
882    /* Chroma Cr texcoord element */
883 r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
884 r->vertex_elems[3].vertex_buffer_index = 0;
885 r->vertex_elems[3].nr_components = 2;
886 r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
887
888 /* First ref surface top field texcoord element */
889 r->vertex_elems[4].src_offset = 0;
890 r->vertex_elems[4].vertex_buffer_index = 1;
891 r->vertex_elems[4].nr_components = 2;
892 r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
893
894 /* First ref surface bottom field texcoord element */
895 r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
896 r->vertex_elems[5].vertex_buffer_index = 1;
897 r->vertex_elems[5].nr_components = 2;
898 r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
899
900 /* Second ref surface top field texcoord element */
901 r->vertex_elems[6].src_offset = 0;
902 r->vertex_elems[6].vertex_buffer_index = 2;
903 r->vertex_elems[6].nr_components = 2;
904 r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
905
906 /* Second ref surface bottom field texcoord element */
907 r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
908 r->vertex_elems[7].vertex_buffer_index = 2;
909 r->vertex_elems[7].nr_components = 2;
910 r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
911
912 r->vs_const_buf.buffer = pipe_buffer_create
913 (
914 r->pipe->screen,
915 DEFAULT_BUF_ALIGNMENT,
916 PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
917 sizeof(struct vertex_shader_consts)
918 );
919
920 r->fs_const_buf.buffer = pipe_buffer_create
921 (
922 r->pipe->screen,
923 DEFAULT_BUF_ALIGNMENT,
924 PIPE_BUFFER_USAGE_CONSTANT, sizeof(struct fragment_shader_consts)
925 );
926
927 memcpy
928 (
929 pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
930 &fs_consts, sizeof(struct fragment_shader_consts)
931 );
932
933 pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer);
934
935 return true;
936 }
937
938 static void
939 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
940 {
941 assert(r);
942
943 pipe_buffer_reference(&r->vs_const_buf.buffer, NULL);
944 pipe_buffer_reference(&r->fs_const_buf.buffer, NULL);
945
946 for (unsigned i = 0; i < 3; ++i)
947 pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
948
949 for (unsigned i = 0; i < 3; ++i)
950 pipe_texture_reference(&r->textures.all[i], NULL);
951
952 FREE(r->macroblock_buf);
953 }
954
955 static enum MACROBLOCK_TYPE
956 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
957 {
958 assert(mb);
959
960 switch (mb->mb_type)
961 {
962 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
963 return MACROBLOCK_TYPE_INTRA;
964 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
965 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
966 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
967 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
968 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
969 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
970 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
971 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
972 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
973 default:
974 assert(0);
975 }
976
977 /* Unreachable */
978 return -1;
979 }
980
981 /* XXX: One of these days this will have to be killed with fire */
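/*
 * SET_BLOCK emits the 6 vertices (two triangles) covering one 8x8 block of a
 * macroblock.  The position texcoords always cover the block; the luma/Cb/Cr
 * texcoords either cover the same area or, when the corresponding
 * coded_block_pattern bit is clear and zero-block aliasing is enabled, all
 * point at the shared zero block instead.
 */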
982 #define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \
983 do { \
984 (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \
985 (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \
986 (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \
987 (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \
988 (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \
989 (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \
990 \
991 if (!use_zb || (cbp) & (lm)) \
992 { \
993 (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \
994 (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
995 (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \
996 (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \
997 (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
998 (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
999 } \
1000 else \
1001 { \
1002 (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \
1003 (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \
1004 (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \
1005 (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \
1006 (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \
1007 (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \
1008 } \
1009 \
1010 if (!use_zb || (cbp) & (cbm)) \
1011 { \
1012 (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \
1013 (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1014 (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \
1015 (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \
1016 (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1017 (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1018 } \
1019 else \
1020 { \
1021 (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \
1022 (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \
1023 (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \
1024 (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \
1025 (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \
1026 (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \
1027 } \
1028 \
1029 if (!use_zb || (cbp) & (crm)) \
1030 { \
1031 (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \
1032 (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1033 (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \
1034 (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \
1035 (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1036 (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1037 } \
1038 else \
1039 { \
1040 (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \
1041 (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \
1042 (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \
1043 (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \
1044 (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \
1045 (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \
1046 } \
1047 } while (0)
1048
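/*
 * gen_macroblock_verts writes 24 vertices (four 8x8 quads of two triangles)
 * into vertex stream 0 for one macroblock, and per-vertex motion vectors into
 * streams 1 and 2 for predicted macroblocks.  Motion vectors are converted
 * from half-pel units to texcoords via the 0.5 factor and surface_tex_inv_size.
 */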
1049 static void
1050 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
1051 struct pipe_mpeg12_macroblock *mb, unsigned pos,
1052 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
1053 {
1054 struct vertex2f mo_vec[2];
1055
1056 assert(r);
1057 assert(mb);
1058 assert(ycbcr_vb);
1059 assert(pos < r->macroblocks_per_batch);
1060
1061 switch (mb->mb_type)
1062 {
1063 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
1064 {
1065 struct vertex2f *vb;
1066
1067 assert(ref_vb && ref_vb[1]);
1068
1069 vb = ref_vb[1] + pos * 2 * 24;
1070
1071 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
1072 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
1073
1074 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME)
1075 {
1076 for (unsigned i = 0; i < 24 * 2; i += 2)
1077 {
1078 vb[i].x = mo_vec[0].x;
1079 vb[i].y = mo_vec[0].y;
1080 }
1081 }
1082 else
1083 {
1084 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
1085 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
1086
1087 for (unsigned i = 0; i < 24 * 2; i += 2)
1088 {
1089 vb[i].x = mo_vec[0].x;
1090 vb[i].y = mo_vec[0].y;
1091 vb[i + 1].x = mo_vec[1].x;
1092 vb[i + 1].y = mo_vec[1].y;
1093 }
1094 }
1095
1096 /* fall-through */
1097 }
1098 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
1099 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
1100 {
1101 struct vertex2f *vb;
1102
1103 assert(ref_vb && ref_vb[0]);
1104
1105 vb = ref_vb[0] + pos * 2 * 24;
1106
1107 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD)
1108 {
1109 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
1110 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
1111
1112 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD)
1113 {
1114 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
1115 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
1116 }
1117 }
1118 else
1119 {
1120 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
1121 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
1122
1123 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD)
1124 {
1125 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
1126 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
1127 }
1128 }
1129
1130          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME)
1131 {
1132 for (unsigned i = 0; i < 24 * 2; i += 2)
1133 {
1134 vb[i].x = mo_vec[0].x;
1135 vb[i].y = mo_vec[0].y;
1136 }
1137 }
1138 else
1139 {
1140 for (unsigned i = 0; i < 24 * 2; i += 2)
1141 {
1142 vb[i].x = mo_vec[0].x;
1143 vb[i].y = mo_vec[0].y;
1144 vb[i + 1].x = mo_vec[1].x;
1145 vb[i + 1].y = mo_vec[1].y;
1146 }
1147 }
1148
1149 /* fall-through */
1150 }
1151 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
1152 {
1153 const struct vertex2f unit =
1154 {
1155 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
1156 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
1157 };
1158 const struct vertex2f half =
1159 {
1160 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
1161 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
1162 };
1163 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
1164
1165 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
1166
1167 SET_BLOCK(vb, mb->cbp, mb->mbx, mb->mby,
1168 unit.x, unit.y, 0, 0, half.x, half.y,
1169 32, 2, 1, use_zb, r->zero_block);
1170
1171 SET_BLOCK(vb + 6, mb->cbp, mb->mbx, mb->mby,
1172 unit.x, unit.y, half.x, 0, half.x, half.y,
1173 16, 2, 1, use_zb, r->zero_block);
1174
1175 SET_BLOCK(vb + 12, mb->cbp, mb->mbx, mb->mby,
1176 unit.x, unit.y, 0, half.y, half.x, half.y,
1177 8, 2, 1, use_zb, r->zero_block);
1178
1179 SET_BLOCK(vb + 18, mb->cbp, mb->mbx, mb->mby,
1180 unit.x, unit.y, half.x, half.y, half.x, half.y,
1181 4, 2, 1, use_zb, r->zero_block);
1182
1183 break;
1184 }
1185 default:
1186 assert(0);
1187 }
1188 }
1189
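/*
 * Macroblocks are sorted by type into contiguous vertex ranges (via the
 * offset[] prefix sums) so that flush() can issue one draw_arrays() call per
 * macroblock type.  The per-type counts are returned in num_macroblocks[].
 */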
1190 static void
1191 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
1192 unsigned *num_macroblocks)
1193 {
1194 unsigned offset[NUM_MACROBLOCK_TYPES];
1195 struct vert_stream_0 *ycbcr_vb;
1196 struct vertex2f *ref_vb[2];
1197
1198 assert(r);
1199 assert(num_macroblocks);
1200
1201 for (unsigned i = 0; i < r->num_macroblocks; ++i)
1202 {
1203 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1204 ++num_macroblocks[mb_type];
1205 }
1206
1207 offset[0] = 0;
1208
1209 for (unsigned i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
1210 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
1211
1212 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
1213 (
1214 r->pipe->screen,
1215 r->vertex_bufs.individual.ycbcr.buffer,
1216 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1217 );
1218
1219 for (unsigned i = 0; i < 2; ++i)
1220 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
1221 (
1222 r->pipe->screen,
1223 r->vertex_bufs.individual.ref[i].buffer,
1224 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1225 );
1226
1227 for (unsigned i = 0; i < r->num_macroblocks; ++i)
1228 {
1229 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1230
1231 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
1232 ycbcr_vb, ref_vb);
1233
1234 ++offset[mb_type];
1235 }
1236
1237 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer);
1238 for (unsigned i = 0; i < 2; ++i)
1239 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer);
1240 }
1241
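/*
 * flush() generates the vertex streams for all queued macroblocks, binds the
 * render target and constant buffers, then draws each macroblock type in turn
 * with its matching shader pair and reference textures (past and/or future).
 * The field-predicted paths are currently disabled pending shader support.
 */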
1242 static void
1243 flush(struct vl_mpeg12_mc_renderer *r)
1244 {
1245 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1246 unsigned vb_start = 0;
1247 struct vertex_shader_consts *vs_consts;
1248
1249 assert(r);
1250 assert(r->num_macroblocks == r->macroblocks_per_batch);
1251
1252 gen_macroblock_stream(r, num_macroblocks);
1253
1254 r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface
1255 (
1256 r->pipe->screen, r->surface,
1257 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
1258 );
1259
1260 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1261 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1262
1263 vs_consts = pipe_buffer_map
1264 (
1265 r->pipe->screen, r->vs_const_buf.buffer,
1266 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1267 );
1268
1269 vs_consts->denorm.x = r->surface->width[0];
1270 vs_consts->denorm.y = r->surface->height[0];
1271
1272 pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
1273
1274 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1275 &r->vs_const_buf);
1276 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0,
1277 &r->fs_const_buf);
1278
1279 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0)
1280 {
1281 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1282 r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
1283 r->pipe->set_sampler_textures(r->pipe, 3, r->textures.all);
1284 r->pipe->bind_sampler_states(r->pipe, 3, r->samplers.all);
1285 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1286 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1287
1288 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1289 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1290 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1291 }
1292
1293 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0)
1294 {
1295 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1296 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1297 r->textures.individual.ref[0] = r->past;
1298 r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
1299 r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
1300 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1301 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1302
1303 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1304 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1305 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1306 }
1307
1308 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ )
1309 {
1310 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1311 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1312 r->textures.individual.ref[0] = r->past;
1313 r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
1314 r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
1315 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1316 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1317
1318 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1319 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1320 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1321 }
1322
1323 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0)
1324 {
1325 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1326 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1327 r->textures.individual.ref[0] = r->future;
1328 r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
1329 r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
1330 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1331 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1332
1333 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1334 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1335 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1336 }
1337
1338 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ )
1339 {
1340 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1341 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1342 r->textures.individual.ref[0] = r->future;
1343 r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
1344 r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
1345 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1346 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1347
1348 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1349 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1350 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1351 }
1352
1353 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0)
1354 {
1355 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1356 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1357 r->textures.individual.ref[0] = r->past;
1358 r->textures.individual.ref[1] = r->future;
1359 r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all);
1360 r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all);
1361 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1362 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1363
1364 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1365 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1366 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1367 }
1368
1369 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ )
1370 {
1371 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1372 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1373 r->textures.individual.ref[0] = r->past;
1374 r->textures.individual.ref[1] = r->future;
1375 r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all);
1376 r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all);
1377 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1378 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1379
1380 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1381 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1382 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1383 }
1384
1385 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1386 pipe_surface_reference(&r->fb_state.cbufs[0], NULL);
1387
1388 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1389 for (unsigned i = 0; i < 3; ++i)
1390 r->zero_block[i].x = ZERO_BLOCK_NIL;
1391
1392 r->num_macroblocks = 0;
1393 }
1394
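/*
 * Frame-coded blocks are copied to consecutive texture lines; field-coded
 * blocks are interleaved onto every other line (dst_pitch * 2) so that the
 * two fields of a macroblock land at their display positions.
 */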
1395 static void
1396 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1397 {
1398 assert(src);
1399 assert(dst);
1400
1401 for (unsigned y = 0; y < BLOCK_HEIGHT; ++y)
1402 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1403 }
1404
1405 static void
1406 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1407 {
1408 assert(src);
1409 assert(dst);
1410
1411 for (unsigned y = 0; y < BLOCK_HEIGHT; ++y)
1412 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1413 }
1414
1415 static void
1416 fill_zero_block(short *dst, unsigned dst_pitch)
1417 {
1418 assert(dst);
1419
1420 for (unsigned y = 0; y < BLOCK_HEIGHT; ++y)
1421 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1422 }
1423
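/*
 * grab_blocks copies the coded residual blocks of one macroblock into the
 * mapped luma/Cb/Cr transfer buffers according to the coded block pattern.
 * Blocks without coefficients are zero-filled, zero-filled once and aliased
 * afterwards, or skipped entirely, depending on eb_handling.
 */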
1424 static void
1425 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1426 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1427 {
1428 unsigned tex_pitch;
1429 short *texels;
1430 unsigned tb = 0, sb = 0;
1431 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1432
1433 assert(r);
1434 assert(blocks);
1435
1436 tex_pitch = r->tex_transfer[0]->stride / r->tex_transfer[0]->block.size;
1437 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1438
1439 for (unsigned y = 0; y < 2; ++y)
1440 {
1441 for (unsigned x = 0; x < 2; ++x, ++tb)
1442 {
1443 if ((cbp >> (5 - tb)) & 1)
1444 {
1445 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME)
1446 {
1447 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1448 texels + y * tex_pitch * BLOCK_WIDTH +
1449 x * BLOCK_WIDTH, tex_pitch);
1450 }
1451 else
1452 {
1453 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1454 texels + y * tex_pitch + x * BLOCK_WIDTH,
1455 tex_pitch);
1456 }
1457
1458 ++sb;
1459 }
1460 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE)
1461 {
1462 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1463 ZERO_BLOCK_IS_NIL(r->zero_block[0]))
1464 {
1465 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1466 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1467 {
1468 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1469 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1470 }
1471 }
1472 }
1473 }
1474 }
1475
1476 /* TODO: Implement 422, 444 */
1477 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1478
1479 mbpx /= 2;
1480 mbpy /= 2;
1481
1482 for (tb = 0; tb < 2; ++tb)
1483 {
1484 tex_pitch = r->tex_transfer[tb + 1]->stride / r->tex_transfer[tb + 1]->block.size;
1485 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1486
1487 if ((cbp >> (1 - tb)) & 1)
1488 {
1489 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1490 ++sb;
1491 }
1492 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE)
1493 {
1494 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1495 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1]))
1496 {
1497 fill_zero_block(texels, tex_pitch);
1498 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1499 {
1500 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1501 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1502 }
1503 }
1504 }
1505 }
1506 }
1507
1508 static void
1509 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1510 struct pipe_mpeg12_macroblock *mb)
1511 {
1512 assert(r);
1513 assert(mb);
1514 assert(r->num_macroblocks < r->macroblocks_per_batch);
1515
1516 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1517 sizeof(struct pipe_mpeg12_macroblock));
1518
1519 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1520
1521 ++r->num_macroblocks;
1522 }
1523
1524 bool
1525 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1526 struct pipe_context *pipe,
1527 unsigned picture_width,
1528 unsigned picture_height,
1529 enum pipe_video_chroma_format chroma_format,
1530 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1531 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1532 bool pot_buffers)
1533 {
1534 assert(renderer);
1535 assert(pipe);
1536 /* TODO: Implement other policies */
1537 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1538 /* TODO: Implement this */
1539 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1540 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1541 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1542 assert(pot_buffers);
1543
1544 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1545
1546 renderer->pipe = pipe;
1547 renderer->picture_width = picture_width;
1548 renderer->picture_height = picture_height;
1549 renderer->chroma_format = chroma_format;
1550 renderer->bufmode = bufmode;
1551 renderer->eb_handling = eb_handling;
1552 renderer->pot_buffers = pot_buffers;
1553
1554 if (!init_pipe_state(renderer))
1555 return false;
1556 if (!init_shaders(renderer))
1557 {
1558 cleanup_pipe_state(renderer);
1559 return false;
1560 }
1561 if (!init_buffers(renderer))
1562 {
1563 cleanup_shaders(renderer);
1564 cleanup_pipe_state(renderer);
1565 return false;
1566 }
1567
1568 renderer->surface = NULL;
1569 renderer->past = NULL;
1570 renderer->future = NULL;
1571 for (unsigned i = 0; i < 3; ++i)
1572 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1573 renderer->num_macroblocks = 0;
1574
1575 xfer_buffers_map(renderer);
1576
1577 return true;
1578 }
1579
1580 void
1581 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1582 {
1583 assert(renderer);
1584
1585 xfer_buffers_unmap(renderer);
1586
1587 cleanup_pipe_state(renderer);
1588 cleanup_shaders(renderer);
1589 cleanup_buffers(renderer);
1590 }
1591
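/*
 * Entry point: macroblocks are accumulated into the current batch; the batch
 * is flushed when it fills, or when a different target surface arrives while
 * macroblocks are still queued.  With BUFFER_PICTURE mode a batch holds a
 * whole picture's worth of macroblocks.
 */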
1592 void
1593 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1594 *renderer,
1595 struct pipe_texture *surface,
1596 struct pipe_texture *past,
1597 struct pipe_texture *future,
1598 unsigned num_macroblocks,
1599 struct pipe_mpeg12_macroblock
1600 *mpeg12_macroblocks,
1601 struct pipe_fence_handle **fence)
1602 {
1603 bool new_surface = false;
1604
1605 assert(renderer);
1606 assert(surface);
1607 assert(num_macroblocks);
1608 assert(mpeg12_macroblocks);
1609
1610 if (renderer->surface)
1611 {
1612 if (surface != renderer->surface)
1613 {
1614 if (renderer->num_macroblocks > 0)
1615 {
1616 xfer_buffers_unmap(renderer);
1617 flush(renderer);
1618 }
1619
1620 new_surface = true;
1621 }
1622
1623       /* If the surface we're rendering hasn't changed, the ref frames shouldn't change. */
1624 assert(surface != renderer->surface || renderer->past == past);
1625 assert(surface != renderer->surface || renderer->future == future);
1626 }
1627 else
1628 new_surface = true;
1629
1630 if (new_surface)
1631 {
1632 renderer->surface = surface;
1633 renderer->past = past;
1634 renderer->future = future;
1635 renderer->fence = fence;
1636 renderer->surface_tex_inv_size.x = 1.0f / surface->width[0];
1637 renderer->surface_tex_inv_size.y = 1.0f / surface->height[0];
1638 }
1639
1640 while (num_macroblocks)
1641 {
1642 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1643 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1644
1645 for (unsigned i = 0; i < num_to_submit; ++i)
1646 {
1647 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1648 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1649 }
1650
1651       num_macroblocks -= num_to_submit;
            mpeg12_macroblocks += num_to_submit; /* advance past the macroblocks already submitted */
1652
1653 if (renderer->num_macroblocks == renderer->macroblocks_per_batch)
1654 {
1655 xfer_buffers_unmap(renderer);
1656 flush(renderer);
1657 xfer_buffers_map(renderer);
1658 /* Next time we get this surface it may have new ref frames */
1659 renderer->surface = NULL;
1660 }
1661 }
1662 }