Revert "mesa: Remove pointless comparison of unsigned integer with a negative constant."
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include <assert.h>
30 #include <pipe/p_context.h>
31 #include <util/u_inlines.h>
32 #include <util/u_format.h>
33 #include <util/u_math.h>
34 #include <util/u_memory.h>
35 #include <tgsi/tgsi_parse.h>
36 #include <tgsi/tgsi_build.h>
37 #include "vl_shader_build.h"
38
/* Alignment passed to pipe_buffer_create() for every buffer in this file. */
#define DEFAULT_BUF_ALIGNMENT 1

/* Macroblock and block dimensions. */
#define MACROBLOCK_WIDTH 16
#define MACROBLOCK_HEIGHT 16
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8

/*
 * Sentinel stored in the .x component of a zero-block coordinate to mark it
 * as "not set".  The expansion is parenthesized so the negative literal
 * cannot combine with a neighbouring operator at the point of use.
 */
#define ZERO_BLOCK_NIL (-1.0f)

/* True when zero-block coordinate zb carries the ZERO_BLOCK_NIL sentinel. */
#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
46
/* Two-component float vector. */
struct vertex2f
{
   float x;
   float y;
};
51
/* Four-component float vector. */
struct vertex4f
{
   float x;
   float y;
   float z;
   float w;
};
56
57 struct vertex_shader_consts
58 {
59 struct vertex4f denorm;
60 };
61
62 struct fragment_shader_consts
63 {
64 struct vertex4f multiplier;
65 struct vertex4f div;
66 };
67
68 /*
69 * Muliplier renormalizes block samples from 16 bits to 12 bits.
70 * Divider is used when calculating Y % 2 for choosing top or bottom
71 * field for P or B macroblocks.
72 * TODO: Use immediates.
73 */
74 static const struct fragment_shader_consts fs_consts = {
75 {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
76 {0.5f, 2.0f, 0.0f, 0.0f}
77 };
78
79 struct vert_stream_0
80 {
81 struct vertex2f pos;
82 struct vertex2f luma_tc;
83 struct vertex2f cb_tc;
84 struct vertex2f cr_tc;
85 };
86
/*
 * Internal macroblock classification.  Values are consecutive from zero so
 * NUM_MACROBLOCK_TYPES equals the number of real types.
 */
enum MACROBLOCK_TYPE
{
   MACROBLOCK_TYPE_INTRA = 0,
   MACROBLOCK_TYPE_FWD_FRAME_PRED = 1,
   MACROBLOCK_TYPE_FWD_FIELD_PRED = 2,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED = 3,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED = 4,
   MACROBLOCK_TYPE_BI_FRAME_PRED = 5,
   MACROBLOCK_TYPE_BI_FIELD_PRED = 6,

   /* Count of the entries above; must stay last. */
   NUM_MACROBLOCK_TYPES
};
99
100 static void
101 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
102 {
103 const unsigned max_tokens = 50;
104
105 struct pipe_shader_state vs;
106 struct tgsi_token *tokens;
107 struct tgsi_header *header;
108
109 struct tgsi_full_declaration decl;
110 struct tgsi_full_instruction inst;
111
112 unsigned ti;
113
114 unsigned i;
115
116 assert(r);
117
118 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
119 header = (struct tgsi_header *) &tokens[0];
120 *header = tgsi_build_header();
121 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
122
123 ti = 2;
124
125 /*
126 * decl i0 ; Vertex pos
127 * decl i1 ; Luma texcoords
128 * decl i2 ; Chroma Cb texcoords
129 * decl i3 ; Chroma Cr texcoords
130 */
131 for (i = 0; i < 4; i++) {
132 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
133 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
134 }
135
136 /*
137 * decl o0 ; Vertex pos
138 * decl o1 ; Luma texcoords
139 * decl o2 ; Chroma Cb texcoords
140 * decl o3 ; Chroma Cr texcoords
141 */
142 for (i = 0; i < 4; i++) {
143 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
144 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
145 }
146
147 /*
148 * mov o0, i0 ; Move input vertex pos to output
149 * mov o1, i1 ; Move input luma texcoords to output
150 * mov o2, i2 ; Move input chroma Cb texcoords to output
151 * mov o3, i3 ; Move input chroma Cr texcoords to output
152 */
153 for (i = 0; i < 4; ++i) {
154 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
155 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
156 }
157
158 /* end */
159 inst = vl_end();
160 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
161
162 assert(ti <= max_tokens);
163
164 vs.tokens = tokens;
165 r->i_vs = r->pipe->create_vs_state(r->pipe, &vs);
166 free(tokens);
167 }
168
169 static void
170 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
171 {
172 const unsigned max_tokens = 100;
173
174 struct pipe_shader_state fs;
175 struct tgsi_token *tokens;
176 struct tgsi_header *header;
177
178 struct tgsi_full_declaration decl;
179 struct tgsi_full_instruction inst;
180
181 unsigned ti;
182
183 unsigned i;
184
185 assert(r);
186
187 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
188 header = (struct tgsi_header *) &tokens[0];
189 *header = tgsi_build_header();
190 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
191
192 ti = 2;
193
194 /*
195 * decl i0 ; Luma texcoords
196 * decl i1 ; Chroma Cb texcoords
197 * decl i2 ; Chroma Cr texcoords
198 */
199 for (i = 0; i < 3; ++i) {
200 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
201 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
202 }
203
204 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
205 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
206 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
207
208 /* decl o0 ; Fragment color */
209 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
210 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
211
212 /* decl t0, t1 */
213 decl = vl_decl_temps(0, 1);
214 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
215
216 /*
217 * decl s0 ; Sampler for luma texture
218 * decl s1 ; Sampler for chroma Cb texture
219 * decl s2 ; Sampler for chroma Cr texture
220 */
221 for (i = 0; i < 3; ++i) {
222 decl = vl_decl_samplers(i, i);
223 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
224 }
225
226 /*
227 * tex2d t1, i0, s0 ; Read texel from luma texture
228 * mov t0.x, t1.x ; Move luma sample into .x component
229 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
230 * mov t0.y, t1.x ; Move Cb sample into .y component
231 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
232 * mov t0.z, t1.x ; Move Cr sample into .z component
233 */
234 for (i = 0; i < 3; ++i) {
235 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
236 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
237
238 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
239 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
240 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
241 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
242 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
243 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
244 }
245
246 /* mul o0, t0, c0 ; Rescale texel to correct range */
247 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
248 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
249
250 /* end */
251 inst = vl_end();
252 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
253
254 assert(ti <= max_tokens);
255
256 fs.tokens = tokens;
257 r->i_fs = r->pipe->create_fs_state(r->pipe, &fs);
258 free(tokens);
259 }
260
261 static void
262 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
263 {
264 const unsigned max_tokens = 100;
265
266 struct pipe_shader_state vs;
267 struct tgsi_token *tokens;
268 struct tgsi_header *header;
269
270 struct tgsi_full_declaration decl;
271 struct tgsi_full_instruction inst;
272
273 unsigned ti;
274
275 unsigned i;
276
277 assert(r);
278
279 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
280 header = (struct tgsi_header *) &tokens[0];
281 *header = tgsi_build_header();
282 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
283
284 ti = 2;
285
286 /*
287 * decl i0 ; Vertex pos
288 * decl i1 ; Luma texcoords
289 * decl i2 ; Chroma Cb texcoords
290 * decl i3 ; Chroma Cr texcoords
291 * decl i4 ; Ref surface top field texcoords
292 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
293 */
294 for (i = 0; i < 6; i++) {
295 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
296 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
297 }
298
299 /*
300 * decl o0 ; Vertex pos
301 * decl o1 ; Luma texcoords
302 * decl o2 ; Chroma Cb texcoords
303 * decl o3 ; Chroma Cr texcoords
304 * decl o4 ; Ref macroblock texcoords
305 */
306 for (i = 0; i < 5; i++) {
307 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
308 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
309 }
310
311 /*
312 * mov o0, i0 ; Move input vertex pos to output
313 * mov o1, i1 ; Move input luma texcoords to output
314 * mov o2, i2 ; Move input chroma Cb texcoords to output
315 * mov o3, i3 ; Move input chroma Cr texcoords to output
316 */
317 for (i = 0; i < 4; ++i) {
318 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
319 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
320 }
321
322 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
323 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
324 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
325
326 /* end */
327 inst = vl_end();
328 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
329
330 assert(ti <= max_tokens);
331
332 vs.tokens = tokens;
333 r->p_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
334 free(tokens);
335 }
336
#if 0
/* Disabled placeholder: the field-predicted vertex shader is not implemented. */
static void
create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   /* Trap any call until an implementation exists. */
   assert(0);
}
#endif
344
345 static void
346 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
347 {
348 const unsigned max_tokens = 100;
349
350 struct pipe_shader_state fs;
351 struct tgsi_token *tokens;
352 struct tgsi_header *header;
353
354 struct tgsi_full_declaration decl;
355 struct tgsi_full_instruction inst;
356
357 unsigned ti;
358
359 unsigned i;
360
361 assert(r);
362
363 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
364 header = (struct tgsi_header *) &tokens[0];
365 *header = tgsi_build_header();
366 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
367
368 ti = 2;
369
370 /*
371 * decl i0 ; Luma texcoords
372 * decl i1 ; Chroma Cb texcoords
373 * decl i2 ; Chroma Cr texcoords
374 * decl i3 ; Ref macroblock texcoords
375 */
376 for (i = 0; i < 4; ++i) {
377 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
378 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
379 }
380
381 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
382 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
383 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
384
385 /* decl o0 ; Fragment color */
386 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
387 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
388
389 /* decl t0, t1 */
390 decl = vl_decl_temps(0, 1);
391 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
392
393 /*
394 * decl s0 ; Sampler for luma texture
395 * decl s1 ; Sampler for chroma Cb texture
396 * decl s2 ; Sampler for chroma Cr texture
397 * decl s3 ; Sampler for ref surface texture
398 */
399 for (i = 0; i < 4; ++i) {
400 decl = vl_decl_samplers(i, i);
401 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
402 }
403
404 /*
405 * tex2d t1, i0, s0 ; Read texel from luma texture
406 * mov t0.x, t1.x ; Move luma sample into .x component
407 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
408 * mov t0.y, t1.x ; Move Cb sample into .y component
409 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
410 * mov t0.z, t1.x ; Move Cr sample into .z component
411 */
412 for (i = 0; i < 3; ++i) {
413 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
414 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
415
416 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
417 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
418 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
419 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
420 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
421 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
422 }
423
424 /* mul t0, t0, c0 ; Rescale texel to correct range */
425 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
426 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
427
428 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
429 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
430 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
431
432 /* add o0, t0, t1 ; Add ref and differential to form final output */
433 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
434 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
435
436 /* end */
437 inst = vl_end();
438 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
439
440 assert(ti <= max_tokens);
441
442 fs.tokens = tokens;
443 r->p_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
444 free(tokens);
445 }
446
#if 0
/* Disabled placeholder: the field-predicted fragment shader is not implemented. */
static void
create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   /* Trap any call until an implementation exists. */
   assert(0);
}
#endif
454
455 static void
456 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
457 {
458 const unsigned max_tokens = 100;
459
460 struct pipe_shader_state vs;
461 struct tgsi_token *tokens;
462 struct tgsi_header *header;
463
464 struct tgsi_full_declaration decl;
465 struct tgsi_full_instruction inst;
466
467 unsigned ti;
468
469 unsigned i;
470
471 assert(r);
472
473 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
474 header = (struct tgsi_header *) &tokens[0];
475 *header = tgsi_build_header();
476 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
477
478 ti = 2;
479
480 /*
481 * decl i0 ; Vertex pos
482 * decl i1 ; Luma texcoords
483 * decl i2 ; Chroma Cb texcoords
484 * decl i3 ; Chroma Cr texcoords
485 * decl i4 ; First ref macroblock top field texcoords
486 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
487 * decl i6 ; Second ref macroblock top field texcoords
488 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
489 */
490 for (i = 0; i < 8; i++) {
491 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
492 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
493 }
494
495 /*
496 * decl o0 ; Vertex pos
497 * decl o1 ; Luma texcoords
498 * decl o2 ; Chroma Cb texcoords
499 * decl o3 ; Chroma Cr texcoords
500 * decl o4 ; First ref macroblock texcoords
501 * decl o5 ; Second ref macroblock texcoords
502 */
503 for (i = 0; i < 6; i++) {
504 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
505 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
506 }
507
508 /*
509 * mov o0, i0 ; Move input vertex pos to output
510 * mov o1, i1 ; Move input luma texcoords to output
511 * mov o2, i2 ; Move input chroma Cb texcoords to output
512 * mov o3, i3 ; Move input chroma Cr texcoords to output
513 */
514 for (i = 0; i < 4; ++i) {
515 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
516 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
517 }
518
519 /*
520 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
521 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
522 */
523 for (i = 0; i < 2; ++i) {
524 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
525 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
526 }
527
528 /* end */
529 inst = vl_end();
530 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
531
532 assert(ti <= max_tokens);
533
534 vs.tokens = tokens;
535 r->b_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
536 free(tokens);
537 }
538
#if 0
/* Disabled placeholder: the field bi-predicted vertex shader is not implemented. */
static void
create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   /* Trap any call until an implementation exists. */
   assert(0);
}
#endif
546
547 static void
548 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
549 {
550 const unsigned max_tokens = 100;
551
552 struct pipe_shader_state fs;
553 struct tgsi_token *tokens;
554 struct tgsi_header *header;
555
556 struct tgsi_full_declaration decl;
557 struct tgsi_full_instruction inst;
558
559 unsigned ti;
560
561 unsigned i;
562
563 assert(r);
564
565 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
566 header = (struct tgsi_header *) &tokens[0];
567 *header = tgsi_build_header();
568 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
569
570 ti = 2;
571
572 /*
573 * decl i0 ; Luma texcoords
574 * decl i1 ; Chroma Cb texcoords
575 * decl i2 ; Chroma Cr texcoords
576 * decl i3 ; First ref macroblock texcoords
577 * decl i4 ; Second ref macroblock texcoords
578 */
579 for (i = 0; i < 5; ++i) {
580 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
581 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
582 }
583
584 /*
585 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
586 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
587 */
588 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
589 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
590
591 /* decl o0 ; Fragment color */
592 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
593 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
594
595 /* decl t0-t2 */
596 decl = vl_decl_temps(0, 2);
597 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
598
599 /*
600 * decl s0 ; Sampler for luma texture
601 * decl s1 ; Sampler for chroma Cb texture
602 * decl s2 ; Sampler for chroma Cr texture
603 * decl s3 ; Sampler for first ref surface texture
604 * decl s4 ; Sampler for second ref surface texture
605 */
606 for (i = 0; i < 5; ++i) {
607 decl = vl_decl_samplers(i, i);
608 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
609 }
610
611 /*
612 * tex2d t1, i0, s0 ; Read texel from luma texture
613 * mov t0.x, t1.x ; Move luma sample into .x component
614 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
615 * mov t0.y, t1.x ; Move Cb sample into .y component
616 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
617 * mov t0.z, t1.x ; Move Cr sample into .z component
618 */
619 for (i = 0; i < 3; ++i) {
620 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
621 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
622
623 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
624 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
625 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
626 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
627 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
628 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
629 }
630
631 /* mul t0, t0, c0 ; Rescale texel to correct range */
632 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
633 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
634
635 /*
636 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
637 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
638 */
639 for (i = 0; i < 2; ++i) {
640 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
641 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
642 }
643
644 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
645 inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
646 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
647 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
648 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
649 inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
650 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
651
652 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
653 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
654 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
655
656 /* end */
657 inst = vl_end();
658 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
659
660 assert(ti <= max_tokens);
661
662 fs.tokens = tokens;
663 r->b_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
664 free(tokens);
665 }
666
#if 0
/* Disabled placeholder: the field bi-predicted fragment shader is not implemented. */
static void
create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   /* Trap any call until an implementation exists. */
   assert(0);
}
#endif
674
675 static void
676 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
677 {
678 unsigned i;
679
680 assert(r);
681
682 for (i = 0; i < 3; ++i) {
683 r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
684 (
685 r->pipe->screen, r->textures.all[i],
686 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
687 r->textures.all[i]->width0, r->textures.all[i]->height0
688 );
689
690 r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
691 }
692 }
693
694 static void
695 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
696 {
697 unsigned i;
698
699 assert(r);
700
701 for (i = 0; i < 3; ++i) {
702 r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
703 r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
704 }
705 }
706
707 static bool
708 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
709 {
710 struct pipe_sampler_state sampler;
711 unsigned filters[5];
712 unsigned i;
713
714 assert(r);
715
716 r->viewport.scale[0] = r->pot_buffers ?
717 util_next_power_of_two(r->picture_width) : r->picture_width;
718 r->viewport.scale[1] = r->pot_buffers ?
719 util_next_power_of_two(r->picture_height) : r->picture_height;
720 r->viewport.scale[2] = 1;
721 r->viewport.scale[3] = 1;
722 r->viewport.translate[0] = 0;
723 r->viewport.translate[1] = 0;
724 r->viewport.translate[2] = 0;
725 r->viewport.translate[3] = 0;
726
727 r->scissor.maxx = r->pot_buffers ?
728 util_next_power_of_two(r->picture_width) : r->picture_width;
729 r->scissor.maxy = r->pot_buffers ?
730 util_next_power_of_two(r->picture_height) : r->picture_height;
731
732 r->fb_state.width = r->pot_buffers ?
733 util_next_power_of_two(r->picture_width) : r->picture_width;
734 r->fb_state.height = r->pot_buffers ?
735 util_next_power_of_two(r->picture_height) : r->picture_height;
736 r->fb_state.nr_cbufs = 1;
737 r->fb_state.zsbuf = NULL;
738
739 /* Luma filter */
740 filters[0] = PIPE_TEX_FILTER_NEAREST;
741 /* Chroma filters */
742 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
743 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
744 filters[1] = PIPE_TEX_FILTER_NEAREST;
745 filters[2] = PIPE_TEX_FILTER_NEAREST;
746 }
747 else {
748 filters[1] = PIPE_TEX_FILTER_LINEAR;
749 filters[2] = PIPE_TEX_FILTER_LINEAR;
750 }
751 /* Fwd, bkwd ref filters */
752 filters[3] = PIPE_TEX_FILTER_LINEAR;
753 filters[4] = PIPE_TEX_FILTER_LINEAR;
754
755 for (i = 0; i < 5; ++i) {
756 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
757 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
758 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
759 sampler.min_img_filter = filters[i];
760 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
761 sampler.mag_img_filter = filters[i];
762 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
763 sampler.compare_func = PIPE_FUNC_ALWAYS;
764 sampler.normalized_coords = 1;
765 /*sampler.shadow_ambient = ; */
766 /*sampler.lod_bias = ; */
767 sampler.min_lod = 0;
768 /*sampler.max_lod = ; */
769 /*sampler.border_color[i] = ; */
770 /*sampler.max_anisotropy = ; */
771 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
772 }
773
774 return true;
775 }
776
777 static void
778 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
779 {
780 unsigned i;
781
782 assert(r);
783
784 for (i = 0; i < 5; ++i)
785 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
786 }
787
788 static bool
789 init_shaders(struct vl_mpeg12_mc_renderer *r)
790 {
791 assert(r);
792
793 create_intra_vert_shader(r);
794 create_intra_frag_shader(r);
795 create_frame_pred_vert_shader(r);
796 create_frame_pred_frag_shader(r);
797 create_frame_bi_pred_vert_shader(r);
798 create_frame_bi_pred_frag_shader(r);
799
800 return true;
801 }
802
803 static void
804 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
805 {
806 assert(r);
807
808 r->pipe->delete_vs_state(r->pipe, r->i_vs);
809 r->pipe->delete_fs_state(r->pipe, r->i_fs);
810 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
811 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
812 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
813 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
814 }
815
816 static bool
817 init_buffers(struct vl_mpeg12_mc_renderer *r)
818 {
819 struct pipe_texture template;
820
821 const unsigned mbw =
822 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
823 const unsigned mbh =
824 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
825
826 unsigned i;
827
828 assert(r);
829
830 r->macroblocks_per_batch =
831 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
832 r->num_macroblocks = 0;
833 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
834
835 memset(&template, 0, sizeof(struct pipe_texture));
836 template.target = PIPE_TEXTURE_2D;
837 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
838 template.format = PIPE_FORMAT_R16_SNORM;
839 template.last_level = 0;
840 template.width0 = r->pot_buffers ?
841 util_next_power_of_two(r->picture_width) : r->picture_width;
842 template.height0 = r->pot_buffers ?
843 util_next_power_of_two(r->picture_height) : r->picture_height;
844 template.depth0 = 1;
845 template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
846
847 r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
848
849 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
850 template.width0 = r->pot_buffers ?
851 util_next_power_of_two(r->picture_width / 2) :
852 r->picture_width / 2;
853 template.height0 = r->pot_buffers ?
854 util_next_power_of_two(r->picture_height / 2) :
855 r->picture_height / 2;
856 }
857 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
858 template.height0 = r->pot_buffers ?
859 util_next_power_of_two(r->picture_height / 2) :
860 r->picture_height / 2;
861
862 r->textures.individual.cb =
863 r->pipe->screen->texture_create(r->pipe->screen, &template);
864 r->textures.individual.cr =
865 r->pipe->screen->texture_create(r->pipe->screen, &template);
866
867 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
868 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
869 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
870 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
871 (
872 r->pipe->screen,
873 DEFAULT_BUF_ALIGNMENT,
874 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
875 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
876 );
877
878 for (i = 1; i < 3; ++i) {
879 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
880 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
881 r->vertex_bufs.all[i].buffer_offset = 0;
882 r->vertex_bufs.all[i].buffer = pipe_buffer_create
883 (
884 r->pipe->screen,
885 DEFAULT_BUF_ALIGNMENT,
886 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
887 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
888 );
889 }
890
891 /* Position element */
892 r->vertex_elems[0].src_offset = 0;
893 r->vertex_elems[0].instance_divisor = 0;
894 r->vertex_elems[0].vertex_buffer_index = 0;
895 r->vertex_elems[0].nr_components = 2;
896 r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
897
898 /* Luma, texcoord element */
899 r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
900 r->vertex_elems[1].instance_divisor = 0;
901 r->vertex_elems[1].vertex_buffer_index = 0;
902 r->vertex_elems[1].nr_components = 2;
903 r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
904
905 /* Chroma Cr texcoord element */
906 r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
907 r->vertex_elems[2].instance_divisor = 0;
908 r->vertex_elems[2].vertex_buffer_index = 0;
909 r->vertex_elems[2].nr_components = 2;
910 r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
911
912 /* Chroma Cb texcoord element */
913 r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
914 r->vertex_elems[3].instance_divisor = 0;
915 r->vertex_elems[3].vertex_buffer_index = 0;
916 r->vertex_elems[3].nr_components = 2;
917 r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
918
919 /* First ref surface top field texcoord element */
920 r->vertex_elems[4].src_offset = 0;
921 r->vertex_elems[4].instance_divisor = 0;
922 r->vertex_elems[4].vertex_buffer_index = 1;
923 r->vertex_elems[4].nr_components = 2;
924 r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
925
926 /* First ref surface bottom field texcoord element */
927 r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
928 r->vertex_elems[5].instance_divisor = 0;
929 r->vertex_elems[5].vertex_buffer_index = 1;
930 r->vertex_elems[5].nr_components = 2;
931 r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
932
933 /* Second ref surface top field texcoord element */
934 r->vertex_elems[6].src_offset = 0;
935 r->vertex_elems[6].instance_divisor = 0;
936 r->vertex_elems[6].vertex_buffer_index = 2;
937 r->vertex_elems[6].nr_components = 2;
938 r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
939
940 /* Second ref surface bottom field texcoord element */
941 r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
942 r->vertex_elems[7].instance_divisor = 0;
943 r->vertex_elems[7].vertex_buffer_index = 2;
944 r->vertex_elems[7].nr_components = 2;
945 r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
946
947 r->vs_const_buf = pipe_buffer_create
948 (
949 r->pipe->screen,
950 DEFAULT_BUF_ALIGNMENT,
951 PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
952 sizeof(struct vertex_shader_consts)
953 );
954
955 r->fs_const_buf = pipe_buffer_create
956 (
957 r->pipe->screen,
958 DEFAULT_BUF_ALIGNMENT,
959 PIPE_BUFFER_USAGE_CONSTANT, sizeof(struct fragment_shader_consts)
960 );
961
962 memcpy
963 (
964 pipe_buffer_map(r->pipe->screen, r->fs_const_buf, PIPE_BUFFER_USAGE_CPU_WRITE),
965 &fs_consts, sizeof(struct fragment_shader_consts)
966 );
967
968 pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf);
969
970 return true;
971 }
972
973 static void
974 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
975 {
976 unsigned i;
977
978 assert(r);
979
980 pipe_buffer_reference(&r->vs_const_buf, NULL);
981 pipe_buffer_reference(&r->fs_const_buf, NULL);
982
983 for (i = 0; i < 3; ++i)
984 pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
985
986 for (i = 0; i < 3; ++i)
987 pipe_texture_reference(&r->textures.all[i], NULL);
988
989 FREE(r->macroblock_buf);
990 }
991
992 static enum MACROBLOCK_TYPE
993 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
994 {
995 assert(mb);
996
997 switch (mb->mb_type) {
998 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
999 return MACROBLOCK_TYPE_INTRA;
1000 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
1001 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
1002 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
1003 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
1004 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
1005 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
1006 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
1007 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
1008 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
1009 default:
1010 assert(0);
1011 }
1012
1013 /* Unreachable */
1014 return -1;
1015 }
1016
/*
 * Helper for SET_BLOCK: write one two-component attribute (field) of the six
 * vertices of a quad whose first corner is (x0, y0) and whose extent is
 * (hx, hy). The quad is emitted as two triangles: vertices 0-2 and 3-5.
 * Arguments are evaluated several times, so they must be side-effect free.
 */
#define SET_BLOCK_QUAD(vb, field, x0, y0, hx, hy) \
   do { \
      (vb)[0].field.x = (x0);        (vb)[0].field.y = (y0); \
      (vb)[1].field.x = (x0);        (vb)[1].field.y = (y0) + (hy); \
      (vb)[2].field.x = (x0) + (hx); (vb)[2].field.y = (y0); \
      (vb)[3].field.x = (x0) + (hx); (vb)[3].field.y = (y0); \
      (vb)[4].field.x = (x0);        (vb)[4].field.y = (y0) + (hy); \
      (vb)[5].field.x = (x0) + (hx); (vb)[5].field.y = (y0) + (hy); \
   } while (0)

/*
 * Fill the six vertices at vb for one block of a macroblock.
 *
 * The position is always (mbx * unitx + ofsx, mby * unity + ofsy) with
 * extent (hx, hy). Each of the luma, Cb and Cr texcoords uses that same
 * rectangle when the block is coded (cbp has a bit in common with the
 * lm / cbm / crm mask) or when use_zb is false; otherwise it samples the
 * shared zero block at zb[0] (luma), zb[1] (Cb) or zb[2] (Cr).
 *
 * Arguments are evaluated several times, so they must be side-effect free.
 */
/* XXX: One of these days this will have to be killed with fire */
#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \
   do { \
      SET_BLOCK_QUAD(vb, pos, \
                     (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      \
      if (!(use_zb) || ((cbp) & (lm))) { \
         SET_BLOCK_QUAD(vb, luma_tc, \
                        (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      } \
      else { \
         SET_BLOCK_QUAD(vb, luma_tc, (zb)[0].x, (zb)[0].y, hx, hy); \
      } \
      \
      if (!(use_zb) || ((cbp) & (cbm))) { \
         SET_BLOCK_QUAD(vb, cb_tc, \
                        (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      } \
      else { \
         SET_BLOCK_QUAD(vb, cb_tc, (zb)[1].x, (zb)[1].y, hx, hy); \
      } \
      \
      if (!(use_zb) || ((cbp) & (crm))) { \
         SET_BLOCK_QUAD(vb, cr_tc, \
                        (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      } \
      else { \
         SET_BLOCK_QUAD(vb, cr_tc, (zb)[2].x, (zb)[2].y, hx, hy); \
      } \
   } while (0)
1084
1085 static void
1086 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
1087 struct pipe_mpeg12_macroblock *mb, unsigned pos,
1088 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
1089 {
1090 struct vertex2f mo_vec[2];
1091
1092 unsigned i;
1093
1094 assert(r);
1095 assert(mb);
1096 assert(ycbcr_vb);
1097 assert(pos < r->macroblocks_per_batch);
1098
1099 mo_vec[1].x = 0;
1100 mo_vec[1].y = 0;
1101
1102 switch (mb->mb_type) {
1103 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
1104 {
1105 struct vertex2f *vb;
1106
1107 assert(ref_vb && ref_vb[1]);
1108
1109 vb = ref_vb[1] + pos * 2 * 24;
1110
1111 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
1112 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
1113
1114 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
1115 for (i = 0; i < 24 * 2; i += 2) {
1116 vb[i].x = mo_vec[0].x;
1117 vb[i].y = mo_vec[0].y;
1118 }
1119 }
1120 else {
1121 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
1122 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
1123
1124 for (i = 0; i < 24 * 2; i += 2) {
1125 vb[i].x = mo_vec[0].x;
1126 vb[i].y = mo_vec[0].y;
1127 vb[i + 1].x = mo_vec[1].x;
1128 vb[i + 1].y = mo_vec[1].y;
1129 }
1130 }
1131
1132 /* fall-through */
1133 }
1134 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
1135 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
1136 {
1137 struct vertex2f *vb;
1138
1139 assert(ref_vb && ref_vb[0]);
1140
1141 vb = ref_vb[0] + pos * 2 * 24;
1142
1143 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
1144 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
1145 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
1146
1147 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
1148 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
1149 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
1150 }
1151 }
1152 else {
1153 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
1154 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
1155
1156 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
1157 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
1158 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
1159 }
1160 }
1161
1162 if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
1163 for (i = 0; i < 24 * 2; i += 2) {
1164 vb[i].x = mo_vec[0].x;
1165 vb[i].y = mo_vec[0].y;
1166 }
1167 }
1168 else {
1169 for (i = 0; i < 24 * 2; i += 2) {
1170 vb[i].x = mo_vec[0].x;
1171 vb[i].y = mo_vec[0].y;
1172 vb[i + 1].x = mo_vec[1].x;
1173 vb[i + 1].y = mo_vec[1].y;
1174 }
1175 }
1176
1177 /* fall-through */
1178 }
1179 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
1180 {
1181 const struct vertex2f unit =
1182 {
1183 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
1184 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
1185 };
1186 const struct vertex2f half =
1187 {
1188 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
1189 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
1190 };
1191 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
1192
1193 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
1194
1195 SET_BLOCK(vb, mb->cbp, mb->mbx, mb->mby,
1196 unit.x, unit.y, 0, 0, half.x, half.y,
1197 32, 2, 1, use_zb, r->zero_block);
1198
1199 SET_BLOCK(vb + 6, mb->cbp, mb->mbx, mb->mby,
1200 unit.x, unit.y, half.x, 0, half.x, half.y,
1201 16, 2, 1, use_zb, r->zero_block);
1202
1203 SET_BLOCK(vb + 12, mb->cbp, mb->mbx, mb->mby,
1204 unit.x, unit.y, 0, half.y, half.x, half.y,
1205 8, 2, 1, use_zb, r->zero_block);
1206
1207 SET_BLOCK(vb + 18, mb->cbp, mb->mbx, mb->mby,
1208 unit.x, unit.y, half.x, half.y, half.x, half.y,
1209 4, 2, 1, use_zb, r->zero_block);
1210
1211 break;
1212 }
1213 default:
1214 assert(0);
1215 }
1216 }
1217
1218 static void
1219 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
1220 unsigned *num_macroblocks)
1221 {
1222 unsigned offset[NUM_MACROBLOCK_TYPES];
1223 struct vert_stream_0 *ycbcr_vb;
1224 struct vertex2f *ref_vb[2];
1225 unsigned i;
1226
1227 assert(r);
1228 assert(num_macroblocks);
1229
1230 for (i = 0; i < r->num_macroblocks; ++i) {
1231 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1232 ++num_macroblocks[mb_type];
1233 }
1234
1235 offset[0] = 0;
1236
1237 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
1238 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
1239
1240 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
1241 (
1242 r->pipe->screen,
1243 r->vertex_bufs.individual.ycbcr.buffer,
1244 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1245 );
1246
1247 for (i = 0; i < 2; ++i)
1248 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
1249 (
1250 r->pipe->screen,
1251 r->vertex_bufs.individual.ref[i].buffer,
1252 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1253 );
1254
1255 for (i = 0; i < r->num_macroblocks; ++i) {
1256 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1257
1258 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
1259 ycbcr_vb, ref_vb);
1260
1261 ++offset[mb_type];
1262 }
1263
1264 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer);
1265 for (i = 0; i < 2; ++i)
1266 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer);
1267 }
1268
1269 static void
1270 flush(struct vl_mpeg12_mc_renderer *r)
1271 {
1272 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1273 unsigned vb_start = 0;
1274 struct vertex_shader_consts *vs_consts;
1275 unsigned i;
1276
1277 assert(r);
1278 assert(r->num_macroblocks == r->macroblocks_per_batch);
1279
1280 gen_macroblock_stream(r, num_macroblocks);
1281
1282 r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface
1283 (
1284 r->pipe->screen, r->surface,
1285 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
1286 );
1287
1288 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1289 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1290 r->pipe->set_scissor_state(r->pipe, &r->scissor);
1291
1292 vs_consts = pipe_buffer_map
1293 (
1294 r->pipe->screen, r->vs_const_buf,
1295 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1296 );
1297
1298 vs_consts->denorm.x = r->surface->width0;
1299 vs_consts->denorm.y = r->surface->height0;
1300
1301 pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf);
1302
1303 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1304 r->vs_const_buf);
1305 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0,
1306 r->fs_const_buf);
1307
1308 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1309 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1310 r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
1311 r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
1312 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1313 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1314 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1315
1316 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1317 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1318 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1319 }
1320
1321 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1322 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1323 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1324 r->textures.individual.ref[0] = r->past;
1325 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1326 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1327 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1328 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1329
1330 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1331 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1332 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1333 }
1334
1335 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1336 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1337 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1338 r->textures.individual.ref[0] = r->past;
1339 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1340 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1341 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1342 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1343
1344 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1345 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1346 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1347 }
1348
1349 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1350 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1351 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1352 r->textures.individual.ref[0] = r->future;
1353 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1354 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1355 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1356 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1357
1358 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1359 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1360 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1361 }
1362
1363 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
1364 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1365 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1366 r->textures.individual.ref[0] = r->future;
1367 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1368 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1369 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1370 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1371
1372 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1373 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1374 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1375 }
1376
1377 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1378 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1379 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1380 r->textures.individual.ref[0] = r->past;
1381 r->textures.individual.ref[1] = r->future;
1382 r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
1383 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1384 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1385 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1386
1387 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1388 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1389 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1390 }
1391
1392 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1393 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1394 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1395 r->textures.individual.ref[0] = r->past;
1396 r->textures.individual.ref[1] = r->future;
1397 r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
1398 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1399 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1400 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1401
1402 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1403 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1404 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1405 }
1406
1407 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1408 pipe_surface_reference(&r->fb_state.cbufs[0], NULL);
1409
1410 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1411 for (i = 0; i < 3; ++i)
1412 r->zero_block[i].x = ZERO_BLOCK_NIL;
1413
1414 r->num_macroblocks = 0;
1415 }
1416
1417 static void
1418 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1419 {
1420 unsigned y;
1421
1422 assert(src);
1423 assert(dst);
1424
1425 for (y = 0; y < BLOCK_HEIGHT; ++y)
1426 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1427 }
1428
1429 static void
1430 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1431 {
1432 unsigned y;
1433
1434 assert(src);
1435 assert(dst);
1436
1437 for (y = 0; y < BLOCK_HEIGHT; ++y)
1438 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1439 }
1440
1441 static void
1442 fill_zero_block(short *dst, unsigned dst_pitch)
1443 {
1444 unsigned y;
1445
1446 assert(dst);
1447
1448 for (y = 0; y < BLOCK_HEIGHT; ++y)
1449 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1450 }
1451
1452 static void
1453 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1454 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1455 {
1456 unsigned tex_pitch;
1457 short *texels;
1458 unsigned tb = 0, sb = 0;
1459 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1460 unsigned x, y;
1461
1462 assert(r);
1463 assert(blocks);
1464
1465 tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->texture->format);
1466 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1467
1468 for (y = 0; y < 2; ++y) {
1469 for (x = 0; x < 2; ++x, ++tb) {
1470 if ((cbp >> (5 - tb)) & 1) {
1471 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1472 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1473 texels + y * tex_pitch * BLOCK_WIDTH +
1474 x * BLOCK_WIDTH, tex_pitch);
1475 }
1476 else {
1477 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1478 texels + y * tex_pitch + x * BLOCK_WIDTH,
1479 tex_pitch);
1480 }
1481
1482 ++sb;
1483 }
1484 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1485 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1486 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1487 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1488 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1489 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1490 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1491 }
1492 }
1493 }
1494 }
1495 }
1496
1497 /* TODO: Implement 422, 444 */
1498 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1499
1500 mbpx /= 2;
1501 mbpy /= 2;
1502
1503 for (tb = 0; tb < 2; ++tb) {
1504 tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->texture->format);
1505 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1506
1507 if ((cbp >> (1 - tb)) & 1) {
1508 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1509 ++sb;
1510 }
1511 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1512 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1513 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1514 fill_zero_block(texels, tex_pitch);
1515 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1516 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1517 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1518 }
1519 }
1520 }
1521 }
1522 }
1523
1524 static void
1525 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1526 struct pipe_mpeg12_macroblock *mb)
1527 {
1528 assert(r);
1529 assert(mb);
1530 assert(r->num_macroblocks < r->macroblocks_per_batch);
1531
1532 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1533 sizeof(struct pipe_mpeg12_macroblock));
1534
1535 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1536
1537 ++r->num_macroblocks;
1538 }
1539
1540 bool
1541 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1542 struct pipe_context *pipe,
1543 unsigned picture_width,
1544 unsigned picture_height,
1545 enum pipe_video_chroma_format chroma_format,
1546 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1547 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1548 bool pot_buffers)
1549 {
1550 unsigned i;
1551
1552 assert(renderer);
1553 assert(pipe);
1554 /* TODO: Implement other policies */
1555 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1556 /* TODO: Implement this */
1557 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1558 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1559 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1560 assert(pot_buffers);
1561
1562 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1563
1564 renderer->pipe = pipe;
1565 renderer->picture_width = picture_width;
1566 renderer->picture_height = picture_height;
1567 renderer->chroma_format = chroma_format;
1568 renderer->bufmode = bufmode;
1569 renderer->eb_handling = eb_handling;
1570 renderer->pot_buffers = pot_buffers;
1571
1572 if (!init_pipe_state(renderer))
1573 return false;
1574 if (!init_shaders(renderer)) {
1575 cleanup_pipe_state(renderer);
1576 return false;
1577 }
1578 if (!init_buffers(renderer)) {
1579 cleanup_shaders(renderer);
1580 cleanup_pipe_state(renderer);
1581 return false;
1582 }
1583
1584 renderer->surface = NULL;
1585 renderer->past = NULL;
1586 renderer->future = NULL;
1587 for (i = 0; i < 3; ++i)
1588 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1589 renderer->num_macroblocks = 0;
1590
1591 xfer_buffers_map(renderer);
1592
1593 return true;
1594 }
1595
/**
 * Tear down a renderer set up by vl_mpeg12_mc_renderer_init().
 *
 * The transfer buffers are unmapped first, then the pipe state, the
 * shaders and the buffers are released.
 */
void
vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
{
   assert(renderer);

   /* Init leaves the transfer buffers mapped; undo that before teardown */
   xfer_buffers_unmap(renderer);

   cleanup_pipe_state(renderer);
   cleanup_shaders(renderer);
   cleanup_buffers(renderer);
}
1607
1608 void
1609 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1610 *renderer,
1611 struct pipe_texture *surface,
1612 struct pipe_texture *past,
1613 struct pipe_texture *future,
1614 unsigned num_macroblocks,
1615 struct pipe_mpeg12_macroblock
1616 *mpeg12_macroblocks,
1617 struct pipe_fence_handle **fence)
1618 {
1619 bool new_surface = false;
1620
1621 assert(renderer);
1622 assert(surface);
1623 assert(num_macroblocks);
1624 assert(mpeg12_macroblocks);
1625
1626 if (renderer->surface) {
1627 if (surface != renderer->surface) {
1628 if (renderer->num_macroblocks > 0) {
1629 xfer_buffers_unmap(renderer);
1630 flush(renderer);
1631 }
1632
1633 new_surface = true;
1634 }
1635
1636 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1637 assert(surface != renderer->surface || renderer->past == past);
1638 assert(surface != renderer->surface || renderer->future == future);
1639 }
1640 else
1641 new_surface = true;
1642
1643 if (new_surface) {
1644 renderer->surface = surface;
1645 renderer->past = past;
1646 renderer->future = future;
1647 renderer->fence = fence;
1648 renderer->surface_tex_inv_size.x = 1.0f / surface->width0;
1649 renderer->surface_tex_inv_size.y = 1.0f / surface->height0;
1650 }
1651
1652 while (num_macroblocks) {
1653 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1654 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1655 unsigned i;
1656
1657 for (i = 0; i < num_to_submit; ++i) {
1658 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1659 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1660 }
1661
1662 num_macroblocks -= num_to_submit;
1663
1664 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1665 xfer_buffers_unmap(renderer);
1666 flush(renderer);
1667 xfer_buffers_map(renderer);
1668 /* Next time we get this surface it may have new ref frames */
1669 renderer->surface = NULL;
1670 }
1671 }
1672 }