Merge branch 'gallium-noblocks'
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include <assert.h>
30 #include <pipe/p_context.h>
31 #include <pipe/p_inlines.h>
32 #include <util/u_math.h>
33 #include <util/u_memory.h>
34 #include <tgsi/tgsi_parse.h>
35 #include <tgsi/tgsi_build.h>
36 #include "vl_shader_build.h"
37
38 #define DEFAULT_BUF_ALIGNMENT 1
39 #define MACROBLOCK_WIDTH 16
40 #define MACROBLOCK_HEIGHT 16
41 #define BLOCK_WIDTH 8
42 #define BLOCK_HEIGHT 8
43 #define ZERO_BLOCK_NIL -1.0f
44 #define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
45
46 struct vertex2f
47 {
48 float x, y;
49 };
50
51 struct vertex4f
52 {
53 float x, y, z, w;
54 };
55
56 struct vertex_shader_consts
57 {
58 struct vertex4f denorm;
59 };
60
61 struct fragment_shader_consts
62 {
63 struct vertex4f multiplier;
64 struct vertex4f div;
65 };
66
67 /*
68 * Multiplier renormalizes block samples from 16 bits to 12 bits.
69 * Divider is used when calculating Y % 2 for choosing top or bottom
70 * field for P or B macroblocks.
71 * TODO: Use immediates.
72 */
73 static const struct fragment_shader_consts fs_consts = {
74 {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
75 {0.5f, 2.0f, 0.0f, 0.0f}
76 };
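/*
 * Worked example of what the multiplier does (descriptive note only): a
 * residual v written into the R16_SNORM block texture is returned by the
 * sampler as v / 32767; multiplying by 32767 / 255 yields v / 255, i.e. the
 * residual expressed on roughly the same normalized scale as an 8-bit unorm
 * reference texel it is later added to (the reference surfaces are assumed
 * to be 8-bit unorm here).
 */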
77
78 struct vert_stream_0
79 {
80 struct vertex2f pos;
81 struct vertex2f luma_tc;
82 struct vertex2f cb_tc;
83 struct vertex2f cr_tc;
84 };
85
86 enum MACROBLOCK_TYPE
87 {
88 MACROBLOCK_TYPE_INTRA,
89 MACROBLOCK_TYPE_FWD_FRAME_PRED,
90 MACROBLOCK_TYPE_FWD_FIELD_PRED,
91 MACROBLOCK_TYPE_BKWD_FRAME_PRED,
92 MACROBLOCK_TYPE_BKWD_FIELD_PRED,
93 MACROBLOCK_TYPE_BI_FRAME_PRED,
94 MACROBLOCK_TYPE_BI_FIELD_PRED,
95
96 NUM_MACROBLOCK_TYPES
97 };
98
99 static void
100 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
101 {
102 const unsigned max_tokens = 50;
103
104 struct pipe_shader_state vs;
105 struct tgsi_token *tokens;
106 struct tgsi_header *header;
107
108 struct tgsi_full_declaration decl;
109 struct tgsi_full_instruction inst;
110
111 unsigned ti;
112
113 unsigned i;
114
115 assert(r);
116
117 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
118 header = (struct tgsi_header *) &tokens[0];
119 *header = tgsi_build_header();
120 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
121
122 ti = 2;
123
124 /*
125 * decl i0 ; Vertex pos
126 * decl i1 ; Luma texcoords
127 * decl i2 ; Chroma Cb texcoords
128 * decl i3 ; Chroma Cr texcoords
129 */
130 for (i = 0; i < 4; i++) {
131 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
132 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
133 }
134
135 /*
136 * decl o0 ; Vertex pos
137 * decl o1 ; Luma texcoords
138 * decl o2 ; Chroma Cb texcoords
139 * decl o3 ; Chroma Cr texcoords
140 */
141 for (i = 0; i < 4; i++) {
142 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
143 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
144 }
145
146 /*
147 * mov o0, i0 ; Move input vertex pos to output
148 * mov o1, i1 ; Move input luma texcoords to output
149 * mov o2, i2 ; Move input chroma Cb texcoords to output
150 * mov o3, i3 ; Move input chroma Cr texcoords to output
151 */
152 for (i = 0; i < 4; ++i) {
153 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
154 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
155 }
156
157 /* end */
158 inst = vl_end();
159 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
160
161 assert(ti <= max_tokens);
162
163 vs.tokens = tokens;
164 r->i_vs = r->pipe->create_vs_state(r->pipe, &vs);
165 free(tokens);
166 }
167
168 static void
169 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
170 {
171 const unsigned max_tokens = 100;
172
173 struct pipe_shader_state fs;
174 struct tgsi_token *tokens;
175 struct tgsi_header *header;
176
177 struct tgsi_full_declaration decl;
178 struct tgsi_full_instruction inst;
179
180 unsigned ti;
181
182 unsigned i;
183
184 assert(r);
185
186 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
187 header = (struct tgsi_header *) &tokens[0];
188 *header = tgsi_build_header();
189 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
190
191 ti = 2;
192
193 /*
194 * decl i0 ; Luma texcoords
195 * decl i1 ; Chroma Cb texcoords
196 * decl i2 ; Chroma Cr texcoords
197 */
198 for (i = 0; i < 3; ++i) {
199 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
200 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
201 }
202
203 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
204 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
205 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
206
207 /* decl o0 ; Fragment color */
208 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
209 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
210
211 /* decl t0, t1 */
212 decl = vl_decl_temps(0, 1);
213 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
214
215 /*
216 * decl s0 ; Sampler for luma texture
217 * decl s1 ; Sampler for chroma Cb texture
218 * decl s2 ; Sampler for chroma Cr texture
219 */
220 for (i = 0; i < 3; ++i) {
221 decl = vl_decl_samplers(i, i);
222 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
223 }
224
225 /*
226 * tex2d t1, i0, s0 ; Read texel from luma texture
227 * mov t0.x, t1.x ; Move luma sample into .x component
228 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
229 * mov t0.y, t1.x ; Move Cb sample into .y component
230 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
231 * mov t0.z, t1.x ; Move Cr sample into .z component
232 */
233 for (i = 0; i < 3; ++i) {
234 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
235 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
236
237 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
238 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
239 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
240 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
241 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
242 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
243 }
244
245 /* mul o0, t0, c0 ; Rescale texel to correct range */
246 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
247 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
248
249 /* end */
250 inst = vl_end();
251 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
252
253 assert(ti <= max_tokens);
254
255 fs.tokens = tokens;
256 r->i_fs = r->pipe->create_fs_state(r->pipe, &fs);
257 free(tokens);
258 }
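/*
 * Rough pseudocode for the fragment shader built above (names are
 * illustrative, not part of the TGSI):
 *
 *    t  = (sample(s0, i0).x, sample(s1, i1).x, sample(s2, i2).x)
 *    o0 = t * c0                    ; c0 = fs_consts.multiplier
 */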
259
260 static void
261 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
262 {
263 const unsigned max_tokens = 100;
264
265 struct pipe_shader_state vs;
266 struct tgsi_token *tokens;
267 struct tgsi_header *header;
268
269 struct tgsi_full_declaration decl;
270 struct tgsi_full_instruction inst;
271
272 unsigned ti;
273
274 unsigned i;
275
276 assert(r);
277
278 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
279 header = (struct tgsi_header *) &tokens[0];
280 *header = tgsi_build_header();
281 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
282
283 ti = 2;
284
285 /*
286 * decl i0 ; Vertex pos
287 * decl i1 ; Luma texcoords
288 * decl i2 ; Chroma Cb texcoords
289 * decl i3 ; Chroma Cr texcoords
290 * decl i4 ; Ref surface top field texcoords
291 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
292 */
293 for (i = 0; i < 6; i++) {
294 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
295 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
296 }
297
298 /*
299 * decl o0 ; Vertex pos
300 * decl o1 ; Luma texcoords
301 * decl o2 ; Chroma Cb texcoords
302 * decl o3 ; Chroma Cr texcoords
303 * decl o4 ; Ref macroblock texcoords
304 */
305 for (i = 0; i < 5; i++) {
306 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
307 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
308 }
309
310 /*
311 * mov o0, i0 ; Move input vertex pos to output
312 * mov o1, i1 ; Move input luma texcoords to output
313 * mov o2, i2 ; Move input chroma Cb texcoords to output
314 * mov o3, i3 ; Move input chroma Cr texcoords to output
315 */
316 for (i = 0; i < 4; ++i) {
317 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
318 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
319 }
320
321 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
322 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
323 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
324
325 /* end */
326 inst = vl_end();
327 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
328
329 assert(ti <= max_tokens);
330
331 vs.tokens = tokens;
332 r->p_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
333 free(tokens);
334 }
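/*
 * Note on the add above: vertex positions are generated in normalized
 * [0..1] surface texture space (see SET_BLOCK), and the per-vertex motion
 * vectors in stream 1 are pre-scaled by surface_tex_inv_size in
 * gen_macroblock_verts, so pos + motion directly yields the texcoord of the
 * prediction in the (same-sized) reference surface.
 */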
335
336 static void
337 create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
338 {
339 assert(false);
340 }
341
342 static void
343 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
344 {
345 const unsigned max_tokens = 100;
346
347 struct pipe_shader_state fs;
348 struct tgsi_token *tokens;
349 struct tgsi_header *header;
350
351 struct tgsi_full_declaration decl;
352 struct tgsi_full_instruction inst;
353
354 unsigned ti;
355
356 unsigned i;
357
358 assert(r);
359
360 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
361 header = (struct tgsi_header *) &tokens[0];
362 *header = tgsi_build_header();
363 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
364
365 ti = 2;
366
367 /*
368 * decl i0 ; Luma texcoords
369 * decl i1 ; Chroma Cb texcoords
370 * decl i2 ; Chroma Cr texcoords
371 * decl i3 ; Ref macroblock texcoords
372 */
373 for (i = 0; i < 4; ++i) {
374 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
375 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
376 }
377
378 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
379 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
380 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
381
382 /* decl o0 ; Fragment color */
383 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
384 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
385
386 /* decl t0, t1 */
387 decl = vl_decl_temps(0, 1);
388 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
389
390 /*
391 * decl s0 ; Sampler for luma texture
392 * decl s1 ; Sampler for chroma Cb texture
393 * decl s2 ; Sampler for chroma Cr texture
394 * decl s3 ; Sampler for ref surface texture
395 */
396 for (i = 0; i < 4; ++i) {
397 decl = vl_decl_samplers(i, i);
398 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
399 }
400
401 /*
402 * tex2d t1, i0, s0 ; Read texel from luma texture
403 * mov t0.x, t1.x ; Move luma sample into .x component
404 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
405 * mov t0.y, t1.x ; Move Cb sample into .y component
406 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
407 * mov t0.z, t1.x ; Move Cr sample into .z component
408 */
409 for (i = 0; i < 3; ++i) {
410 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
411 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
412
413 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
414 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
415 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
416 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
417 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
418 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
419 }
420
421 /* mul t0, t0, c0 ; Rescale texel to correct range */
422 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
423 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
424
425 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
426 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
427 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
428
429 /* add o0, t0, t1 ; Add ref and differential to form final output */
430 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
431 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
432
433 /* end */
434 inst = vl_end();
435 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
436
437 assert(ti <= max_tokens);
438
439 fs.tokens = tokens;
440 r->p_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
441 free(tokens);
442 }
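/*
 * Rough pseudocode for the fragment shader built above (names are
 * illustrative):
 *
 *    diff = (sample(s0, i0).x, sample(s1, i1).x, sample(s2, i2).x) * c0
 *    ref  = sample(s3, i3)
 *    o0   = diff + ref
 */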
443
444 static void
445 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
446 {
447 assert(false);
448 }
449
450 static void
451 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
452 {
453 const unsigned max_tokens = 100;
454
455 struct pipe_shader_state vs;
456 struct tgsi_token *tokens;
457 struct tgsi_header *header;
458
459 struct tgsi_full_declaration decl;
460 struct tgsi_full_instruction inst;
461
462 unsigned ti;
463
464 unsigned i;
465
466 assert(r);
467
468 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
469 header = (struct tgsi_header *) &tokens[0];
470 *header = tgsi_build_header();
471 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
472
473 ti = 2;
474
475 /*
476 * decl i0 ; Vertex pos
477 * decl i1 ; Luma texcoords
478 * decl i2 ; Chroma Cb texcoords
479 * decl i3 ; Chroma Cr texcoords
480 * decl i4 ; First ref macroblock top field texcoords
481 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
482 * decl i6 ; Second ref macroblock top field texcoords
483 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
484 */
485 for (i = 0; i < 8; i++) {
486 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
487 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
488 }
489
490 /*
491 * decl o0 ; Vertex pos
492 * decl o1 ; Luma texcoords
493 * decl o2 ; Chroma Cb texcoords
494 * decl o3 ; Chroma Cr texcoords
495 * decl o4 ; First ref macroblock texcoords
496 * decl o5 ; Second ref macroblock texcoords
497 */
498 for (i = 0; i < 6; i++) {
499 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
500 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
501 }
502
503 /*
504 * mov o0, i0 ; Move input vertex pos to output
505 * mov o1, i1 ; Move input luma texcoords to output
506 * mov o2, i2 ; Move input chroma Cb texcoords to output
507 * mov o3, i3 ; Move input chroma Cr texcoords to output
508 */
509 for (i = 0; i < 4; ++i) {
510 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
511 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
512 }
513
514 /*
515 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
516 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
517 */
518 for (i = 0; i < 2; ++i) {
519 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
520 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
521 }
522
523 /* end */
524 inst = vl_end();
525 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
526
527 assert(ti <= max_tokens);
528
529 vs.tokens = tokens;
530 r->b_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
531 free(tokens);
532 }
533
534 static void
535 create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
536 {
537 assert(false);
538 }
539
540 static void
541 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
542 {
543 const unsigned max_tokens = 100;
544
545 struct pipe_shader_state fs;
546 struct tgsi_token *tokens;
547 struct tgsi_header *header;
548
549 struct tgsi_full_declaration decl;
550 struct tgsi_full_instruction inst;
551
552 unsigned ti;
553
554 unsigned i;
555
556 assert(r);
557
558 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
559 header = (struct tgsi_header *) &tokens[0];
560 *header = tgsi_build_header();
561 *(struct tgsi_processor *) &tokens[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
562
563 ti = 2;
564
565 /*
566 * decl i0 ; Luma texcoords
567 * decl i1 ; Chroma Cb texcoords
568 * decl i2 ; Chroma Cr texcoords
569 * decl i3 ; First ref macroblock texcoords
570 * decl i4 ; Second ref macroblock texcoords
571 */
572 for (i = 0; i < 5; ++i) {
573 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
574 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
575 }
576
577 /*
578 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
579 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
580 */
581 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
582 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
583
584 /* decl o0 ; Fragment color */
585 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
586 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
587
588 /* decl t0-t2 */
589 decl = vl_decl_temps(0, 2);
590 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
591
592 /*
593 * decl s0 ; Sampler for luma texture
594 * decl s1 ; Sampler for chroma Cb texture
595 * decl s2 ; Sampler for chroma Cr texture
596 * decl s3 ; Sampler for first ref surface texture
597 * decl s4 ; Sampler for second ref surface texture
598 */
599 for (i = 0; i < 5; ++i) {
600 decl = vl_decl_samplers(i, i);
601 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
602 }
603
604 /*
605 * tex2d t1, i0, s0 ; Read texel from luma texture
606 * mov t0.x, t1.x ; Move luma sample into .x component
607 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
608 * mov t0.y, t1.x ; Move Cb sample into .y component
609 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
610 * mov t0.z, t1.x ; Move Cr sample into .z component
611 */
612 for (i = 0; i < 3; ++i) {
613 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
614 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
615
616 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
617 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
618 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
619 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
620 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
621 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
622 }
623
624 /* mul t0, t0, c0 ; Rescale texel to correct range */
625 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
626 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
627
628 /*
629 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
630 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
631 */
632 for (i = 0; i < 2; ++i) {
633 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
634 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
635 }
636
637 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
638 inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
639 inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
640 inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
641 inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
642 inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
643 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
644
645 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
646 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
647 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
648
649 /* end */
650 inst = vl_end();
651 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
652
653 assert(ti <= max_tokens);
654
655 fs.tokens = tokens;
656 r->b_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
657 free(tokens);
658 }
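/*
 * Rough pseudocode for the fragment shader built above (names are
 * illustrative):
 *
 *    diff = (sample(s0, i0).x, sample(s1, i1).x, sample(s2, i2).x) * c0
 *    ref  = lrp(c1.x, sample(s3, i3), sample(s4, i4))
 *         = 0.5 * past + 0.5 * future    ; c1.x = 0.5 from fs_consts.div
 *    o0   = diff + ref
 */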
659
660 static void
661 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
662 {
663 assert(false);
664 }
665
666 static void
667 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
668 {
669 unsigned i;
670
671 assert(r);
672
673 for (i = 0; i < 3; ++i) {
674 r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
675 (
676 r->pipe->screen, r->textures.all[i],
677 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
678 r->textures.all[i]->width0, r->textures.all[i]->height0
679 );
680
681 r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
682 }
683 }
684
685 static void
686 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
687 {
688 unsigned i;
689
690 assert(r);
691
692 for (i = 0; i < 3; ++i) {
693 r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
694 r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
695 }
696 }
697
698 static bool
699 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
700 {
701 struct pipe_sampler_state sampler;
702 unsigned filters[5];
703 unsigned i;
704
705 assert(r);
706
707 r->viewport.scale[0] = r->pot_buffers ?
708 util_next_power_of_two(r->picture_width) : r->picture_width;
709 r->viewport.scale[1] = r->pot_buffers ?
710 util_next_power_of_two(r->picture_height) : r->picture_height;
711 r->viewport.scale[2] = 1;
712 r->viewport.scale[3] = 1;
713 r->viewport.translate[0] = 0;
714 r->viewport.translate[1] = 0;
715 r->viewport.translate[2] = 0;
716 r->viewport.translate[3] = 0;
717
718 r->scissor.maxx = r->pot_buffers ?
719 util_next_power_of_two(r->picture_width) : r->picture_width;
720 r->scissor.maxy = r->pot_buffers ?
721 util_next_power_of_two(r->picture_height) : r->picture_height;
722
723 r->fb_state.width = r->pot_buffers ?
724 util_next_power_of_two(r->picture_width) : r->picture_width;
725 r->fb_state.height = r->pot_buffers ?
726 util_next_power_of_two(r->picture_height) : r->picture_height;
727 r->fb_state.nr_cbufs = 1;
728 r->fb_state.zsbuf = NULL;
729
730 /* Luma filter */
731 filters[0] = PIPE_TEX_FILTER_NEAREST;
732 /* Chroma filters */
733 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
734 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
735 filters[1] = PIPE_TEX_FILTER_NEAREST;
736 filters[2] = PIPE_TEX_FILTER_NEAREST;
737 }
738 else {
739 filters[1] = PIPE_TEX_FILTER_LINEAR;
740 filters[2] = PIPE_TEX_FILTER_LINEAR;
741 }
742 /* Fwd, bkwd ref filters */
743 filters[3] = PIPE_TEX_FILTER_LINEAR;
744 filters[4] = PIPE_TEX_FILTER_LINEAR;
745
746 for (i = 0; i < 5; ++i) {
747 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
748 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
749 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
750 sampler.min_img_filter = filters[i];
751 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
752 sampler.mag_img_filter = filters[i];
753 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
754 sampler.compare_func = PIPE_FUNC_ALWAYS;
755 sampler.normalized_coords = 1;
756 /*sampler.prefilter = ; */
757 /*sampler.shadow_ambient = ; */
758 /*sampler.lod_bias = ; */
759 sampler.min_lod = 0;
760 /*sampler.max_lod = ; */
761 /*sampler.border_color[i] = ; */
762 /*sampler.max_anisotropy = ; */
763 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
764 }
765
766 return true;
767 }
768
769 static void
770 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
771 {
772 unsigned i;
773
774 assert(r);
775
776 for (i = 0; i < 5; ++i)
777 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
778 }
779
780 static bool
781 init_shaders(struct vl_mpeg12_mc_renderer *r)
782 {
783 assert(r);
784
785 create_intra_vert_shader(r);
786 create_intra_frag_shader(r);
787 create_frame_pred_vert_shader(r);
788 create_frame_pred_frag_shader(r);
789 create_frame_bi_pred_vert_shader(r);
790 create_frame_bi_pred_frag_shader(r);
791
792 return true;
793 }
794
795 static void
796 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
797 {
798 assert(r);
799
800 r->pipe->delete_vs_state(r->pipe, r->i_vs);
801 r->pipe->delete_fs_state(r->pipe, r->i_fs);
802 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
803 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
804 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
805 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
806 }
807
808 static bool
809 init_buffers(struct vl_mpeg12_mc_renderer *r)
810 {
811 struct pipe_texture template;
812
813 const unsigned mbw =
814 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
815 const unsigned mbh =
816 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
817
818 unsigned i;
819
820 assert(r);
821
822 r->macroblocks_per_batch =
823 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
824 r->num_macroblocks = 0;
825 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
826
827 memset(&template, 0, sizeof(struct pipe_texture));
828 template.target = PIPE_TEXTURE_2D;
829 /* TODO: Accommodate HW that can't do this, and also cases where this isn't precise enough */
830 template.format = PIPE_FORMAT_R16_SNORM;
831 template.last_level = 0;
832 template.width0 = r->pot_buffers ?
833 util_next_power_of_two(r->picture_width) : r->picture_width;
834 template.height0 = r->pot_buffers ?
835 util_next_power_of_two(r->picture_height) : r->picture_height;
836 template.depth0 = 1;
837 template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
838
839 r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
840
841 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
842 template.width0 = r->pot_buffers ?
843 util_next_power_of_two(r->picture_width / 2) :
844 r->picture_width / 2;
845 template.height0 = r->pot_buffers ?
846 util_next_power_of_two(r->picture_height / 2) :
847 r->picture_height / 2;
848 }
849 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
850 template.height0 = r->pot_buffers ?
851 util_next_power_of_two(r->picture_height / 2) :
852 r->picture_height / 2;
853
854 r->textures.individual.cb =
855 r->pipe->screen->texture_create(r->pipe->screen, &template);
856 r->textures.individual.cr =
857 r->pipe->screen->texture_create(r->pipe->screen, &template);
858
859 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
860 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
861 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
862 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
863 (
864 r->pipe->screen,
865 DEFAULT_BUF_ALIGNMENT,
866 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
867 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
868 );
869
870 for (i = 1; i < 3; ++i) {
871 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
872 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
873 r->vertex_bufs.all[i].buffer_offset = 0;
874 r->vertex_bufs.all[i].buffer = pipe_buffer_create
875 (
876 r->pipe->screen,
877 DEFAULT_BUF_ALIGNMENT,
878 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
879 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
880 );
881 }
882
883 /* Position element */
884 r->vertex_elems[0].src_offset = 0;
885 r->vertex_elems[0].vertex_buffer_index = 0;
886 r->vertex_elems[0].nr_components = 2;
887 r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
888
889 /* Luma texcoord element */
890 r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
891 r->vertex_elems[1].vertex_buffer_index = 0;
892 r->vertex_elems[1].nr_components = 2;
893 r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
894
895 /* Chroma Cb texcoord element */
896 r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
897 r->vertex_elems[2].vertex_buffer_index = 0;
898 r->vertex_elems[2].nr_components = 2;
899 r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
900
901 /* Chroma Cr texcoord element */
902 r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
903 r->vertex_elems[3].vertex_buffer_index = 0;
904 r->vertex_elems[3].nr_components = 2;
905 r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
906
907 /* First ref surface top field texcoord element */
908 r->vertex_elems[4].src_offset = 0;
909 r->vertex_elems[4].vertex_buffer_index = 1;
910 r->vertex_elems[4].nr_components = 2;
911 r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
912
913 /* First ref surface bottom field texcoord element */
914 r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
915 r->vertex_elems[5].vertex_buffer_index = 1;
916 r->vertex_elems[5].nr_components = 2;
917 r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
918
919 /* Second ref surface top field texcoord element */
920 r->vertex_elems[6].src_offset = 0;
921 r->vertex_elems[6].vertex_buffer_index = 2;
922 r->vertex_elems[6].nr_components = 2;
923 r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
924
925 /* Second ref surface bottom field texcoord element */
926 r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
927 r->vertex_elems[7].vertex_buffer_index = 2;
928 r->vertex_elems[7].nr_components = 2;
929 r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
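/*
 * Summary of the vertex layout set up above: stream 0 interleaves position
 * plus luma/Cb/Cr texcoords (struct vert_stream_0, stride of four vertex2f),
 * while streams 1 and 2 hold the first and second ref surface texcoord
 * offsets as top/bottom field pairs (stride of two vertex2f). Every
 * macroblock contributes 24 vertices: four 6-vertex quads, one per 8x8
 * quadrant of the 16x16 macroblock.
 */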
930
931 r->vs_const_buf.buffer = pipe_buffer_create
932 (
933 r->pipe->screen,
934 DEFAULT_BUF_ALIGNMENT,
935 PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
936 sizeof(struct vertex_shader_consts)
937 );
938
939 r->fs_const_buf.buffer = pipe_buffer_create
940 (
941 r->pipe->screen,
942 DEFAULT_BUF_ALIGNMENT,
943 PIPE_BUFFER_USAGE_CONSTANT, sizeof(struct fragment_shader_consts)
944 );
945
946 memcpy
947 (
948 pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
949 &fs_consts, sizeof(struct fragment_shader_consts)
950 );
951
952 pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer);
953
954 return true;
955 }
956
957 static void
958 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
959 {
960 unsigned i;
961
962 assert(r);
963
964 pipe_buffer_reference(&r->vs_const_buf.buffer, NULL);
965 pipe_buffer_reference(&r->fs_const_buf.buffer, NULL);
966
967 for (i = 0; i < 3; ++i)
968 pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
969
970 for (i = 0; i < 3; ++i)
971 pipe_texture_reference(&r->textures.all[i], NULL);
972
973 FREE(r->macroblock_buf);
974 }
975
976 static enum MACROBLOCK_TYPE
977 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
978 {
979 assert(mb);
980
981 switch (mb->mb_type) {
982 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
983 return MACROBLOCK_TYPE_INTRA;
984 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
985 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
986 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
987 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
988 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
989 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
990 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
991 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
992 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
993 default:
994 assert(0);
995 }
996
997 /* Unreachable */
998 return -1;
999 }
1000
1001 /* XXX: One of these days this will have to be killed with fire */
1002 #define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \
1003 do { \
1004 (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \
1005 (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \
1006 (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \
1007 (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \
1008 (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \
1009 (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \
1010 \
1011 if (!use_zb || (cbp) & (lm)) \
1012 { \
1013 (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \
1014 (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1015 (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \
1016 (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \
1017 (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1018 (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1019 } \
1020 else \
1021 { \
1022 (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \
1023 (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \
1024 (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \
1025 (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \
1026 (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \
1027 (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \
1028 } \
1029 \
1030 if (!use_zb || (cbp) & (cbm)) \
1031 { \
1032 (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \
1033 (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1034 (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \
1035 (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \
1036 (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1037 (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1038 } \
1039 else \
1040 { \
1041 (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \
1042 (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \
1043 (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \
1044 (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \
1045 (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \
1046 (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \
1047 } \
1048 \
1049 if (!use_zb || (cbp) & (crm)) \
1050 { \
1051 (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \
1052 (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1053 (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \
1054 (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \
1055 (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1056 (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1057 } \
1058 else \
1059 { \
1060 (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \
1061 (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \
1062 (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \
1063 (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \
1064 (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \
1065 (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \
1066 } \
1067 } while (0)
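/*
 * SET_BLOCK emits one quad as two triangles (6 vertices; vertices 2/3 and
 * 1/4 are shared corners). The lm/cbm/crm arguments are coded_block_pattern
 * masks: the four calls below pass 32/16/8/4 for the luma blocks and 2/1
 * for Cb/Cr, matching the MPEG-2 4:2:0 cbp bit order. When a block is not
 * coded and use_zb is set, its texcoords are redirected to the shared zero
 * block instead of transferring zeroed data.
 */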
1068
1069 static void
1070 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
1071 struct pipe_mpeg12_macroblock *mb, unsigned pos,
1072 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
1073 {
1074 struct vertex2f mo_vec[2];
1075
1076 unsigned i;
1077
1078 assert(r);
1079 assert(mb);
1080 assert(ycbcr_vb);
1081 assert(pos < r->macroblocks_per_batch);
1082
1083 switch (mb->mb_type) {
1084 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
1085 {
1086 struct vertex2f *vb;
1087
1088 assert(ref_vb && ref_vb[1]);
1089
1090 vb = ref_vb[1] + pos * 2 * 24;
1091
1092 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
1093 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
1094
1095 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
1096 for (i = 0; i < 24 * 2; i += 2) {
1097 vb[i].x = mo_vec[0].x;
1098 vb[i].y = mo_vec[0].y;
1099 }
1100 }
1101 else {
1102 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
1103 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
1104
1105 for (i = 0; i < 24 * 2; i += 2) {
1106 vb[i].x = mo_vec[0].x;
1107 vb[i].y = mo_vec[0].y;
1108 vb[i + 1].x = mo_vec[1].x;
1109 vb[i + 1].y = mo_vec[1].y;
1110 }
1111 }
1112
1113 /* fall-through */
1114 }
1115 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
1116 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
1117 {
1118 struct vertex2f *vb;
1119
1120 assert(ref_vb && ref_vb[0]);
1121
1122 vb = ref_vb[0] + pos * 2 * 24;
1123
1124 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
1125 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
1126 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
1127
1128 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
1129 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
1130 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
1131 }
1132 }
1133 else {
1134 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
1135 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
1136
1137 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
1138 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
1139 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
1140 }
1141 }
1142
1143 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
1144 for (i = 0; i < 24 * 2; i += 2) {
1145 vb[i].x = mo_vec[0].x;
1146 vb[i].y = mo_vec[0].y;
1147 }
1148 }
1149 else {
1150 for (i = 0; i < 24 * 2; i += 2) {
1151 vb[i].x = mo_vec[0].x;
1152 vb[i].y = mo_vec[0].y;
1153 vb[i + 1].x = mo_vec[1].x;
1154 vb[i + 1].y = mo_vec[1].y;
1155 }
1156 }
1157
1158 /* fall-through */
1159 }
1160 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
1161 {
1162 const struct vertex2f unit =
1163 {
1164 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
1165 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
1166 };
1167 const struct vertex2f half =
1168 {
1169 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
1170 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
1171 };
1172 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
1173
1174 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
1175
1176 SET_BLOCK(vb, mb->cbp, mb->mbx, mb->mby,
1177 unit.x, unit.y, 0, 0, half.x, half.y,
1178 32, 2, 1, use_zb, r->zero_block);
1179
1180 SET_BLOCK(vb + 6, mb->cbp, mb->mbx, mb->mby,
1181 unit.x, unit.y, half.x, 0, half.x, half.y,
1182 16, 2, 1, use_zb, r->zero_block);
1183
1184 SET_BLOCK(vb + 12, mb->cbp, mb->mbx, mb->mby,
1185 unit.x, unit.y, 0, half.y, half.x, half.y,
1186 8, 2, 1, use_zb, r->zero_block);
1187
1188 SET_BLOCK(vb + 18, mb->cbp, mb->mbx, mb->mby,
1189 unit.x, unit.y, half.x, half.y, half.x, half.y,
1190 4, 2, 1, use_zb, r->zero_block);
1191
1192 break;
1193 }
1194 default:
1195 assert(0);
1196 }
1197 }
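/*
 * Motion vector handling above: mb->pmv values are in half-pel units, so
 * the 0.5 factor converts them to pels and surface_tex_inv_size converts
 * pels to normalized texcoords; the remaining half-pel fraction is resolved
 * by the linear filtering of the ref samplers set up in init_pipe_state.
 * The switch intentionally falls through, so BI macroblocks also fill the
 * first ref stream, and all predicted types also fill the position and
 * Y/Cb/Cr texcoord stream handled in the INTRA case.
 */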
1198
1199 static void
1200 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
1201 unsigned *num_macroblocks)
1202 {
1203 unsigned offset[NUM_MACROBLOCK_TYPES];
1204 struct vert_stream_0 *ycbcr_vb;
1205 struct vertex2f *ref_vb[2];
1206 unsigned i;
1207
1208 assert(r);
1209 assert(num_macroblocks);
1210
1211 for (i = 0; i < r->num_macroblocks; ++i) {
1212 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1213 ++num_macroblocks[mb_type];
1214 }
1215
1216 offset[0] = 0;
1217
1218 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
1219 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
1220
1221 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
1222 (
1223 r->pipe->screen,
1224 r->vertex_bufs.individual.ycbcr.buffer,
1225 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1226 );
1227
1228 for (i = 0; i < 2; ++i)
1229 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
1230 (
1231 r->pipe->screen,
1232 r->vertex_bufs.individual.ref[i].buffer,
1233 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1234 );
1235
1236 for (i = 0; i < r->num_macroblocks; ++i) {
1237 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1238
1239 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
1240 ycbcr_vb, ref_vb);
1241
1242 ++offset[mb_type];
1243 }
1244
1245 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer);
1246 for (i = 0; i < 2; ++i)
1247 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer);
1248 }
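/*
 * gen_macroblock_stream above buckets the batch by macroblock type: it
 * counts each type, turns the counts into prefix-sum offsets, and writes
 * each macroblock's vertices at its type's slot, so every type ends up in
 * one contiguous run of the vertex buffers. flush() below then renders each
 * type with a single draw_arrays call over its run (vb_start tracks where
 * the next run starts).
 */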
1249
1250 static void
1251 flush(struct vl_mpeg12_mc_renderer *r)
1252 {
1253 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1254 unsigned vb_start = 0;
1255 struct vertex_shader_consts *vs_consts;
1256 unsigned i;
1257
1258 assert(r);
1259 assert(r->num_macroblocks == r->macroblocks_per_batch);
1260
1261 gen_macroblock_stream(r, num_macroblocks);
1262
1263 r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface
1264 (
1265 r->pipe->screen, r->surface,
1266 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
1267 );
1268
1269 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1270 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1271 r->pipe->set_scissor_state(r->pipe, &r->scissor);
1272
1273 vs_consts = pipe_buffer_map
1274 (
1275 r->pipe->screen, r->vs_const_buf.buffer,
1276 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1277 );
1278
1279 vs_consts->denorm.x = r->surface->width0;
1280 vs_consts->denorm.y = r->surface->height0;
1281
1282 pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
1283
1284 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1285 &r->vs_const_buf);
1286 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0,
1287 &r->fs_const_buf);
1288
1289 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1290 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1291 r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
1292 r->pipe->set_fragment_sampler_textures(r->pipe, 3, r->textures.all);
1293 r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
1294 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1295 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1296
1297 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1298 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1299 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1300 }
1301
1302 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1303 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1304 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1305 r->textures.individual.ref[0] = r->past;
1306 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1307 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1308 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1309 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1310
1311 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1312 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1313 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1314 }
1315
1316 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1317 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1318 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1319 r->textures.individual.ref[0] = r->past;
1320 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1321 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1322 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1323 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1324
1325 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1326 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1327 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1328 }
1329
1330 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1331 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1332 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1333 r->textures.individual.ref[0] = r->future;
1334 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1335 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1336 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1337 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1338
1339 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1340 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1341 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1342 }
1343
1344 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
1345 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1346 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1347 r->textures.individual.ref[0] = r->future;
1348 r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
1349 r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
1350 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1351 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1352
1353 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1354 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1355 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1356 }
1357
1358 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1359 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1360 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1361 r->textures.individual.ref[0] = r->past;
1362 r->textures.individual.ref[1] = r->future;
1363 r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
1364 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1365 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1366 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1367
1368 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1369 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1370 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1371 }
1372
1373 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1374 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1375 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1376 r->textures.individual.ref[0] = r->past;
1377 r->textures.individual.ref[1] = r->future;
1378 r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
1379 r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
1380 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1381 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1382
1383 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1384 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1385 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1386 }
1387
1388 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1389 pipe_surface_reference(&r->fb_state.cbufs[0], NULL);
1390
1391 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1392 for (i = 0; i < 3; ++i)
1393 r->zero_block[i].x = ZERO_BLOCK_NIL;
1394
1395 r->num_macroblocks = 0;
1396 }
1397
1398 static void
1399 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1400 {
1401 unsigned y;
1402
1403 assert(src);
1404 assert(dst);
1405
1406 for (y = 0; y < BLOCK_HEIGHT; ++y)
1407 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1408 }
1409
1410 static void
1411 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1412 {
1413 unsigned y;
1414
1415 assert(src);
1416 assert(dst);
1417
1418 for (y = 0; y < BLOCK_HEIGHT; ++y)
1419 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1420 }
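/*
 * grab_frame_coded_block copies an 8x8 block to consecutive rows of the
 * destination, while grab_field_coded_block writes to every other row
 * (dst_pitch * 2) so that field-DCT coded blocks interleave into the
 * frame-organized transfer buffer.
 */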
1421
1422 static void
1423 fill_zero_block(short *dst, unsigned dst_pitch)
1424 {
1425 unsigned y;
1426
1427 assert(dst);
1428
1429 for (y = 0; y < BLOCK_HEIGHT; ++y)
1430 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1431 }
1432
1433 static void
1434 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1435 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1436 {
1437 unsigned tex_pitch;
1438 short *texels;
1439 unsigned tb = 0, sb = 0;
1440 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1441 unsigned x, y;
1442
1443 assert(r);
1444 assert(blocks);
1445
1446 tex_pitch = r->tex_transfer[0]->stride / pf_get_blocksize(r->tex_transfer[0]->texture->format);
1447 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1448
1449 for (y = 0; y < 2; ++y) {
1450 for (x = 0; x < 2; ++x, ++tb) {
1451 if ((cbp >> (5 - tb)) & 1) {
1452 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1453 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1454 texels + y * tex_pitch * BLOCK_WIDTH +
1455 x * BLOCK_WIDTH, tex_pitch);
1456 }
1457 else {
1458 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1459 texels + y * tex_pitch + x * BLOCK_WIDTH,
1460 tex_pitch);
1461 }
1462
1463 ++sb;
1464 }
1465 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1466 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1467 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1468 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1469 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1470 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1471 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1472 }
1473 }
1474 }
1475 }
1476 }
1477
1478 /* TODO: Implement 422, 444 */
1479 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1480
1481 mbpx /= 2;
1482 mbpy /= 2;
1483
1484 for (tb = 0; tb < 2; ++tb) {
1485 tex_pitch = r->tex_transfer[tb + 1]->stride / pf_get_blocksize(r->tex_transfer[tb + 1]->texture->format);
1486 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1487
1488 if ((cbp >> (1 - tb)) & 1) {
1489 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1490 ++sb;
1491 }
1492 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1493 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1494 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1495 fill_zero_block(texels, tex_pitch);
1496 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1497 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1498 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1499 }
1500 }
1501 }
1502 }
1503 }
1504
1505 static void
1506 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1507 struct pipe_mpeg12_macroblock *mb)
1508 {
1509 assert(r);
1510 assert(mb);
1511 assert(r->num_macroblocks < r->macroblocks_per_batch);
1512
1513 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1514 sizeof(struct pipe_mpeg12_macroblock));
1515
1516 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1517
1518 ++r->num_macroblocks;
1519 }
1520
1521 bool
1522 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1523 struct pipe_context *pipe,
1524 unsigned picture_width,
1525 unsigned picture_height,
1526 enum pipe_video_chroma_format chroma_format,
1527 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1528 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1529 bool pot_buffers)
1530 {
1531 unsigned i;
1532
1533 assert(renderer);
1534 assert(pipe);
1535 /* TODO: Implement other policies */
1536 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1537 /* TODO: Implement this */
1538 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1539 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1540 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1541 assert(pot_buffers);
1542
1543 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1544
1545 renderer->pipe = pipe;
1546 renderer->picture_width = picture_width;
1547 renderer->picture_height = picture_height;
1548 renderer->chroma_format = chroma_format;
1549 renderer->bufmode = bufmode;
1550 renderer->eb_handling = eb_handling;
1551 renderer->pot_buffers = pot_buffers;
1552
1553 if (!init_pipe_state(renderer))
1554 return false;
1555 if (!init_shaders(renderer)) {
1556 cleanup_pipe_state(renderer);
1557 return false;
1558 }
1559 if (!init_buffers(renderer)) {
1560 cleanup_shaders(renderer);
1561 cleanup_pipe_state(renderer);
1562 return false;
1563 }
1564
1565 renderer->surface = NULL;
1566 renderer->past = NULL;
1567 renderer->future = NULL;
1568 for (i = 0; i < 3; ++i)
1569 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1570 renderer->num_macroblocks = 0;
1571
1572 xfer_buffers_map(renderer);
1573
1574 return true;
1575 }
1576
1577 void
1578 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
1579 {
1580 assert(renderer);
1581
1582 xfer_buffers_unmap(renderer);
1583
1584 cleanup_pipe_state(renderer);
1585 cleanup_shaders(renderer);
1586 cleanup_buffers(renderer);
1587 }
1588
1589 void
1590 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1591 *renderer,
1592 struct pipe_texture *surface,
1593 struct pipe_texture *past,
1594 struct pipe_texture *future,
1595 unsigned num_macroblocks,
1596 struct pipe_mpeg12_macroblock
1597 *mpeg12_macroblocks,
1598 struct pipe_fence_handle **fence)
1599 {
1600 bool new_surface = false;
1601
1602 assert(renderer);
1603 assert(surface);
1604 assert(num_macroblocks);
1605 assert(mpeg12_macroblocks);
1606
1607 if (renderer->surface) {
1608 if (surface != renderer->surface) {
1609 if (renderer->num_macroblocks > 0) {
1610 xfer_buffers_unmap(renderer);
1611 flush(renderer);
1612 }
1613
1614 new_surface = true;
1615 }
1616
1617 /* If the surface we're rendering hasn't changed, the ref frames shouldn't change. */
1618 assert(surface != renderer->surface || renderer->past == past);
1619 assert(surface != renderer->surface || renderer->future == future);
1620 }
1621 else
1622 new_surface = true;
1623
1624 if (new_surface) {
1625 renderer->surface = surface;
1626 renderer->past = past;
1627 renderer->future = future;
1628 renderer->fence = fence;
1629 renderer->surface_tex_inv_size.x = 1.0f / surface->width0;
1630 renderer->surface_tex_inv_size.y = 1.0f / surface->height0;
1631 }
1632
1633 while (num_macroblocks) {
1634 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1635 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1636 unsigned i;
1637
1638 for (i = 0; i < num_to_submit; ++i) {
1639 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1640 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1641 }
1642
1643 num_macroblocks -= num_to_submit;
1644
1645 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1646 xfer_buffers_unmap(renderer);
1647 flush(renderer);
1648 xfer_buffers_map(renderer);
1649 /* Next time we get this surface it may have new ref frames */
1650 renderer->surface = NULL;
1651 }
1652 }
1653 }
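/*
 * Typical call sequence for this renderer (a hedged sketch; variable names
 * are illustrative and error handling is omitted):
 *
 *    struct vl_mpeg12_mc_renderer mc;
 *    vl_mpeg12_mc_renderer_init(&mc, pipe, width, height,
 *                               PIPE_VIDEO_CHROMA_FORMAT_420,
 *                               VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
 *                               VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
 *                               true);
 *
 *    For each picture:
 *    vl_mpeg12_mc_renderer_render_macroblocks(&mc, dst_tex, past_tex,
 *                                             future_tex, num_mbs, mbs,
 *                                             &fence);
 *
 *    vl_mpeg12_mc_renderer_cleanup(&mc);
 */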