Merge branch 'mesa_7_6_branch' into mesa_7_7_branch
[mesa.git] / src / gallium / auxiliary / vl / vl_mpeg12_mc_renderer.c
1 /**************************************************************************
2 *
3 * Copyright 2009 Younes Manton.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "vl_mpeg12_mc_renderer.h"
29 #include <assert.h>
30 #include <pipe/p_context.h>
31 #include <pipe/p_inlines.h>
32 #include <util/u_math.h>
33 #include <util/u_memory.h>
34 #include <tgsi/tgsi_parse.h>
35 #include <tgsi/tgsi_build.h>
36 #include "vl_shader_build.h"
37
/* Alignment passed to every pipe_buffer_create() call in this file */
#define DEFAULT_BUF_ALIGNMENT 1

/* Macroblock and block dimensions, in pixels */
#define MACROBLOCK_WIDTH 16
#define MACROBLOCK_HEIGHT 16
#define BLOCK_WIDTH 8
#define BLOCK_HEIGHT 8

/*
 * Marker value recognised by ZERO_BLOCK_IS_NIL(): any vector whose .x is
 * negative counts as "nil".  The literal is parenthesized so the macro
 * expands safely inside larger expressions.
 */
#define ZERO_BLOCK_NIL (-1.0f)
#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
45
/* Two-component float vector (vertex positions, texture coordinates) */
struct vertex2f
{
   float x;
   float y;
};
50
/* Four-component float vector; the layout of one shader constant slot */
struct vertex4f
{
   float x;
   float y;
   float z;
   float w;
};
55
56 struct vertex_shader_consts
57 {
58 struct vertex4f denorm;
59 };
60
61 struct fragment_shader_consts
62 {
63 struct vertex4f multiplier;
64 struct vertex4f div;
65 };
66
67 /*
68 * Muliplier renormalizes block samples from 16 bits to 12 bits.
69 * Divider is used when calculating Y % 2 for choosing top or bottom
70 * field for P or B macroblocks.
71 * TODO: Use immediates.
72 */
73 static const struct fragment_shader_consts fs_consts = {
74 {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
75 {0.5f, 2.0f, 0.0f, 0.0f}
76 };
77
78 struct vert_stream_0
79 {
80 struct vertex2f pos;
81 struct vertex2f luma_tc;
82 struct vertex2f cb_tc;
83 struct vertex2f cr_tc;
84 };
85
/*
 * Macroblock categories.  Values are consecutive starting at zero, so
 * NUM_MACROBLOCK_TYPES is the number of categories.
 */
enum MACROBLOCK_TYPE
{
   MACROBLOCK_TYPE_INTRA = 0,
   MACROBLOCK_TYPE_FWD_FRAME_PRED = 1,
   MACROBLOCK_TYPE_FWD_FIELD_PRED = 2,
   MACROBLOCK_TYPE_BKWD_FRAME_PRED = 3,
   MACROBLOCK_TYPE_BKWD_FIELD_PRED = 4,
   MACROBLOCK_TYPE_BI_FRAME_PRED = 5,
   MACROBLOCK_TYPE_BI_FIELD_PRED = 6,

   /* Count of the types above; keep last */
   NUM_MACROBLOCK_TYPES
};
98
99 static void
100 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
101 {
102 const unsigned max_tokens = 50;
103
104 struct pipe_shader_state vs;
105 struct tgsi_token *tokens;
106 struct tgsi_header *header;
107
108 struct tgsi_full_declaration decl;
109 struct tgsi_full_instruction inst;
110
111 unsigned ti;
112
113 unsigned i;
114
115 assert(r);
116
117 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
118 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
119 header = (struct tgsi_header *) &tokens[1];
120 *header = tgsi_build_header();
121 *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
122
123 ti = 3;
124
125 /*
126 * decl i0 ; Vertex pos
127 * decl i1 ; Luma texcoords
128 * decl i2 ; Chroma Cb texcoords
129 * decl i3 ; Chroma Cr texcoords
130 */
131 for (i = 0; i < 4; i++) {
132 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
133 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
134 }
135
136 /*
137 * decl o0 ; Vertex pos
138 * decl o1 ; Luma texcoords
139 * decl o2 ; Chroma Cb texcoords
140 * decl o3 ; Chroma Cr texcoords
141 */
142 for (i = 0; i < 4; i++) {
143 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
144 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
145 }
146
147 /*
148 * mov o0, i0 ; Move input vertex pos to output
149 * mov o1, i1 ; Move input luma texcoords to output
150 * mov o2, i2 ; Move input chroma Cb texcoords to output
151 * mov o3, i3 ; Move input chroma Cr texcoords to output
152 */
153 for (i = 0; i < 4; ++i) {
154 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
155 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
156 }
157
158 /* end */
159 inst = vl_end();
160 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
161
162 assert(ti <= max_tokens);
163
164 vs.tokens = tokens;
165 r->i_vs = r->pipe->create_vs_state(r->pipe, &vs);
166 free(tokens);
167 }
168
169 static void
170 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
171 {
172 const unsigned max_tokens = 100;
173
174 struct pipe_shader_state fs;
175 struct tgsi_token *tokens;
176 struct tgsi_header *header;
177
178 struct tgsi_full_declaration decl;
179 struct tgsi_full_instruction inst;
180
181 unsigned ti;
182
183 unsigned i;
184
185 assert(r);
186
187 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
188 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
189 header = (struct tgsi_header *) &tokens[1];
190 *header = tgsi_build_header();
191 *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
192
193 ti = 3;
194
195 /*
196 * decl i0 ; Luma texcoords
197 * decl i1 ; Chroma Cb texcoords
198 * decl i2 ; Chroma Cr texcoords
199 */
200 for (i = 0; i < 3; ++i) {
201 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
202 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
203 }
204
205 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
206 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
207 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
208
209 /* decl o0 ; Fragment color */
210 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
211 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
212
213 /* decl t0, t1 */
214 decl = vl_decl_temps(0, 1);
215 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
216
217 /*
218 * decl s0 ; Sampler for luma texture
219 * decl s1 ; Sampler for chroma Cb texture
220 * decl s2 ; Sampler for chroma Cr texture
221 */
222 for (i = 0; i < 3; ++i) {
223 decl = vl_decl_samplers(i, i);
224 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
225 }
226
227 /*
228 * tex2d t1, i0, s0 ; Read texel from luma texture
229 * mov t0.x, t1.x ; Move luma sample into .x component
230 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
231 * mov t0.y, t1.x ; Move Cb sample into .y component
232 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
233 * mov t0.z, t1.x ; Move Cr sample into .z component
234 */
235 for (i = 0; i < 3; ++i) {
236 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
237 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
238
239 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
240 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
241 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
242 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
243 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
244 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
245 }
246
247 /* mul o0, t0, c0 ; Rescale texel to correct range */
248 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
249 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
250
251 /* end */
252 inst = vl_end();
253 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
254
255 assert(ti <= max_tokens);
256
257 fs.tokens = tokens;
258 r->i_fs = r->pipe->create_fs_state(r->pipe, &fs);
259 free(tokens);
260 }
261
262 static void
263 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
264 {
265 const unsigned max_tokens = 100;
266
267 struct pipe_shader_state vs;
268 struct tgsi_token *tokens;
269 struct tgsi_header *header;
270
271 struct tgsi_full_declaration decl;
272 struct tgsi_full_instruction inst;
273
274 unsigned ti;
275
276 unsigned i;
277
278 assert(r);
279
280 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
281 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
282 header = (struct tgsi_header *) &tokens[1];
283 *header = tgsi_build_header();
284 *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
285
286 ti = 3;
287
288 /*
289 * decl i0 ; Vertex pos
290 * decl i1 ; Luma texcoords
291 * decl i2 ; Chroma Cb texcoords
292 * decl i3 ; Chroma Cr texcoords
293 * decl i4 ; Ref surface top field texcoords
294 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
295 */
296 for (i = 0; i < 6; i++) {
297 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
298 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
299 }
300
301 /*
302 * decl o0 ; Vertex pos
303 * decl o1 ; Luma texcoords
304 * decl o2 ; Chroma Cb texcoords
305 * decl o3 ; Chroma Cr texcoords
306 * decl o4 ; Ref macroblock texcoords
307 */
308 for (i = 0; i < 5; i++) {
309 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
310 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
311 }
312
313 /*
314 * mov o0, i0 ; Move input vertex pos to output
315 * mov o1, i1 ; Move input luma texcoords to output
316 * mov o2, i2 ; Move input chroma Cb texcoords to output
317 * mov o3, i3 ; Move input chroma Cr texcoords to output
318 */
319 for (i = 0; i < 4; ++i) {
320 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
321 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
322 }
323
324 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
325 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
326 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
327
328 /* end */
329 inst = vl_end();
330 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
331
332 assert(ti <= max_tokens);
333
334 vs.tokens = tokens;
335 r->p_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
336 free(tokens);
337 }
338
/*
 * Placeholder for the field-prediction counterpart of
 * create_frame_pred_vert_shader().  Not implemented: it only trips an
 * assertion, and init_shaders() does not call it.
 */
static void
create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void) r;

   assert(false);
}
344
345 static void
346 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
347 {
348 const unsigned max_tokens = 100;
349
350 struct pipe_shader_state fs;
351 struct tgsi_token *tokens;
352 struct tgsi_header *header;
353
354 struct tgsi_full_declaration decl;
355 struct tgsi_full_instruction inst;
356
357 unsigned ti;
358
359 unsigned i;
360
361 assert(r);
362
363 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
364 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
365 header = (struct tgsi_header *) &tokens[1];
366 *header = tgsi_build_header();
367 *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
368
369 ti = 3;
370
371 /*
372 * decl i0 ; Luma texcoords
373 * decl i1 ; Chroma Cb texcoords
374 * decl i2 ; Chroma Cr texcoords
375 * decl i3 ; Ref macroblock texcoords
376 */
377 for (i = 0; i < 4; ++i) {
378 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
379 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
380 }
381
382 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
383 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
384 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
385
386 /* decl o0 ; Fragment color */
387 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
388 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
389
390 /* decl t0, t1 */
391 decl = vl_decl_temps(0, 1);
392 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
393
394 /*
395 * decl s0 ; Sampler for luma texture
396 * decl s1 ; Sampler for chroma Cb texture
397 * decl s2 ; Sampler for chroma Cr texture
398 * decl s3 ; Sampler for ref surface texture
399 */
400 for (i = 0; i < 4; ++i) {
401 decl = vl_decl_samplers(i, i);
402 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
403 }
404
405 /*
406 * tex2d t1, i0, s0 ; Read texel from luma texture
407 * mov t0.x, t1.x ; Move luma sample into .x component
408 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
409 * mov t0.y, t1.x ; Move Cb sample into .y component
410 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
411 * mov t0.z, t1.x ; Move Cr sample into .z component
412 */
413 for (i = 0; i < 3; ++i) {
414 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
415 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
416
417 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
418 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
419 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
420 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
421 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
422 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
423 }
424
425 /* mul t0, t0, c0 ; Rescale texel to correct range */
426 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
427 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
428
429 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
430 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
431 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
432
433 /* add o0, t0, t1 ; Add ref and differential to form final output */
434 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
435 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
436
437 /* end */
438 inst = vl_end();
439 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
440
441 assert(ti <= max_tokens);
442
443 fs.tokens = tokens;
444 r->p_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
445 free(tokens);
446 }
447
/*
 * Placeholder for the field-prediction counterpart of
 * create_frame_pred_frag_shader().  Not implemented: it only trips an
 * assertion, and init_shaders() does not call it.
 */
static void
create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void) r;

   assert(false);
}
453
454 static void
455 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
456 {
457 const unsigned max_tokens = 100;
458
459 struct pipe_shader_state vs;
460 struct tgsi_token *tokens;
461 struct tgsi_header *header;
462
463 struct tgsi_full_declaration decl;
464 struct tgsi_full_instruction inst;
465
466 unsigned ti;
467
468 unsigned i;
469
470 assert(r);
471
472 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
473 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
474 header = (struct tgsi_header *) &tokens[1];
475 *header = tgsi_build_header();
476 *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
477
478 ti = 3;
479
480 /*
481 * decl i0 ; Vertex pos
482 * decl i1 ; Luma texcoords
483 * decl i2 ; Chroma Cb texcoords
484 * decl i3 ; Chroma Cr texcoords
485 * decl i4 ; First ref macroblock top field texcoords
486 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
487 * decl i6 ; Second ref macroblock top field texcoords
488 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
489 */
490 for (i = 0; i < 8; i++) {
491 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
492 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
493 }
494
495 /*
496 * decl o0 ; Vertex pos
497 * decl o1 ; Luma texcoords
498 * decl o2 ; Chroma Cb texcoords
499 * decl o3 ; Chroma Cr texcoords
500 * decl o4 ; First ref macroblock texcoords
501 * decl o5 ; Second ref macroblock texcoords
502 */
503 for (i = 0; i < 6; i++) {
504 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
505 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
506 }
507
508 /*
509 * mov o0, i0 ; Move input vertex pos to output
510 * mov o1, i1 ; Move input luma texcoords to output
511 * mov o2, i2 ; Move input chroma Cb texcoords to output
512 * mov o3, i3 ; Move input chroma Cr texcoords to output
513 */
514 for (i = 0; i < 4; ++i) {
515 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
516 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
517 }
518
519 /*
520 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
521 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
522 */
523 for (i = 0; i < 2; ++i) {
524 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
525 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
526 }
527
528 /* end */
529 inst = vl_end();
530 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
531
532 assert(ti <= max_tokens);
533
534 vs.tokens = tokens;
535 r->b_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
536 free(tokens);
537 }
538
/*
 * Placeholder for the field-prediction counterpart of
 * create_frame_bi_pred_vert_shader().  Not implemented: it only trips an
 * assertion, and init_shaders() does not call it.
 */
static void
create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void) r;

   assert(false);
}
544
545 static void
546 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
547 {
548 const unsigned max_tokens = 100;
549
550 struct pipe_shader_state fs;
551 struct tgsi_token *tokens;
552 struct tgsi_header *header;
553
554 struct tgsi_full_declaration decl;
555 struct tgsi_full_instruction inst;
556
557 unsigned ti;
558
559 unsigned i;
560
561 assert(r);
562
563 tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
564 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
565 header = (struct tgsi_header *) &tokens[1];
566 *header = tgsi_build_header();
567 *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
568
569 ti = 3;
570
571 /*
572 * decl i0 ; Luma texcoords
573 * decl i1 ; Chroma Cb texcoords
574 * decl i2 ; Chroma Cr texcoords
575 * decl i3 ; First ref macroblock texcoords
576 * decl i4 ; Second ref macroblock texcoords
577 */
578 for (i = 0; i < 5; ++i) {
579 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
580 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
581 }
582
583 /*
584 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
585 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
586 */
587 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
588 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
589
590 /* decl o0 ; Fragment color */
591 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
592 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
593
594 /* decl t0-t2 */
595 decl = vl_decl_temps(0, 2);
596 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
597
598 /*
599 * decl s0 ; Sampler for luma texture
600 * decl s1 ; Sampler for chroma Cb texture
601 * decl s2 ; Sampler for chroma Cr texture
602 * decl s3 ; Sampler for first ref surface texture
603 * decl s4 ; Sampler for second ref surface texture
604 */
605 for (i = 0; i < 5; ++i) {
606 decl = vl_decl_samplers(i, i);
607 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
608 }
609
610 /*
611 * tex2d t1, i0, s0 ; Read texel from luma texture
612 * mov t0.x, t1.x ; Move luma sample into .x component
613 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
614 * mov t0.y, t1.x ; Move Cb sample into .y component
615 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
616 * mov t0.z, t1.x ; Move Cr sample into .z component
617 */
618 for (i = 0; i < 3; ++i) {
619 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
620 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
621
622 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
623 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
624 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
625 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
626 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
627 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
628 }
629
630 /* mul t0, t0, c0 ; Rescale texel to correct range */
631 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
632 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
633
634 /*
635 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
636 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
637 */
638 for (i = 0; i < 2; ++i) {
639 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
640 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
641 }
642
643 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
644 inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
645 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
646 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
647 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
648 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
649 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
650
651 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
652 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
653 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
654
655 /* end */
656 inst = vl_end();
657 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
658
659 assert(ti <= max_tokens);
660
661 fs.tokens = tokens;
662 r->b_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
663 free(tokens);
664 }
665
/*
 * Placeholder for the field-prediction counterpart of
 * create_frame_bi_pred_frag_shader().  Not implemented: it only trips an
 * assertion, and init_shaders() does not call it.
 */
static void
create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
   (void) r;

   assert(false);
}
671
672 static void
673 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
674 {
675 unsigned i;
676
677 assert(r);
678
679 for (i = 0; i < 3; ++i) {
680 r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
681 (
682 r->pipe->screen, r->textures.all[i],
683 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
684 r->textures.all[i]->width[0], r->textures.all[i]->height[0]
685 );
686
687 r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
688 }
689 }
690
691 static void
692 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
693 {
694 unsigned i;
695
696 assert(r);
697
698 for (i = 0; i < 3; ++i) {
699 r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
700 r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
701 }
702 }
703
704 static bool
705 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
706 {
707 struct pipe_sampler_state sampler;
708 unsigned filters[5];
709 unsigned i;
710
711 assert(r);
712
713 r->viewport.scale[0] = r->pot_buffers ?
714 util_next_power_of_two(r->picture_width) : r->picture_width;
715 r->viewport.scale[1] = r->pot_buffers ?
716 util_next_power_of_two(r->picture_height) : r->picture_height;
717 r->viewport.scale[2] = 1;
718 r->viewport.scale[3] = 1;
719 r->viewport.translate[0] = 0;
720 r->viewport.translate[1] = 0;
721 r->viewport.translate[2] = 0;
722 r->viewport.translate[3] = 0;
723
724 r->scissor.maxx = r->pot_buffers ?
725 util_next_power_of_two(r->picture_width) : r->picture_width;
726 r->scissor.maxy = r->pot_buffers ?
727 util_next_power_of_two(r->picture_height) : r->picture_height;
728
729 r->fb_state.width = r->pot_buffers ?
730 util_next_power_of_two(r->picture_width) : r->picture_width;
731 r->fb_state.height = r->pot_buffers ?
732 util_next_power_of_two(r->picture_height) : r->picture_height;
733 r->fb_state.nr_cbufs = 1;
734 r->fb_state.zsbuf = NULL;
735
736 /* Luma filter */
737 filters[0] = PIPE_TEX_FILTER_NEAREST;
738 /* Chroma filters */
739 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
740 r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
741 filters[1] = PIPE_TEX_FILTER_NEAREST;
742 filters[2] = PIPE_TEX_FILTER_NEAREST;
743 }
744 else {
745 filters[1] = PIPE_TEX_FILTER_LINEAR;
746 filters[2] = PIPE_TEX_FILTER_LINEAR;
747 }
748 /* Fwd, bkwd ref filters */
749 filters[3] = PIPE_TEX_FILTER_LINEAR;
750 filters[4] = PIPE_TEX_FILTER_LINEAR;
751
752 for (i = 0; i < 5; ++i) {
753 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
754 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
755 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
756 sampler.min_img_filter = filters[i];
757 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
758 sampler.mag_img_filter = filters[i];
759 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
760 sampler.compare_func = PIPE_FUNC_ALWAYS;
761 sampler.normalized_coords = 1;
762 /*sampler.prefilter = ; */
763 /*sampler.shadow_ambient = ; */
764 /*sampler.lod_bias = ; */
765 sampler.min_lod = 0;
766 /*sampler.max_lod = ; */
767 /*sampler.border_color[i] = ; */
768 /*sampler.max_anisotropy = ; */
769 r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
770 }
771
772 return true;
773 }
774
775 static void
776 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
777 {
778 unsigned i;
779
780 assert(r);
781
782 for (i = 0; i < 5; ++i)
783 r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
784 }
785
786 static bool
787 init_shaders(struct vl_mpeg12_mc_renderer *r)
788 {
789 assert(r);
790
791 create_intra_vert_shader(r);
792 create_intra_frag_shader(r);
793 create_frame_pred_vert_shader(r);
794 create_frame_pred_frag_shader(r);
795 create_frame_bi_pred_vert_shader(r);
796 create_frame_bi_pred_frag_shader(r);
797
798 return true;
799 }
800
801 static void
802 cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
803 {
804 assert(r);
805
806 r->pipe->delete_vs_state(r->pipe, r->i_vs);
807 r->pipe->delete_fs_state(r->pipe, r->i_fs);
808 r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
809 r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
810 r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
811 r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
812 }
813
814 static bool
815 init_buffers(struct vl_mpeg12_mc_renderer *r)
816 {
817 struct pipe_texture template;
818
819 const unsigned mbw =
820 align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
821 const unsigned mbh =
822 align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
823
824 unsigned i;
825
826 assert(r);
827
828 r->macroblocks_per_batch =
829 mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
830 r->num_macroblocks = 0;
831 r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
832
833 memset(&template, 0, sizeof(struct pipe_texture));
834 template.target = PIPE_TEXTURE_2D;
835 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
836 template.format = PIPE_FORMAT_R16_SNORM;
837 template.last_level = 0;
838 template.width[0] = r->pot_buffers ?
839 util_next_power_of_two(r->picture_width) : r->picture_width;
840 template.height[0] = r->pot_buffers ?
841 util_next_power_of_two(r->picture_height) : r->picture_height;
842 template.depth[0] = 1;
843 pf_get_block(template.format, &template.block);
844 template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
845
846 r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
847
848 if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
849 template.width[0] = r->pot_buffers ?
850 util_next_power_of_two(r->picture_width / 2) :
851 r->picture_width / 2;
852 template.height[0] = r->pot_buffers ?
853 util_next_power_of_two(r->picture_height / 2) :
854 r->picture_height / 2;
855 }
856 else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
857 template.height[0] = r->pot_buffers ?
858 util_next_power_of_two(r->picture_height / 2) :
859 r->picture_height / 2;
860
861 r->textures.individual.cb =
862 r->pipe->screen->texture_create(r->pipe->screen, &template);
863 r->textures.individual.cr =
864 r->pipe->screen->texture_create(r->pipe->screen, &template);
865
866 r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
867 r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
868 r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
869 r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
870 (
871 r->pipe->screen,
872 DEFAULT_BUF_ALIGNMENT,
873 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
874 sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
875 );
876
877 for (i = 1; i < 3; ++i) {
878 r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
879 r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
880 r->vertex_bufs.all[i].buffer_offset = 0;
881 r->vertex_bufs.all[i].buffer = pipe_buffer_create
882 (
883 r->pipe->screen,
884 DEFAULT_BUF_ALIGNMENT,
885 PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD,
886 sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
887 );
888 }
889
890 /* Position element */
891 r->vertex_elems[0].src_offset = 0;
892 r->vertex_elems[0].vertex_buffer_index = 0;
893 r->vertex_elems[0].nr_components = 2;
894 r->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
895
896 /* Luma, texcoord element */
897 r->vertex_elems[1].src_offset = sizeof(struct vertex2f);
898 r->vertex_elems[1].vertex_buffer_index = 0;
899 r->vertex_elems[1].nr_components = 2;
900 r->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
901
902 /* Chroma Cr texcoord element */
903 r->vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
904 r->vertex_elems[2].vertex_buffer_index = 0;
905 r->vertex_elems[2].nr_components = 2;
906 r->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
907
908 /* Chroma Cb texcoord element */
909 r->vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
910 r->vertex_elems[3].vertex_buffer_index = 0;
911 r->vertex_elems[3].nr_components = 2;
912 r->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
913
914 /* First ref surface top field texcoord element */
915 r->vertex_elems[4].src_offset = 0;
916 r->vertex_elems[4].vertex_buffer_index = 1;
917 r->vertex_elems[4].nr_components = 2;
918 r->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
919
920 /* First ref surface bottom field texcoord element */
921 r->vertex_elems[5].src_offset = sizeof(struct vertex2f);
922 r->vertex_elems[5].vertex_buffer_index = 1;
923 r->vertex_elems[5].nr_components = 2;
924 r->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
925
926 /* Second ref surface top field texcoord element */
927 r->vertex_elems[6].src_offset = 0;
928 r->vertex_elems[6].vertex_buffer_index = 2;
929 r->vertex_elems[6].nr_components = 2;
930 r->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
931
932 /* Second ref surface bottom field texcoord element */
933 r->vertex_elems[7].src_offset = sizeof(struct vertex2f);
934 r->vertex_elems[7].vertex_buffer_index = 2;
935 r->vertex_elems[7].nr_components = 2;
936 r->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
937
938 r->vs_const_buf.buffer = pipe_buffer_create
939 (
940 r->pipe->screen,
941 DEFAULT_BUF_ALIGNMENT,
942 PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
943 sizeof(struct vertex_shader_consts)
944 );
945
946 r->fs_const_buf.buffer = pipe_buffer_create
947 (
948 r->pipe->screen,
949 DEFAULT_BUF_ALIGNMENT,
950 PIPE_BUFFER_USAGE_CONSTANT, sizeof(struct fragment_shader_consts)
951 );
952
953 memcpy
954 (
955 pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
956 &fs_consts, sizeof(struct fragment_shader_consts)
957 );
958
959 pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer);
960
961 return true;
962 }
963
964 static void
965 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
966 {
967 unsigned i;
968
969 assert(r);
970
971 pipe_buffer_reference(&r->vs_const_buf.buffer, NULL);
972 pipe_buffer_reference(&r->fs_const_buf.buffer, NULL);
973
974 for (i = 0; i < 3; ++i)
975 pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
976
977 for (i = 0; i < 3; ++i)
978 pipe_texture_reference(&r->textures.all[i], NULL);
979
980 FREE(r->macroblock_buf);
981 }
982
983 static enum MACROBLOCK_TYPE
984 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
985 {
986 assert(mb);
987
988 switch (mb->mb_type) {
989 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
990 return MACROBLOCK_TYPE_INTRA;
991 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
992 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
993 MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
994 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
995 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
996 MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
997 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
998 return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
999 MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
1000 default:
1001 assert(0);
1002 }
1003
1004 /* Unreachable */
1005 return -1;
1006 }
1007
/*
 * SET_BLOCK_QUAD(vb, attr, x0, y0, w, h)
 *
 * Fill the two-component attribute `attr` of six consecutive vertices
 * vb[0..5] with the corners of the rectangle whose origin is (x0, y0) and
 * whose extent is (w, h). The six vertices form two triangles:
 *
 *    0: (x0,     y0)        3: (x0 + w, y0)
 *    1: (x0,     y0 + h)    4: (x0,     y0 + h)
 *    2: (x0 + w, y0)        5: (x0 + w, y0 + h)
 *
 * Arguments are evaluated several times; pass side-effect free expressions.
 */
#define SET_BLOCK_QUAD(vb, attr, x0, y0, w, h) \
   do { \
      (vb)[0].attr.x = (x0);       (vb)[0].attr.y = (y0); \
      (vb)[1].attr.x = (x0);       (vb)[1].attr.y = (y0) + (h); \
      (vb)[2].attr.x = (x0) + (w); (vb)[2].attr.y = (y0); \
      (vb)[3].attr.x = (x0) + (w); (vb)[3].attr.y = (y0); \
      (vb)[4].attr.x = (x0);       (vb)[4].attr.y = (y0) + (h); \
      (vb)[5].attr.x = (x0) + (w); (vb)[5].attr.y = (y0) + (h); \
   } while (0)

/*
 * SET_BLOCK: emit the six vertices of one block of a macroblock.
 *
 *   vb            six writable vertices with pos, luma_tc, cb_tc, cr_tc members
 *   cbp           coded block pattern bits of the macroblock
 *   mbx, mby      macroblock coordinates
 *   unitx, unity  size of one macroblock in the target coordinate space
 *   ofsx, ofsy    offset of this block inside the macroblock
 *   hx, hy        extent of the block
 *   lm, cbm, crm  cbp masks selecting the luma / Cb / Cr block
 *   use_zb        non-zero to redirect texcoords of non-coded blocks
 *   zb            array of three zero-block origins (luma, Cb, Cr)
 *
 * The position always covers the block. Each texcoord set covers the same
 * rectangle when use_zb is false or the matching cbp bit is set; otherwise it
 * covers a rectangle of the same extent starting at the corresponding zb
 * entry.
 *
 * Every argument is parenthesized on expansion (including use_zb, which was
 * previously negated unparenthesized). Arguments are evaluated multiple
 * times.
 */
#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \
   do { \
      SET_BLOCK_QUAD(vb, pos, \
                     (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      \
      if (!(use_zb) || ((cbp) & (lm))) { \
         SET_BLOCK_QUAD(vb, luma_tc, \
                        (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      } \
      else { \
         SET_BLOCK_QUAD(vb, luma_tc, (zb)[0].x, (zb)[0].y, hx, hy); \
      } \
      \
      if (!(use_zb) || ((cbp) & (cbm))) { \
         SET_BLOCK_QUAD(vb, cb_tc, \
                        (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      } \
      else { \
         SET_BLOCK_QUAD(vb, cb_tc, (zb)[1].x, (zb)[1].y, hx, hy); \
      } \
      \
      if (!(use_zb) || ((cbp) & (crm))) { \
         SET_BLOCK_QUAD(vb, cr_tc, \
                        (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      } \
      else { \
         SET_BLOCK_QUAD(vb, cr_tc, (zb)[2].x, (zb)[2].y, hx, hy); \
      } \
   } while (0)
1075
1076 static void
1077 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
1078 struct pipe_mpeg12_macroblock *mb, unsigned pos,
1079 struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
1080 {
1081 struct vertex2f mo_vec[2];
1082
1083 unsigned i;
1084
1085 assert(r);
1086 assert(mb);
1087 assert(ycbcr_vb);
1088 assert(pos < r->macroblocks_per_batch);
1089
1090 switch (mb->mb_type) {
1091 case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
1092 {
1093 struct vertex2f *vb;
1094
1095 assert(ref_vb && ref_vb[1]);
1096
1097 vb = ref_vb[1] + pos * 2 * 24;
1098
1099 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
1100 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
1101
1102 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
1103 for (i = 0; i < 24 * 2; i += 2) {
1104 vb[i].x = mo_vec[0].x;
1105 vb[i].y = mo_vec[0].y;
1106 }
1107 }
1108 else {
1109 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
1110 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
1111
1112 for (i = 0; i < 24 * 2; i += 2) {
1113 vb[i].x = mo_vec[0].x;
1114 vb[i].y = mo_vec[0].y;
1115 vb[i + 1].x = mo_vec[1].x;
1116 vb[i + 1].y = mo_vec[1].y;
1117 }
1118 }
1119
1120 /* fall-through */
1121 }
1122 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
1123 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
1124 {
1125 struct vertex2f *vb;
1126
1127 assert(ref_vb && ref_vb[0]);
1128
1129 vb = ref_vb[0] + pos * 2 * 24;
1130
1131 if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
1132 mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
1133 mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
1134
1135 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
1136 mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
1137 mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
1138 }
1139 }
1140 else {
1141 mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
1142 mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
1143
1144 if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
1145 mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
1146 mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
1147 }
1148 }
1149
1150 if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
1151 for (i = 0; i < 24 * 2; i += 2) {
1152 vb[i].x = mo_vec[0].x;
1153 vb[i].y = mo_vec[0].y;
1154 }
1155 }
1156 else {
1157 for (i = 0; i < 24 * 2; i += 2) {
1158 vb[i].x = mo_vec[0].x;
1159 vb[i].y = mo_vec[0].y;
1160 vb[i + 1].x = mo_vec[1].x;
1161 vb[i + 1].y = mo_vec[1].y;
1162 }
1163 }
1164
1165 /* fall-through */
1166 }
1167 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
1168 {
1169 const struct vertex2f unit =
1170 {
1171 r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
1172 r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
1173 };
1174 const struct vertex2f half =
1175 {
1176 r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
1177 r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
1178 };
1179 const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
1180
1181 struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
1182
1183 SET_BLOCK(vb, mb->cbp, mb->mbx, mb->mby,
1184 unit.x, unit.y, 0, 0, half.x, half.y,
1185 32, 2, 1, use_zb, r->zero_block);
1186
1187 SET_BLOCK(vb + 6, mb->cbp, mb->mbx, mb->mby,
1188 unit.x, unit.y, half.x, 0, half.x, half.y,
1189 16, 2, 1, use_zb, r->zero_block);
1190
1191 SET_BLOCK(vb + 12, mb->cbp, mb->mbx, mb->mby,
1192 unit.x, unit.y, 0, half.y, half.x, half.y,
1193 8, 2, 1, use_zb, r->zero_block);
1194
1195 SET_BLOCK(vb + 18, mb->cbp, mb->mbx, mb->mby,
1196 unit.x, unit.y, half.x, half.y, half.x, half.y,
1197 4, 2, 1, use_zb, r->zero_block);
1198
1199 break;
1200 }
1201 default:
1202 assert(0);
1203 }
1204 }
1205
1206 static void
1207 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
1208 unsigned *num_macroblocks)
1209 {
1210 unsigned offset[NUM_MACROBLOCK_TYPES];
1211 struct vert_stream_0 *ycbcr_vb;
1212 struct vertex2f *ref_vb[2];
1213 unsigned i;
1214
1215 assert(r);
1216 assert(num_macroblocks);
1217
1218 for (i = 0; i < r->num_macroblocks; ++i) {
1219 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1220 ++num_macroblocks[mb_type];
1221 }
1222
1223 offset[0] = 0;
1224
1225 for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
1226 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
1227
1228 ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
1229 (
1230 r->pipe->screen,
1231 r->vertex_bufs.individual.ycbcr.buffer,
1232 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1233 );
1234
1235 for (i = 0; i < 2; ++i)
1236 ref_vb[i] = (struct vertex2f *)pipe_buffer_map
1237 (
1238 r->pipe->screen,
1239 r->vertex_bufs.individual.ref[i].buffer,
1240 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1241 );
1242
1243 for (i = 0; i < r->num_macroblocks; ++i) {
1244 enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
1245
1246 gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
1247 ycbcr_vb, ref_vb);
1248
1249 ++offset[mb_type];
1250 }
1251
1252 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ycbcr.buffer);
1253 for (i = 0; i < 2; ++i)
1254 pipe_buffer_unmap(r->pipe->screen, r->vertex_bufs.individual.ref[i].buffer);
1255 }
1256
1257 static void
1258 flush(struct vl_mpeg12_mc_renderer *r)
1259 {
1260 unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
1261 unsigned vb_start = 0;
1262 struct vertex_shader_consts *vs_consts;
1263 unsigned i;
1264
1265 assert(r);
1266 assert(r->num_macroblocks == r->macroblocks_per_batch);
1267
1268 gen_macroblock_stream(r, num_macroblocks);
1269
1270 r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface
1271 (
1272 r->pipe->screen, r->surface,
1273 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
1274 );
1275
1276 r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
1277 r->pipe->set_viewport_state(r->pipe, &r->viewport);
1278 r->pipe->set_scissor_state(r->pipe, &r->scissor);
1279
1280 vs_consts = pipe_buffer_map
1281 (
1282 r->pipe->screen, r->vs_const_buf.buffer,
1283 PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
1284 );
1285
1286 vs_consts->denorm.x = r->surface->width[0];
1287 vs_consts->denorm.y = r->surface->height[0];
1288
1289 pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
1290
1291 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
1292 &r->vs_const_buf);
1293 r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0,
1294 &r->fs_const_buf);
1295
1296 if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
1297 r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
1298 r->pipe->set_vertex_elements(r->pipe, 4, r->vertex_elems);
1299 r->pipe->set_sampler_textures(r->pipe, 3, r->textures.all);
1300 r->pipe->bind_sampler_states(r->pipe, 3, r->samplers.all);
1301 r->pipe->bind_vs_state(r->pipe, r->i_vs);
1302 r->pipe->bind_fs_state(r->pipe, r->i_fs);
1303
1304 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1305 num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
1306 vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
1307 }
1308
1309 if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
1310 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1311 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1312 r->textures.individual.ref[0] = r->past;
1313 r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
1314 r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
1315 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1316 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1317
1318 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1319 num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
1320 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
1321 }
1322
1323 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1324 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1325 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1326 r->textures.individual.ref[0] = r->past;
1327 r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
1328 r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
1329 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1330 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1331
1332 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1333 num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
1334 vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
1335 }
1336
1337 if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
1338 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1339 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1340 r->textures.individual.ref[0] = r->future;
1341 r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
1342 r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
1343 r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
1344 r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
1345
1346 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1347 num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
1348 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
1349 }
1350
1351 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
1352 r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
1353 r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
1354 r->textures.individual.ref[0] = r->future;
1355 r->pipe->set_sampler_textures(r->pipe, 4, r->textures.all);
1356 r->pipe->bind_sampler_states(r->pipe, 4, r->samplers.all);
1357 r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
1358 r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
1359
1360 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1361 num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
1362 vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
1363 }
1364
1365 if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
1366 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1367 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1368 r->textures.individual.ref[0] = r->past;
1369 r->textures.individual.ref[1] = r->future;
1370 r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all);
1371 r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all);
1372 r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
1373 r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
1374
1375 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1376 num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
1377 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
1378 }
1379
1380 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1381 r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
1382 r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
1383 r->textures.individual.ref[0] = r->past;
1384 r->textures.individual.ref[1] = r->future;
1385 r->pipe->set_sampler_textures(r->pipe, 5, r->textures.all);
1386 r->pipe->bind_sampler_states(r->pipe, 5, r->samplers.all);
1387 r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
1388 r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
1389
1390 r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
1391 num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
1392 vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
1393 }
1394
1395 r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
1396 pipe_surface_reference(&r->fb_state.cbufs[0], NULL);
1397
1398 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
1399 for (i = 0; i < 3; ++i)
1400 r->zero_block[i].x = ZERO_BLOCK_NIL;
1401
1402 r->num_macroblocks = 0;
1403 }
1404
1405 static void
1406 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
1407 {
1408 unsigned y;
1409
1410 assert(src);
1411 assert(dst);
1412
1413 for (y = 0; y < BLOCK_HEIGHT; ++y)
1414 memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1415 }
1416
1417 static void
1418 grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
1419 {
1420 unsigned y;
1421
1422 assert(src);
1423 assert(dst);
1424
1425 for (y = 0; y < BLOCK_HEIGHT; ++y)
1426 memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
1427 }
1428
1429 static void
1430 fill_zero_block(short *dst, unsigned dst_pitch)
1431 {
1432 unsigned y;
1433
1434 assert(dst);
1435
1436 for (y = 0; y < BLOCK_HEIGHT; ++y)
1437 memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
1438 }
1439
1440 static void
1441 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
1442 enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
1443 {
1444 unsigned tex_pitch;
1445 short *texels;
1446 unsigned tb = 0, sb = 0;
1447 unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
1448 unsigned x, y;
1449
1450 assert(r);
1451 assert(blocks);
1452
1453 tex_pitch = r->tex_transfer[0]->stride / r->tex_transfer[0]->block.size;
1454 texels = r->texels[0] + mbpy * tex_pitch + mbpx;
1455
1456 for (y = 0; y < 2; ++y) {
1457 for (x = 0; x < 2; ++x, ++tb) {
1458 if ((cbp >> (5 - tb)) & 1) {
1459 if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
1460 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1461 texels + y * tex_pitch * BLOCK_WIDTH +
1462 x * BLOCK_WIDTH, tex_pitch);
1463 }
1464 else {
1465 grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
1466 texels + y * tex_pitch + x * BLOCK_WIDTH,
1467 tex_pitch);
1468 }
1469
1470 ++sb;
1471 }
1472 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1473 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1474 ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
1475 fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
1476 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1477 r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
1478 r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
1479 }
1480 }
1481 }
1482 }
1483 }
1484
1485 /* TODO: Implement 422, 444 */
1486 assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
1487
1488 mbpx /= 2;
1489 mbpy /= 2;
1490
1491 for (tb = 0; tb < 2; ++tb) {
1492 tex_pitch = r->tex_transfer[tb + 1]->stride / r->tex_transfer[tb + 1]->block.size;
1493 texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
1494
1495 if ((cbp >> (1 - tb)) & 1) {
1496 grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
1497 ++sb;
1498 }
1499 else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
1500 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
1501 ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
1502 fill_zero_block(texels, tex_pitch);
1503 if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
1504 r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
1505 r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
1506 }
1507 }
1508 }
1509 }
1510 }
1511
1512 static void
1513 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
1514 struct pipe_mpeg12_macroblock *mb)
1515 {
1516 assert(r);
1517 assert(mb);
1518 assert(r->num_macroblocks < r->macroblocks_per_batch);
1519
1520 memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
1521 sizeof(struct pipe_mpeg12_macroblock));
1522
1523 grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
1524
1525 ++r->num_macroblocks;
1526 }
1527
1528 bool
1529 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
1530 struct pipe_context *pipe,
1531 unsigned picture_width,
1532 unsigned picture_height,
1533 enum pipe_video_chroma_format chroma_format,
1534 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
1535 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
1536 bool pot_buffers)
1537 {
1538 unsigned i;
1539
1540 assert(renderer);
1541 assert(pipe);
1542 /* TODO: Implement other policies */
1543 assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
1544 /* TODO: Implement this */
1545 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1546 assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
1547 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1548 assert(pot_buffers);
1549
1550 memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
1551
1552 renderer->pipe = pipe;
1553 renderer->picture_width = picture_width;
1554 renderer->picture_height = picture_height;
1555 renderer->chroma_format = chroma_format;
1556 renderer->bufmode = bufmode;
1557 renderer->eb_handling = eb_handling;
1558 renderer->pot_buffers = pot_buffers;
1559
1560 if (!init_pipe_state(renderer))
1561 return false;
1562 if (!init_shaders(renderer)) {
1563 cleanup_pipe_state(renderer);
1564 return false;
1565 }
1566 if (!init_buffers(renderer)) {
1567 cleanup_shaders(renderer);
1568 cleanup_pipe_state(renderer);
1569 return false;
1570 }
1571
1572 renderer->surface = NULL;
1573 renderer->past = NULL;
1574 renderer->future = NULL;
1575 for (i = 0; i < 3; ++i)
1576 renderer->zero_block[i].x = ZERO_BLOCK_NIL;
1577 renderer->num_macroblocks = 0;
1578
1579 xfer_buffers_map(renderer);
1580
1581 return true;
1582 }
1583
/**
 * Tear down a renderer set up by vl_mpeg12_mc_renderer_init(): unmap the
 * transfer buffers, then release pipe state, shaders and buffers.
 */
void
vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
{
   assert(renderer);

   /* The transfer buffers are unmapped before anything is released */
   xfer_buffers_unmap(renderer);

   cleanup_pipe_state(renderer);
   cleanup_shaders(renderer);
   cleanup_buffers(renderer);
}
1595
1596 void
1597 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1598 *renderer,
1599 struct pipe_texture *surface,
1600 struct pipe_texture *past,
1601 struct pipe_texture *future,
1602 unsigned num_macroblocks,
1603 struct pipe_mpeg12_macroblock
1604 *mpeg12_macroblocks,
1605 struct pipe_fence_handle **fence)
1606 {
1607 bool new_surface = false;
1608
1609 assert(renderer);
1610 assert(surface);
1611 assert(num_macroblocks);
1612 assert(mpeg12_macroblocks);
1613
1614 if (renderer->surface) {
1615 if (surface != renderer->surface) {
1616 if (renderer->num_macroblocks > 0) {
1617 xfer_buffers_unmap(renderer);
1618 flush(renderer);
1619 }
1620
1621 new_surface = true;
1622 }
1623
1624 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1625 assert(surface != renderer->surface || renderer->past == past);
1626 assert(surface != renderer->surface || renderer->future == future);
1627 }
1628 else
1629 new_surface = true;
1630
1631 if (new_surface) {
1632 renderer->surface = surface;
1633 renderer->past = past;
1634 renderer->future = future;
1635 renderer->fence = fence;
1636 renderer->surface_tex_inv_size.x = 1.0f / surface->width[0];
1637 renderer->surface_tex_inv_size.y = 1.0f / surface->height[0];
1638 }
1639
1640 while (num_macroblocks) {
1641 unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
1642 unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
1643 unsigned i;
1644
1645 for (i = 0; i < num_to_submit; ++i) {
1646 assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
1647 grab_macroblock(renderer, &mpeg12_macroblocks[i]);
1648 }
1649
1650 num_macroblocks -= num_to_submit;
1651
1652 if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
1653 xfer_buffers_unmap(renderer);
1654 flush(renderer);
1655 xfer_buffers_map(renderer);
1656 /* Next time we get this surface it may have new ref frames */
1657 renderer->surface = NULL;
1658 }
1659 }
1660 }