1 /**************************************************************************
3 * Copyright 2009 Younes Manton.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "vl_mpeg12_mc_renderer.h"
30 #include <pipe/p_context.h>
31 #include <pipe/p_inlines.h>
32 #include <util/u_math.h>
33 #include <util/u_memory.h>
34 #include <tgsi/tgsi_parse.h>
35 #include <tgsi/tgsi_build.h>
36 #include "vl_shader_build.h"
38 #define DEFAULT_BUF_ALIGNMENT 1
39 #define MACROBLOCK_WIDTH 16
40 #define MACROBLOCK_HEIGHT 16
42 #define BLOCK_HEIGHT 8
43 #define ZERO_BLOCK_NIL -1.0f
44 #define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
/*
 * Constant data for the vertex shaders. init_buffers sizes the vertex shader
 * constant buffer (vs_const_buf) with sizeof(struct vertex_shader_consts).
 */
56 struct vertex_shader_consts
58 struct vertex4f denorm
;
/*
 * Constant data for the fragment shaders. init_buffers sizes the fragment
 * shader constant buffer (fs_const_buf) with
 * sizeof(struct fragment_shader_consts) and fills it from fs_consts.
 */
61 struct fragment_shader_consts
63 struct vertex4f multiplier
;
68 * Multiplier renormalizes block samples from 16 bits to 12 bits.
69 * Divider is used when calculating Y % 2 for choosing top or bottom
70 * field for P or B macroblocks.
71 * TODO: Use immediates.
/*
 * Fixed fragment shader constants.
 * First vector:  32767/255 in x, y and z, 0 in w (the scaling factor the
 *                fragment shaders read as c0).
 * Second vector: 0.5 in x, 2.0 in y, 0 in z and w (the bi-predictive fragment
 *                shader reads c1.x as its blend weight).
 */
73 static const struct fragment_shader_consts fs_consts
= {
74 {32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 0.0f
},
75 {0.5f
, 2.0f
, 0.0f
, 0.0f
}
/*
 * Per-vertex texture coordinates, one vertex2f each for the luma, Cb and Cr
 * planes. The SET_BLOCK macro writes these fields as (vb)[n].luma_tc and
 * (vb)[n].cb_tc.
 */
81 struct vertex2f luma_tc
;
82 struct vertex2f cb_tc
;
83 struct vertex2f cr_tc
;
/*
 * Internal macroblock classification returned by get_macroblock_type():
 * intra, or forward / backward / bi-directional prediction, each of the
 * latter in a frame-predicted and a field-predicted variant.
 */
88 MACROBLOCK_TYPE_INTRA
,
89 MACROBLOCK_TYPE_FWD_FRAME_PRED
,
90 MACROBLOCK_TYPE_FWD_FIELD_PRED
,
91 MACROBLOCK_TYPE_BKWD_FRAME_PRED
,
92 MACROBLOCK_TYPE_BKWD_FIELD_PRED
,
93 MACROBLOCK_TYPE_BI_FRAME_PRED
,
94 MACROBLOCK_TYPE_BI_FIELD_PRED
,
/*
 * Builds the TGSI token stream for the intra-macroblock vertex shader and
 * stores the created shader state in r->i_vs.
 *
 * The shader is a pass-through: four inputs and four outputs are declared
 * (slot 0 with position semantic, the rest generic) and each input is MOVed
 * to the output with the same index.
 *
 * r: renderer whose pipe context creates the shader and which receives i_vs.
 */
100 create_intra_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
/* Capacity of the token buffer; each build call is told max_tokens - ti tokens remain. */
102 const unsigned max_tokens
= 50;
104 struct pipe_shader_state vs
;
105 struct tgsi_token
*tokens
;
106 struct tgsi_header
*header
;
108 struct tgsi_full_declaration decl
;
109 struct tgsi_full_instruction inst
;
/* NOTE(review): no NULL check of the malloc result is visible before tokens[0] is used. */
117 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
/* Token 0 holds the header and token 1 the processor token (vertex). */
118 header
= (struct tgsi_header
*) &tokens
[0];
119 *header
= tgsi_build_header();
120 *(struct tgsi_processor
*) &tokens
[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
125 * decl i0 ; Vertex pos
126 * decl i1 ; Luma texcoords
127 * decl i2 ; Chroma Cb texcoords
128 * decl i3 ; Chroma Cr texcoords
130 for (i
= 0; i
< 4; i
++) {
131 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
132 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
136 * decl o0 ; Vertex pos
137 * decl o1 ; Luma texcoords
138 * decl o2 ; Chroma Cb texcoords
139 * decl o3 ; Chroma Cr texcoords
141 for (i
= 0; i
< 4; i
++) {
142 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
143 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
147 * mov o0, i0 ; Move input vertex pos to output
148 * mov o1, i1 ; Move input luma texcoords to output
149 * mov o2, i2 ; Move input chroma Cb texcoords to output
150 * mov o3, i3 ; Move input chroma Cr texcoords to output
152 for (i
= 0; i
< 4; ++i
) {
153 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
154 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): presumably this emits the program terminator — confirm which instruction inst holds here. */
159 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* The number of tokens written must not exceed the buffer capacity. */
161 assert(ti
<= max_tokens
);
164 r
->i_vs
= r
->pipe
->create_vs_state(r
->pipe
, &vs
);
/*
 * Builds the TGSI token stream for the intra-macroblock fragment shader and
 * stores the created shader state in r->i_fs.
 *
 * Declares three linearly interpolated inputs, constant c0, color output o0,
 * temporaries 0..1 and samplers 0..2. Each of the three planes is sampled
 * into t1 and t1.x is written to one component of t0 (x, y, z in turn); the
 * result is multiplied by c0 into o0.
 *
 * r: renderer whose pipe context creates the shader and which receives i_fs.
 */
169 create_intra_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
/* Capacity of the token buffer; each build call is told max_tokens - ti tokens remain. */
171 const unsigned max_tokens
= 100;
173 struct pipe_shader_state fs
;
174 struct tgsi_token
*tokens
;
175 struct tgsi_header
*header
;
177 struct tgsi_full_declaration decl
;
178 struct tgsi_full_instruction inst
;
/* NOTE(review): no NULL check of the malloc result is visible before tokens[0] is used. */
186 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
187 header
= (struct tgsi_header
*) &tokens
[0];
188 *header
= tgsi_build_header();
189 *(struct tgsi_processor
*) &tokens
[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
194 * decl i0 ; Luma texcoords
195 * decl i1 ; Chroma Cb texcoords
196 * decl i2 ; Chroma Cr texcoords
/* i + 1 is passed as the second argument; presumably the semantic index matching vertex outputs 1..3 — TODO confirm. */
198 for (i
= 0; i
< 3; ++i
) {
199 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
200 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
203 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
204 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
205 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
207 /* decl o0 ; Fragment color */
208 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
209 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Temporaries t0 and t1. */
212 decl
= vl_decl_temps(0, 1);
213 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
216 * decl s0 ; Sampler for luma texture
217 * decl s1 ; Sampler for chroma Cb texture
218 * decl s2 ; Sampler for chroma Cr texture
220 for (i
= 0; i
< 3; ++i
) {
221 decl
= vl_decl_samplers(i
, i
);
222 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
226 * tex2d t1, i0, s0 ; Read texel from luma texture
227 * mov t0.x, t1.x ; Move luma sample into .x component
228 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
229 * mov t0.y, t1.x ; Move Cb sample into .y component
230 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
231 * mov t0.z, t1.x ; Move Cr sample into .z component
233 for (i
= 0; i
< 3; ++i
) {
234 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
235 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Source swizzle is forced to .x and the write mask is shifted by i, so plane i lands in component i of t0. */
237 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
238 inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
239 inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
240 inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
241 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
242 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
245 /* mul o0, t0, c0 ; Rescale texel to correct range */
246 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
247 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): presumably this emits the program terminator — confirm which instruction inst holds here. */
251 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* The number of tokens written must not exceed the buffer capacity. */
253 assert(ti
<= max_tokens
);
256 r
->i_fs
= r
->pipe
->create_fs_state(r
->pipe
, &fs
);
/*
 * Builds the TGSI token stream for the frame-predicted (single reference)
 * vertex shader and stores the created shader state in r->p_vs[0].
 *
 * Declares six inputs and five outputs, passes inputs 0..3 straight through
 * to outputs 0..3, and writes output 4 as input 0 + input 4 (vertex position
 * translated by the motion vector). Input 5 is declared but not read.
 *
 * r: renderer whose pipe context creates the shader and which receives p_vs[0].
 */
261 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
/* Capacity of the token buffer; each build call is told max_tokens - ti tokens remain. */
263 const unsigned max_tokens
= 100;
265 struct pipe_shader_state vs
;
266 struct tgsi_token
*tokens
;
267 struct tgsi_header
*header
;
269 struct tgsi_full_declaration decl
;
270 struct tgsi_full_instruction inst
;
/* NOTE(review): no NULL check of the malloc result is visible before tokens[0] is used. */
278 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
279 header
= (struct tgsi_header
*) &tokens
[0];
280 *header
= tgsi_build_header();
281 *(struct tgsi_processor
*) &tokens
[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
286 * decl i0 ; Vertex pos
287 * decl i1 ; Luma texcoords
288 * decl i2 ; Chroma Cb texcoords
289 * decl i3 ; Chroma Cr texcoords
290 * decl i4 ; Ref surface top field texcoords
291 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
293 for (i
= 0; i
< 6; i
++) {
294 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
295 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
299 * decl o0 ; Vertex pos
300 * decl o1 ; Luma texcoords
301 * decl o2 ; Chroma Cb texcoords
302 * decl o3 ; Chroma Cr texcoords
303 * decl o4 ; Ref macroblock texcoords
305 for (i
= 0; i
< 5; i
++) {
306 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
307 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
311 * mov o0, i0 ; Move input vertex pos to output
312 * mov o1, i1 ; Move input luma texcoords to output
313 * mov o2, i2 ; Move input chroma Cb texcoords to output
314 * mov o3, i3 ; Move input chroma Cr texcoords to output
316 for (i
= 0; i
< 4; ++i
) {
317 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
318 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
321 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
322 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, 4);
323 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): presumably this emits the program terminator — confirm which instruction inst holds here. */
327 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* The number of tokens written must not exceed the buffer capacity. */
329 assert(ti
<= max_tokens
);
332 r
->p_vs
[0] = r
->pipe
->create_vs_state(r
->pipe
, &vs
);
/*
 * NOTE(review): init_shaders does not call this function; presumably a
 * placeholder for the field-predicted vertex shader — confirm.
 */
337 create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
/*
 * Builds the TGSI token stream for the frame-predicted (single reference)
 * fragment shader and stores the created shader state in r->p_fs[0].
 *
 * Declares four linearly interpolated inputs, constant c0, color output o0,
 * temporaries 0..1 and samplers 0..3. The three planes are gathered into
 * t0.xyz and scaled by c0, the reference texel is sampled from sampler 3 with
 * input 3 into t1, and o0 = t0 + t1.
 *
 * r: renderer whose pipe context creates the shader and which receives p_fs[0].
 */
343 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
/* Capacity of the token buffer; each build call is told max_tokens - ti tokens remain. */
345 const unsigned max_tokens
= 100;
347 struct pipe_shader_state fs
;
348 struct tgsi_token
*tokens
;
349 struct tgsi_header
*header
;
351 struct tgsi_full_declaration decl
;
352 struct tgsi_full_instruction inst
;
/* NOTE(review): no NULL check of the malloc result is visible before tokens[0] is used. */
360 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
361 header
= (struct tgsi_header
*) &tokens
[0];
362 *header
= tgsi_build_header();
363 *(struct tgsi_processor
*) &tokens
[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
368 * decl i0 ; Luma texcoords
369 * decl i1 ; Chroma Cb texcoords
370 * decl i2 ; Chroma Cr texcoords
371 * decl i3 ; Ref macroblock texcoords
373 for (i
= 0; i
< 4; ++i
) {
374 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
375 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
378 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
379 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
380 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
382 /* decl o0 ; Fragment color */
383 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
384 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Temporaries t0 and t1. */
387 decl
= vl_decl_temps(0, 1);
388 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
391 * decl s0 ; Sampler for luma texture
392 * decl s1 ; Sampler for chroma Cb texture
393 * decl s2 ; Sampler for chroma Cr texture
394 * decl s3 ; Sampler for ref surface texture
396 for (i
= 0; i
< 4; ++i
) {
397 decl
= vl_decl_samplers(i
, i
);
398 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
402 * tex2d t1, i0, s0 ; Read texel from luma texture
403 * mov t0.x, t1.x ; Move luma sample into .x component
404 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
405 * mov t0.y, t1.x ; Move Cb sample into .y component
406 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
407 * mov t0.z, t1.x ; Move Cr sample into .z component
409 for (i
= 0; i
< 3; ++i
) {
410 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
411 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Source swizzle is forced to .x and the write mask is shifted by i, so plane i lands in component i of t0. */
413 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
414 inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
415 inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
416 inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
417 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
418 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
421 /* mul t0, t0, c0 ; Rescale texel to correct range */
422 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
423 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
425 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
426 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, 3, TGSI_FILE_SAMPLER
, 3);
427 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
429 /* add o0, t0, t1 ; Add ref and differential to form final output */
430 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
431 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): presumably this emits the program terminator — confirm which instruction inst holds here. */
435 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* The number of tokens written must not exceed the buffer capacity. */
437 assert(ti
<= max_tokens
);
440 r
->p_fs
[0] = r
->pipe
->create_fs_state(r
->pipe
, &fs
);
/*
 * NOTE(review): init_shaders does not call this function; presumably a
 * placeholder for the field-predicted fragment shader — confirm.
 */
445 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
/*
 * Builds the TGSI token stream for the frame-predicted bi-directional (two
 * reference) vertex shader and stores the created shader state in r->b_vs[0].
 *
 * Declares eight inputs and six outputs, passes inputs 0..3 straight through
 * to outputs 0..3, and writes outputs 4 and 5 as input 0 + input 4 and
 * input 0 + input 6 respectively. Inputs 5 and 7 are declared but not read.
 *
 * r: renderer whose pipe context creates the shader and which receives b_vs[0].
 */
451 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
/* Capacity of the token buffer; each build call is told max_tokens - ti tokens remain. */
453 const unsigned max_tokens
= 100;
455 struct pipe_shader_state vs
;
456 struct tgsi_token
*tokens
;
457 struct tgsi_header
*header
;
459 struct tgsi_full_declaration decl
;
460 struct tgsi_full_instruction inst
;
/* NOTE(review): no NULL check of the malloc result is visible before tokens[0] is used. */
468 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
469 header
= (struct tgsi_header
*) &tokens
[0];
470 *header
= tgsi_build_header();
471 *(struct tgsi_processor
*) &tokens
[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
476 * decl i0 ; Vertex pos
477 * decl i1 ; Luma texcoords
478 * decl i2 ; Chroma Cb texcoords
479 * decl i3 ; Chroma Cr texcoords
480 * decl i4 ; First ref macroblock top field texcoords
481 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
482 * decl i6 ; Second ref macroblock top field texcoords
483 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
485 for (i
= 0; i
< 8; i
++) {
486 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
487 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
491 * decl o0 ; Vertex pos
492 * decl o1 ; Luma texcoords
493 * decl o2 ; Chroma Cb texcoords
494 * decl o3 ; Chroma Cr texcoords
495 * decl o4 ; First ref macroblock texcoords
496 * decl o5 ; Second ref macroblock texcoords
498 for (i
= 0; i
< 6; i
++) {
499 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
500 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
504 * mov o0, i0 ; Move input vertex pos to output
505 * mov o1, i1 ; Move input luma texcoords to output
506 * mov o2, i2 ; Move input chroma Cb texcoords to output
507 * mov o3, i3 ; Move input chroma Cr texcoords to output
509 for (i
= 0; i
< 4; ++i
) {
510 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
511 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
515 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
516 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
/* (i + 2) * 2 selects inputs 4 and 6, skipping the bottom-field inputs 5 and 7. */
518 for (i
= 0; i
< 2; ++i
) {
519 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, (i
+ 2) * 2);
520 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): presumably this emits the program terminator — confirm which instruction inst holds here. */
525 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* The number of tokens written must not exceed the buffer capacity. */
527 assert(ti
<= max_tokens
);
530 r
->b_vs
[0] = r
->pipe
->create_vs_state(r
->pipe
, &vs
);
/*
 * NOTE(review): init_shaders does not call this function; presumably a
 * placeholder for the field-predicted bi-directional vertex shader — confirm.
 */
535 create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
/*
 * Builds the TGSI token stream for the frame-predicted bi-directional (two
 * reference) fragment shader and stores the created shader state in
 * r->b_fs[0].
 *
 * Declares five linearly interpolated inputs, constants c0..c1, color output
 * o0, temporaries 0..2 and samplers 0..4. The three planes are gathered into
 * t0.xyz and scaled by c0; the two reference texels are sampled into t1 and
 * t2, blended with LRP using c1.x as the weight, and added to t0 to form o0.
 *
 * r: renderer whose pipe context creates the shader and which receives b_fs[0].
 */
541 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
/* Capacity of the token buffer; each build call is told max_tokens - ti tokens remain. */
543 const unsigned max_tokens
= 100;
545 struct pipe_shader_state fs
;
546 struct tgsi_token
*tokens
;
547 struct tgsi_header
*header
;
549 struct tgsi_full_declaration decl
;
550 struct tgsi_full_instruction inst
;
/* NOTE(review): no NULL check of the malloc result is visible before tokens[0] is used. */
558 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
559 header
= (struct tgsi_header
*) &tokens
[0];
560 *header
= tgsi_build_header();
561 *(struct tgsi_processor
*) &tokens
[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
566 * decl i0 ; Luma texcoords
567 * decl i1 ; Chroma Cb texcoords
568 * decl i2 ; Chroma Cr texcoords
569 * decl i3 ; First ref macroblock texcoords
570 * decl i4 ; Second ref macroblock texcoords
572 for (i
= 0; i
< 5; ++i
) {
573 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
574 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
578 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
579 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
581 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
582 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
584 /* decl o0 ; Fragment color */
585 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
586 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Temporaries t0, t1 and t2. */
589 decl
= vl_decl_temps(0, 2);
590 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
593 * decl s0 ; Sampler for luma texture
594 * decl s1 ; Sampler for chroma Cb texture
595 * decl s2 ; Sampler for chroma Cr texture
596 * decl s3 ; Sampler for first ref surface texture
597 * decl s4 ; Sampler for second ref surface texture
599 for (i
= 0; i
< 5; ++i
) {
600 decl
= vl_decl_samplers(i
, i
);
601 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
605 * tex2d t1, i0, s0 ; Read texel from luma texture
606 * mov t0.x, t1.x ; Move luma sample into .x component
607 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
608 * mov t0.y, t1.x ; Move Cb sample into .y component
609 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
610 * mov t0.z, t1.x ; Move Cr sample into .z component
612 for (i
= 0; i
< 3; ++i
) {
613 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
614 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Source swizzle is forced to .x and the write mask is shifted by i, so plane i lands in component i of t0. */
616 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
617 inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
618 inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
619 inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
620 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
621 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
624 /* mul t0, t0, c0 ; Rescale texel to correct range */
625 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
626 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
629 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
630 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
632 for (i
= 0; i
< 2; ++i
) {
633 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, i
+ 3);
634 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
637 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
638 inst
= vl_inst4(TGSI_OPCODE_LRP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_CONSTANT
, 1, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
/* Broadcast c1.x to all four channels of the blend weight. */
639 inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
640 inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
641 inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
642 inst
.Src
[0].Register
.SwizzleW
= TGSI_SWIZZLE_X
;
643 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
645 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
646 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
647 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): presumably this emits the program terminator — confirm which instruction inst holds here. */
651 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* The number of tokens written must not exceed the buffer capacity. */
653 assert(ti
<= max_tokens
);
656 r
->b_fs
[0] = r
->pipe
->create_fs_state(r
->pipe
, &fs
);
/*
 * NOTE(review): init_shaders does not call this function; presumably a
 * placeholder for the field-predicted bi-directional fragment shader — confirm.
 */
661 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
/*
 * For each of the three textures in r->textures.all, obtains a write
 * transfer spanning width0 x height0 of the texture, stores it in
 * r->tex_transfer[i], maps it and stores the mapped pointer in r->texels[i].
 *
 * r: renderer owning the textures, transfers and texel pointers.
 */
667 xfer_buffers_map(struct vl_mpeg12_mc_renderer
*r
)
673 for (i
= 0; i
< 3; ++i
) {
674 r
->tex_transfer
[i
] = r
->pipe
->screen
->get_tex_transfer
676 r
->pipe
->screen
, r
->textures
.all
[i
],
677 0, 0, 0, PIPE_TRANSFER_WRITE
, 0, 0,
678 r
->textures
.all
[i
]->width0
, r
->textures
.all
[i
]->height0
/* NOTE(review): no check of the transfer or of the mapped pointer is visible. */
681 r
->texels
[i
] = r
->pipe
->screen
->transfer_map(r
->pipe
->screen
, r
->tex_transfer
[i
]);
/*
 * Counterpart of xfer_buffers_map: unmaps and then destroys each of the
 * three texture transfers held in r->tex_transfer.
 *
 * r: renderer owning the transfers.
 */
686 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer
*r
)
692 for (i
= 0; i
< 3; ++i
) {
693 r
->pipe
->screen
->transfer_unmap(r
->pipe
->screen
, r
->tex_transfer
[i
]);
694 r
->pipe
->screen
->tex_transfer_destroy(r
->tex_transfer
[i
]);
/*
 * Fills in the renderer's fixed pipe state: viewport, scissor maximum,
 * framebuffer dimensions, and five sampler states (luma, Cb, Cr and two
 * reference surfaces) stored in r->samplers.all.
 *
 * Viewport scale, scissor maximum and framebuffer size all use the picture
 * dimensions, rounded up to the next power of two when r->pot_buffers is set.
 *
 * r: renderer to initialize.
 */
699 init_pipe_state(struct vl_mpeg12_mc_renderer
*r
)
701 struct pipe_sampler_state sampler
;
707 r
->viewport
.scale
[0] = r
->pot_buffers
?
708 util_next_power_of_two(r
->picture_width
) : r
->picture_width
;
709 r
->viewport
.scale
[1] = r
->pot_buffers
?
710 util_next_power_of_two(r
->picture_height
) : r
->picture_height
;
711 r
->viewport
.scale
[2] = 1;
712 r
->viewport
.scale
[3] = 1;
713 r
->viewport
.translate
[0] = 0;
714 r
->viewport
.translate
[1] = 0;
715 r
->viewport
.translate
[2] = 0;
716 r
->viewport
.translate
[3] = 0;
718 r
->scissor
.maxx
= r
->pot_buffers
?
719 util_next_power_of_two(r
->picture_width
) : r
->picture_width
;
720 r
->scissor
.maxy
= r
->pot_buffers
?
721 util_next_power_of_two(r
->picture_height
) : r
->picture_height
;
723 r
->fb_state
.width
= r
->pot_buffers
?
724 util_next_power_of_two(r
->picture_width
) : r
->picture_width
;
725 r
->fb_state
.height
= r
->pot_buffers
?
726 util_next_power_of_two(r
->picture_height
) : r
->picture_height
;
/* One color buffer, no depth/stencil buffer. */
727 r
->fb_state
.nr_cbufs
= 1;
728 r
->fb_state
.zsbuf
= NULL
;
/* Luma (filters[0]) is always sampled with nearest filtering. */
731 filters
[0] = PIPE_TEX_FILTER_NEAREST
;
/*
 * Chroma (filters[1], filters[2]) uses nearest filtering for 4:4:4 content
 * or when empty blocks are handled with XFER_ONE, and linear filtering
 * otherwise.
 */
733 if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_444
||
734 r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
) {
735 filters
[1] = PIPE_TEX_FILTER_NEAREST
;
736 filters
[2] = PIPE_TEX_FILTER_NEAREST
;
739 filters
[1] = PIPE_TEX_FILTER_LINEAR
;
740 filters
[2] = PIPE_TEX_FILTER_LINEAR
;
742 /* Fwd, bkwd ref filters */
743 filters
[3] = PIPE_TEX_FILTER_LINEAR
;
744 filters
[4] = PIPE_TEX_FILTER_LINEAR
;
/*
 * Create one sampler state per texture unit; only the filter differs.
 * NOTE(review): no initialization of `sampler` is visible, so the fields
 * named in the commented-out lines below may be left indeterminate — confirm.
 */
746 for (i
= 0; i
< 5; ++i
) {
747 sampler
.wrap_s
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
748 sampler
.wrap_t
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
749 sampler
.wrap_r
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
750 sampler
.min_img_filter
= filters
[i
];
751 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
752 sampler
.mag_img_filter
= filters
[i
];
753 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
754 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
755 sampler
.normalized_coords
= 1;
756 /*sampler.prefilter = ; */
757 /*sampler.shadow_ambient = ; */
758 /*sampler.lod_bias = ; */
760 /*sampler.max_lod = ; */
761 /*sampler.border_color[i] = ; */
762 /*sampler.max_anisotropy = ; */
763 r
->samplers
.all
[i
] = r
->pipe
->create_sampler_state(r
->pipe
, &sampler
);
/*
 * Deletes the five sampler states created by init_pipe_state.
 *
 * r: renderer owning the sampler states.
 */
770 cleanup_pipe_state(struct vl_mpeg12_mc_renderer
*r
)
776 for (i
= 0; i
< 5; ++i
)
777 r
->pipe
->delete_sampler_state(r
->pipe
, r
->samplers
.all
[i
]);
/*
 * Creates the six shaders the renderer uses: intra, frame-predicted and
 * frame-predicted bi-directional, each as a vertex/fragment pair. The
 * field-predicted creators are not called here.
 *
 * r: renderer that receives the shader states.
 */
781 init_shaders(struct vl_mpeg12_mc_renderer
*r
)
785 create_intra_vert_shader(r
);
786 create_intra_frag_shader(r
);
787 create_frame_pred_vert_shader(r
);
788 create_frame_pred_frag_shader(r
);
789 create_frame_bi_pred_vert_shader(r
);
790 create_frame_bi_pred_frag_shader(r
);
/*
 * Deletes the six shader states created by init_shaders: i_vs/i_fs,
 * p_vs[0]/p_fs[0] and b_vs[0]/b_fs[0].
 *
 * r: renderer owning the shader states.
 */
796 cleanup_shaders(struct vl_mpeg12_mc_renderer
*r
)
800 r
->pipe
->delete_vs_state(r
->pipe
, r
->i_vs
);
801 r
->pipe
->delete_fs_state(r
->pipe
, r
->i_fs
);
802 r
->pipe
->delete_vs_state(r
->pipe
, r
->p_vs
[0]);
803 r
->pipe
->delete_fs_state(r
->pipe
, r
->p_fs
[0]);
804 r
->pipe
->delete_vs_state(r
->pipe
, r
->b_vs
[0]);
805 r
->pipe
->delete_fs_state(r
->pipe
, r
->b_fs
[0]);
/*
 * Allocates the renderer's working storage: the macroblock batch array, the
 * Y/Cb/Cr textures, three vertex buffers with their eight vertex element
 * descriptions, and the vertex and fragment shader constant buffers. The
 * fragment constant buffer is filled from fs_consts.
 *
 * NOTE(review): no failure checks on MALLOC, texture_create or
 * pipe_buffer_create are visible in this function.
 *
 * r: renderer to initialize.
 */
809 init_buffers(struct vl_mpeg12_mc_renderer
*r
)
811 struct pipe_texture
template;
/* Picture size in macroblocks, rounding partial macroblocks up. */
814 align(r
->picture_width
, MACROBLOCK_WIDTH
) / MACROBLOCK_WIDTH
;
816 align(r
->picture_height
, MACROBLOCK_HEIGHT
) / MACROBLOCK_HEIGHT
;
/* A batch holds a whole picture in BUFFER_PICTURE mode, otherwise one row of macroblocks. */
822 r
->macroblocks_per_batch
=
823 mbw
* (r
->bufmode
== VL_MPEG12_MC_RENDERER_BUFFER_PICTURE
? mbh
: 1);
824 r
->num_macroblocks
= 0;
825 r
->macroblock_buf
= MALLOC(r
->macroblocks_per_batch
* sizeof(struct pipe_mpeg12_macroblock
));
/* Texture template shared by the three planes; only width0/height0 change for chroma. */
827 memset(&template, 0, sizeof(struct pipe_texture
));
828 template.target
= PIPE_TEXTURE_2D
;
829 /* TODO: Accommodate HW that can't do this and also for cases when this isn't precise enough */
830 template.format
= PIPE_FORMAT_R16_SNORM
;
831 template.last_level
= 0;
832 template.width0
= r
->pot_buffers
?
833 util_next_power_of_two(r
->picture_width
) : r
->picture_width
;
834 template.height0
= r
->pot_buffers
?
835 util_next_power_of_two(r
->picture_height
) : r
->picture_height
;
837 template.tex_usage
= PIPE_TEXTURE_USAGE_SAMPLER
| PIPE_TEXTURE_USAGE_DYNAMIC
;
/* Luma texture at full picture size. */
839 r
->textures
.individual
.y
= r
->pipe
->screen
->texture_create(r
->pipe
->screen
, &template);
/* 4:2:0 chroma: half width and half height. */
841 if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
) {
842 template.width0
= r
->pot_buffers
?
843 util_next_power_of_two(r
->picture_width
/ 2) :
844 r
->picture_width
/ 2;
845 template.height0
= r
->pot_buffers
?
846 util_next_power_of_two(r
->picture_height
/ 2) :
847 r
->picture_height
/ 2;
/*
 * NOTE(review): this branch halves the chroma height for 4:2:2, whereas
 * 4:2:2 subsampling conventionally halves the chroma width and keeps the
 * full height — confirm whether width0 was intended here.
 */
849 else if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_422
)
850 template.height0
= r
->pot_buffers
?
851 util_next_power_of_two(r
->picture_height
/ 2) :
852 r
->picture_height
/ 2;
/* Cb and Cr textures share the (possibly reduced) chroma dimensions. */
854 r
->textures
.individual
.cb
=
855 r
->pipe
->screen
->texture_create(r
->pipe
->screen
, &template);
856 r
->textures
.individual
.cr
=
857 r
->pipe
->screen
->texture_create(r
->pipe
->screen
, &template);
/* Vertex buffer 0: four vertex2f per vertex, 24 vertices per macroblock. */
859 r
->vertex_bufs
.individual
.ycbcr
.stride
= sizeof(struct vertex2f
) * 4;
860 r
->vertex_bufs
.individual
.ycbcr
.max_index
= 24 * r
->macroblocks_per_batch
- 1;
861 r
->vertex_bufs
.individual
.ycbcr
.buffer_offset
= 0;
862 r
->vertex_bufs
.individual
.ycbcr
.buffer
= pipe_buffer_create
865 DEFAULT_BUF_ALIGNMENT
,
866 PIPE_BUFFER_USAGE_VERTEX
| PIPE_BUFFER_USAGE_DISCARD
,
867 sizeof(struct vertex2f
) * 4 * 24 * r
->macroblocks_per_batch
/* Vertex buffers 1 and 2: two vertex2f per vertex (the reference surface texcoords). */
870 for (i
= 1; i
< 3; ++i
) {
871 r
->vertex_bufs
.all
[i
].stride
= sizeof(struct vertex2f
) * 2;
872 r
->vertex_bufs
.all
[i
].max_index
= 24 * r
->macroblocks_per_batch
- 1;
873 r
->vertex_bufs
.all
[i
].buffer_offset
= 0;
874 r
->vertex_bufs
.all
[i
].buffer
= pipe_buffer_create
877 DEFAULT_BUF_ALIGNMENT
,
878 PIPE_BUFFER_USAGE_VERTEX
| PIPE_BUFFER_USAGE_DISCARD
,
879 sizeof(struct vertex2f
) * 2 * 24 * r
->macroblocks_per_batch
883 /* Position element */
884 r
->vertex_elems
[0].src_offset
= 0;
885 r
->vertex_elems
[0].vertex_buffer_index
= 0;
886 r
->vertex_elems
[0].nr_components
= 2;
887 r
->vertex_elems
[0].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
889 /* Luma, texcoord element */
890 r
->vertex_elems
[1].src_offset
= sizeof(struct vertex2f
);
891 r
->vertex_elems
[1].vertex_buffer_index
= 0;
892 r
->vertex_elems
[1].nr_components
= 2;
893 r
->vertex_elems
[1].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
895 /* Chroma Cb texcoord element (the shaders document input 2 as Cb) — NOTE(review): confirm against the vertex layout */
896 r
->vertex_elems
[2].src_offset
= sizeof(struct vertex2f
) * 2;
897 r
->vertex_elems
[2].vertex_buffer_index
= 0;
898 r
->vertex_elems
[2].nr_components
= 2;
899 r
->vertex_elems
[2].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
901 /* Chroma Cr texcoord element (the shaders document input 3 as Cr) — NOTE(review): confirm against the vertex layout */
902 r
->vertex_elems
[3].src_offset
= sizeof(struct vertex2f
) * 3;
903 r
->vertex_elems
[3].vertex_buffer_index
= 0;
904 r
->vertex_elems
[3].nr_components
= 2;
905 r
->vertex_elems
[3].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
907 /* First ref surface top field texcoord element */
908 r
->vertex_elems
[4].src_offset
= 0;
909 r
->vertex_elems
[4].vertex_buffer_index
= 1;
910 r
->vertex_elems
[4].nr_components
= 2;
911 r
->vertex_elems
[4].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
913 /* First ref surface bottom field texcoord element */
914 r
->vertex_elems
[5].src_offset
= sizeof(struct vertex2f
);
915 r
->vertex_elems
[5].vertex_buffer_index
= 1;
916 r
->vertex_elems
[5].nr_components
= 2;
917 r
->vertex_elems
[5].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
919 /* Second ref surface top field texcoord element */
920 r
->vertex_elems
[6].src_offset
= 0;
921 r
->vertex_elems
[6].vertex_buffer_index
= 2;
922 r
->vertex_elems
[6].nr_components
= 2;
923 r
->vertex_elems
[6].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
925 /* Second ref surface bottom field texcoord element */
926 r
->vertex_elems
[7].src_offset
= sizeof(struct vertex2f
);
927 r
->vertex_elems
[7].vertex_buffer_index
= 2;
928 r
->vertex_elems
[7].nr_components
= 2;
929 r
->vertex_elems
[7].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
/* Vertex shader constant buffer, sized for struct vertex_shader_consts. */
931 r
->vs_const_buf
.buffer
= pipe_buffer_create
934 DEFAULT_BUF_ALIGNMENT
,
935 PIPE_BUFFER_USAGE_CONSTANT
| PIPE_BUFFER_USAGE_DISCARD
,
936 sizeof(struct vertex_shader_consts
)
/* Fragment shader constant buffer, sized for struct fragment_shader_consts. */
939 r
->fs_const_buf
.buffer
= pipe_buffer_create
942 DEFAULT_BUF_ALIGNMENT
,
943 PIPE_BUFFER_USAGE_CONSTANT
, sizeof(struct fragment_shader_consts
)
/* Map the fragment constant buffer for CPU writes, fill it from fs_consts, then unmap it. */
948 pipe_buffer_map(r
->pipe
->screen
, r
->fs_const_buf
.buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
),
949 &fs_consts
, sizeof(struct fragment_shader_consts
)
952 pipe_buffer_unmap(r
->pipe
->screen
, r
->fs_const_buf
.buffer
);
/*
 * Releases what init_buffers allocated: drops the references to both
 * constant buffers, the three vertex buffers and the three textures, then
 * frees the macroblock batch array.
 *
 * r: renderer owning the buffers.
 */
958 cleanup_buffers(struct vl_mpeg12_mc_renderer
*r
)
964 pipe_buffer_reference(&r
->vs_const_buf
.buffer
, NULL
);
965 pipe_buffer_reference(&r
->fs_const_buf
.buffer
, NULL
);
967 for (i
= 0; i
< 3; ++i
)
968 pipe_buffer_reference(&r
->vertex_bufs
.all
[i
].buffer
, NULL
);
970 for (i
= 0; i
< 3; ++i
)
971 pipe_texture_reference(&r
->textures
.all
[i
], NULL
);
973 FREE(r
->macroblock_buf
);
/*
 * Maps a pipe macroblock's mb_type and mo_type onto the renderer's internal
 * MACROBLOCK_TYPE.
 *
 * Intra macroblocks map directly. For forward, backward and bi-directional
 * macroblocks the frame-predicted variant is returned when mo_type is
 * PIPE_MPEG12_MOTION_TYPE_FRAME and the field-predicted variant for any
 * other mo_type.
 *
 * mb: macroblock to classify.
 */
976 static enum MACROBLOCK_TYPE
977 get_macroblock_type(struct pipe_mpeg12_macroblock
*mb
)
981 switch (mb
->mb_type
) {
982 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA
:
983 return MACROBLOCK_TYPE_INTRA
;
984 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD
:
985 return mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
?
986 MACROBLOCK_TYPE_FWD_FRAME_PRED
: MACROBLOCK_TYPE_FWD_FIELD_PRED
;
987 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
:
988 return mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
?
989 MACROBLOCK_TYPE_BKWD_FRAME_PRED
: MACROBLOCK_TYPE_BKWD_FIELD_PRED
;
990 case PIPE_MPEG12_MACROBLOCK_TYPE_BI
:
991 return mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
?
992 MACROBLOCK_TYPE_BI_FRAME_PRED
: MACROBLOCK_TYPE_BI_FIELD_PRED
;
1001 /* XXX: One of these days this will have to be killed with fire */
1002 #define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \
1004 (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \
1005 (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \
1006 (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \
1007 (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \
1008 (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \
1009 (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \
1011 if (!use_zb || (cbp) & (lm)) \
1013 (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \
1014 (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1015 (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \
1016 (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \
1017 (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1018 (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1022 (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \
1023 (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \
1024 (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \
1025 (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \
1026 (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \
1027 (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \
1030 if (!use_zb || (cbp) & (cbm)) \
1032 (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \
1033 (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1034 (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \
1035 (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \
1036 (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1037 (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1041 (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \
1042 (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \
1043 (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \
1044 (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \
1045 (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \
1046 (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \
1049 if (!use_zb || (cbp) & (crm)) \
1051 (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \
1052 (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1053 (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \
1054 (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \
1055 (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1056 (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
1060 (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \
1061 (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \
1062 (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \
1063 (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \
1064 (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \
1065 (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \
/*
 * Writes the vertex data for one macroblock `mb` into slot `pos` of the
 * mapped vertex streams.
 *
 *   r        - renderer; supplies surface_tex_inv_size, eb_handling,
 *              zero_block and macroblocks_per_batch
 *   mb       - macroblock being emitted
 *   pos      - slot index, asserted to be below r->macroblocks_per_batch
 *   ycbcr_vb - stream 0: 24 vertices per macroblock (position + luma/Cb/Cr
 *              texcoords), written via SET_BLOCK
 *   ref_vb   - motion-vector streams: 24 * 2 vertex2f entries per macroblock;
 *              ref_vb[1] is written for bidirectional macroblocks, ref_vb[0]
 *              in the forward/backward case
 *
 * Motion vectors are taken from mb->pmv, multiplied by 0.5 and by the inverse
 * surface size. Entry i of each pair receives mo_vec[0]; entry i + 1 receives
 * mo_vec[1] only in the two-vector branch.
 *
 * NOTE(review): this listing has lost punctuation-only lines, so no braces,
 * `else` keywords or `break` statements are visible. The BI case only fills
 * ref_vb[1], and the FWD/BKWD case has a non-BKWD branch reading pmv[..][0],
 * so the cases presumably fall through BI -> FWD/BKWD -> INTRA -- confirm
 * against the pristine file.
 */
1070 gen_macroblock_verts(struct vl_mpeg12_mc_renderer
*r
,
1071 struct pipe_mpeg12_macroblock
*mb
, unsigned pos
,
1072 struct vert_stream_0
*ycbcr_vb
, struct vertex2f
**ref_vb
)
1074 struct vertex2f mo_vec
[2];
1081 assert(pos
< r
->macroblocks_per_batch
);
1083 switch (mb
->mb_type
) {
/* Bidirectional: write the pmv[..][1] vectors into the second stream. */
1084 case PIPE_MPEG12_MACROBLOCK_TYPE_BI
:
1086 struct vertex2f
*vb
;
1088 assert(ref_vb
&& ref_vb
[1]);
1090 vb
= ref_vb
[1] + pos
* 2 * 24;
1092 mo_vec
[0].x
= mb
->pmv
[0][1][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1093 mo_vec
[0].y
= mb
->pmv
[0][1][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
/* Frame motion: a single vector, stored in the even entries only. */
1095 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
) {
1096 for (i
= 0; i
< 24 * 2; i
+= 2) {
1097 vb
[i
].x
= mo_vec
[0].x
;
1098 vb
[i
].y
= mo_vec
[0].y
;
/* Otherwise load a second vector and store both, interleaved. */
1102 mo_vec
[1].x
= mb
->pmv
[1][1][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1103 mo_vec
[1].y
= mb
->pmv
[1][1][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1105 for (i
= 0; i
< 24 * 2; i
+= 2) {
1106 vb
[i
].x
= mo_vec
[0].x
;
1107 vb
[i
].y
= mo_vec
[0].y
;
1108 vb
[i
+ 1].x
= mo_vec
[1].x
;
1109 vb
[i
+ 1].y
= mo_vec
[1].y
;
/* Forward / backward: write the vectors for the first stream. */
1115 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD
:
1116 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
:
1118 struct vertex2f
*vb
;
1120 assert(ref_vb
&& ref_vb
[0]);
1122 vb
= ref_vb
[0] + pos
* 2 * 24;
/* Backward macroblocks read pmv[..][1]; mo_vec[1] is loaded only for field motion. */
1124 if (mb
->mb_type
== PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
) {
1125 mo_vec
[0].x
= mb
->pmv
[0][1][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1126 mo_vec
[0].y
= mb
->pmv
[0][1][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1128 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FIELD
) {
1129 mo_vec
[1].x
= mb
->pmv
[1][1][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1130 mo_vec
[1].y
= mb
->pmv
[1][1][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
/* Remaining types read pmv[..][0]. */
1134 mo_vec
[0].x
= mb
->pmv
[0][0][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1135 mo_vec
[0].y
= mb
->pmv
[0][0][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1137 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FIELD
) {
1138 mo_vec
[1].x
= mb
->pmv
[1][0][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1139 mo_vec
[1].y
= mb
->pmv
[1][0][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
/*
 * Choose the layout from the motion type (mo_type). The previous code
 * compared mb_type against PIPE_MPEG12_MOTION_TYPE_FRAME, i.e. a macroblock
 * type against a motion-type constant, so the two-vector branch could be
 * taken when mo_vec[1] had not been written above.
 */
1143 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
) {
1144 for (i
= 0; i
< 24 * 2; i
+= 2) {
1145 vb
[i
].x
= mo_vec
[0].x
;
1146 vb
[i
].y
= mo_vec
[0].y
;
1150 for (i
= 0; i
< 24 * 2; i
+= 2) {
1151 vb
[i
].x
= mo_vec
[0].x
;
1152 vb
[i
].y
= mo_vec
[0].y
;
1153 vb
[i
+ 1].x
= mo_vec
[1].x
;
1154 vb
[i
+ 1].y
= mo_vec
[1].y
;
/*
 * Stream 0: four rectangles of 6 vertices each, one per quadrant of the
 * macroblock. `unit` is one macroblock and `half` half a macroblock, both
 * scaled by the inverse surface size.
 */
1160 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA
:
1162 const struct vertex2f unit
=
1164 r
->surface_tex_inv_size
.x
* MACROBLOCK_WIDTH
,
1165 r
->surface_tex_inv_size
.y
* MACROBLOCK_HEIGHT
1167 const struct vertex2f half
=
1169 r
->surface_tex_inv_size
.x
* (MACROBLOCK_WIDTH
/ 2),
1170 r
->surface_tex_inv_size
.y
* (MACROBLOCK_HEIGHT
/ 2)
/* Zero-block texcoords are used only in the XFER_ONE empty-block mode. */
1172 const bool use_zb
= r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
;
1174 struct vert_stream_0
*vb
= ycbcr_vb
+ pos
* 24;
/* Luma cbp masks are 32, 16, 8, 4 for the four quadrants; Cb is 2 and Cr is 1. */
1176 SET_BLOCK(vb
, mb
->cbp
, mb
->mbx
, mb
->mby
,
1177 unit
.x
, unit
.y
, 0, 0, half
.x
, half
.y
,
1178 32, 2, 1, use_zb
, r
->zero_block
);
1180 SET_BLOCK(vb
+ 6, mb
->cbp
, mb
->mbx
, mb
->mby
,
1181 unit
.x
, unit
.y
, half
.x
, 0, half
.x
, half
.y
,
1182 16, 2, 1, use_zb
, r
->zero_block
);
1184 SET_BLOCK(vb
+ 12, mb
->cbp
, mb
->mbx
, mb
->mby
,
1185 unit
.x
, unit
.y
, 0, half
.y
, half
.x
, half
.y
,
1186 8, 2, 1, use_zb
, r
->zero_block
);
1188 SET_BLOCK(vb
+ 18, mb
->cbp
, mb
->mbx
, mb
->mby
,
1189 unit
.x
, unit
.y
, half
.x
, half
.y
, half
.x
, half
.y
,
1190 4, 2, 1, use_zb
, r
->zero_block
);
/*
 * Builds the vertex streams for all macroblocks currently held in
 * r->macroblock_buf, grouped by macroblock type.
 *
 *   r               - renderer holding the buffered macroblocks and the
 *                     vertex buffers
 *   num_macroblocks - per-type counters indexed by MACROBLOCK_TYPE; they are
 *                     incremented here, so the caller must pass them zeroed
 *                     (flush() initialises its array with { 0 })
 *
 * NOTE(review): this listing has lost some short lines; the initialisation
 * of offset[0] and any update of offset[mb_type] after each macroblock are
 * not visible -- confirm against the pristine file.
 */
1200 gen_macroblock_stream(struct vl_mpeg12_mc_renderer
*r
,
1201 unsigned *num_macroblocks
)
1203 unsigned offset
[NUM_MACROBLOCK_TYPES
];
1204 struct vert_stream_0
*ycbcr_vb
;
1205 struct vertex2f
*ref_vb
[2];
1209 assert(num_macroblocks
);
/* First pass: count how many buffered macroblocks there are of each type. */
1211 for (i
= 0; i
< r
->num_macroblocks
; ++i
) {
1212 enum MACROBLOCK_TYPE mb_type
= get_macroblock_type(&r
->macroblock_buf
[i
]);
1213 ++num_macroblocks
[mb_type
];
/* Each type's first slot follows the last slot of the previous type. */
1218 for (i
= 1; i
< NUM_MACROBLOCK_TYPES
; ++i
)
1219 offset
[i
] = offset
[i
- 1] + num_macroblocks
[i
- 1];
/* Map the position/texcoord stream for CPU writes, discarding old contents. */
1221 ycbcr_vb
= (struct vert_stream_0
*)pipe_buffer_map
1224 r
->vertex_bufs
.individual
.ycbcr
.buffer
,
1225 PIPE_BUFFER_USAGE_CPU_WRITE
| PIPE_BUFFER_USAGE_DISCARD
/* Map both motion-vector streams the same way. */
1228 for (i
= 0; i
< 2; ++i
)
1229 ref_vb
[i
] = (struct vertex2f
*)pipe_buffer_map
1232 r
->vertex_bufs
.individual
.ref
[i
].buffer
,
1233 PIPE_BUFFER_USAGE_CPU_WRITE
| PIPE_BUFFER_USAGE_DISCARD
/* Second pass: emit each macroblock's vertices at its type's current slot. */
1236 for (i
= 0; i
< r
->num_macroblocks
; ++i
) {
1237 enum MACROBLOCK_TYPE mb_type
= get_macroblock_type(&r
->macroblock_buf
[i
]);
1239 gen_macroblock_verts(r
, &r
->macroblock_buf
[i
], offset
[mb_type
],
/* Unmap all three vertex buffers. */
1245 pipe_buffer_unmap(r
->pipe
->screen
, r
->vertex_bufs
.individual
.ycbcr
.buffer
);
1246 for (i
= 0; i
< 2; ++i
)
1247 pipe_buffer_unmap(r
->pipe
->screen
, r
->vertex_bufs
.individual
.ref
[i
].buffer
);
/*
 * Renders every buffered macroblock to r->surface and resets the batch.
 *
 * Steps: build the vertex streams, bind the surface as colour buffer 0, set
 * viewport/scissor, write the surface size into the vertex shader constants,
 * issue one draw per macroblock type that has entries, flush the pipe with
 * r->fence, then reset the zero-block markers (XFER_ONE mode only) and
 * r->num_macroblocks.
 *
 * Each draw starts at vb_start and covers count * 24 vertices; vb_start is
 * advanced by the same amount afterwards.
 *
 * NOTE(review): the three field-prediction passes are disabled with
 * `if (false ...)`. gen_macroblock_stream() still counts macroblocks of
 * those types when laying out the stream. The embedded numbering leaves no
 * room for a closing brace between each draw_arrays call and its
 * `vb_start +=` line, so the advance appears to sit inside the disabled
 * block; if so, later passes would start at the wrong vertex whenever
 * field-predicted macroblocks are present -- confirm against the pristine
 * file (braces are not visible in this listing).
 */
1251 flush(struct vl_mpeg12_mc_renderer
*r
)
1253 unsigned num_macroblocks
[NUM_MACROBLOCK_TYPES
] = { 0 };
1254 unsigned vb_start
= 0;
1255 struct vertex_shader_consts
*vs_consts
;
/* flush() expects a completely filled batch. */
1259 assert(r
->num_macroblocks
== r
->macroblocks_per_batch
);
1261 gen_macroblock_stream(r
, num_macroblocks
);
/* Obtain a surface of the target texture for GPU writes and use it as cbufs[0]. */
1263 r
->fb_state
.cbufs
[0] = r
->pipe
->screen
->get_tex_surface
1265 r
->pipe
->screen
, r
->surface
,
1266 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
1269 r
->pipe
->set_framebuffer_state(r
->pipe
, &r
->fb_state
);
1270 r
->pipe
->set_viewport_state(r
->pipe
, &r
->viewport
);
1271 r
->pipe
->set_scissor_state(r
->pipe
, &r
->scissor
);
/* Store the target surface's width/height in the vertex shader constants. */
1273 vs_consts
= pipe_buffer_map
1275 r
->pipe
->screen
, r
->vs_const_buf
.buffer
,
1276 PIPE_BUFFER_USAGE_CPU_WRITE
| PIPE_BUFFER_USAGE_DISCARD
1279 vs_consts
->denorm
.x
= r
->surface
->width0
;
1280 vs_consts
->denorm
.y
= r
->surface
->height0
;
1282 pipe_buffer_unmap(r
->pipe
->screen
, r
->vs_const_buf
.buffer
);
/* Bind constant buffer slot 0 for both shader stages (the buffer arguments are not visible in this listing). */
1284 r
->pipe
->set_constant_buffer(r
->pipe
, PIPE_SHADER_VERTEX
, 0,
1286 r
->pipe
->set_constant_buffer(r
->pipe
, PIPE_SHADER_FRAGMENT
, 0,
/* Intra pass: 1 vertex buffer, 4 vertex elements, 3 textures/samplers. */
1289 if (num_macroblocks
[MACROBLOCK_TYPE_INTRA
] > 0) {
1290 r
->pipe
->set_vertex_buffers(r
->pipe
, 1, r
->vertex_bufs
.all
);
1291 r
->pipe
->set_vertex_elements(r
->pipe
, 4, r
->vertex_elems
);
1292 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 3, r
->textures
.all
);
1293 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 3, r
->samplers
.all
);
1294 r
->pipe
->bind_vs_state(r
->pipe
, r
->i_vs
);
1295 r
->pipe
->bind_fs_state(r
->pipe
, r
->i_fs
);
1297 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1298 num_macroblocks
[MACROBLOCK_TYPE_INTRA
] * 24);
1299 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_INTRA
] * 24;
/* Forward frame-predicted pass: 2 vertex buffers, 6 elements, r->past as reference 0. */
1302 if (num_macroblocks
[MACROBLOCK_TYPE_FWD_FRAME_PRED
] > 0) {
1303 r
->pipe
->set_vertex_buffers(r
->pipe
, 2, r
->vertex_bufs
.all
);
1304 r
->pipe
->set_vertex_elements(r
->pipe
, 6, r
->vertex_elems
);
1305 r
->textures
.individual
.ref
[0] = r
->past
;
1306 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 4, r
->textures
.all
);
1307 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 4, r
->samplers
.all
);
1308 r
->pipe
->bind_vs_state(r
->pipe
, r
->p_vs
[0]);
1309 r
->pipe
->bind_fs_state(r
->pipe
, r
->p_fs
[0]);
1311 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1312 num_macroblocks
[MACROBLOCK_TYPE_FWD_FRAME_PRED
] * 24);
1313 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_FWD_FRAME_PRED
] * 24;
/* Forward field-predicted pass: currently disabled; would use shader pair [1]. */
1316 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1317 r
->pipe
->set_vertex_buffers(r
->pipe
, 2, r
->vertex_bufs
.all
);
1318 r
->pipe
->set_vertex_elements(r
->pipe
, 6, r
->vertex_elems
);
1319 r
->textures
.individual
.ref
[0] = r
->past
;
1320 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 4, r
->textures
.all
);
1321 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 4, r
->samplers
.all
);
1322 r
->pipe
->bind_vs_state(r
->pipe
, r
->p_vs
[1]);
1323 r
->pipe
->bind_fs_state(r
->pipe
, r
->p_fs
[1]);
1325 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1326 num_macroblocks
[MACROBLOCK_TYPE_FWD_FIELD_PRED
] * 24);
1327 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_FWD_FIELD_PRED
] * 24;
/* Backward frame-predicted pass: same state as forward, but r->future is reference 0. */
1330 if (num_macroblocks
[MACROBLOCK_TYPE_BKWD_FRAME_PRED
] > 0) {
1331 r
->pipe
->set_vertex_buffers(r
->pipe
, 2, r
->vertex_bufs
.all
);
1332 r
->pipe
->set_vertex_elements(r
->pipe
, 6, r
->vertex_elems
);
1333 r
->textures
.individual
.ref
[0] = r
->future
;
1334 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 4, r
->textures
.all
);
1335 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 4, r
->samplers
.all
);
1336 r
->pipe
->bind_vs_state(r
->pipe
, r
->p_vs
[0]);
1337 r
->pipe
->bind_fs_state(r
->pipe
, r
->p_fs
[0]);
1339 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1340 num_macroblocks
[MACROBLOCK_TYPE_BKWD_FRAME_PRED
] * 24);
1341 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_BKWD_FRAME_PRED
] * 24;
/* Backward field-predicted pass: currently disabled. */
1344 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
1345 r
->pipe
->set_vertex_buffers(r
->pipe
, 2, r
->vertex_bufs
.all
);
1346 r
->pipe
->set_vertex_elements(r
->pipe
, 6, r
->vertex_elems
);
1347 r
->textures
.individual
.ref
[0] = r
->future
;
1348 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 4, r
->textures
.all
);
1349 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 4, r
->samplers
.all
);
1350 r
->pipe
->bind_vs_state(r
->pipe
, r
->p_vs
[1]);
1351 r
->pipe
->bind_fs_state(r
->pipe
, r
->p_fs
[1]);
1353 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1354 num_macroblocks
[MACROBLOCK_TYPE_BKWD_FIELD_PRED
] * 24);
1355 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_BKWD_FIELD_PRED
] * 24;
/* Bidirectional frame-predicted pass: 3 vertex buffers, 8 elements, past and future as references 0 and 1. */
1358 if (num_macroblocks
[MACROBLOCK_TYPE_BI_FRAME_PRED
] > 0) {
1359 r
->pipe
->set_vertex_buffers(r
->pipe
, 3, r
->vertex_bufs
.all
);
1360 r
->pipe
->set_vertex_elements(r
->pipe
, 8, r
->vertex_elems
);
1361 r
->textures
.individual
.ref
[0] = r
->past
;
1362 r
->textures
.individual
.ref
[1] = r
->future
;
1363 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 5, r
->textures
.all
);
1364 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 5, r
->samplers
.all
);
1365 r
->pipe
->bind_vs_state(r
->pipe
, r
->b_vs
[0]);
1366 r
->pipe
->bind_fs_state(r
->pipe
, r
->b_fs
[0]);
1368 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1369 num_macroblocks
[MACROBLOCK_TYPE_BI_FRAME_PRED
] * 24);
1370 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_BI_FRAME_PRED
] * 24;
/* Bidirectional field-predicted pass: currently disabled. */
1373 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1374 r
->pipe
->set_vertex_buffers(r
->pipe
, 3, r
->vertex_bufs
.all
);
1375 r
->pipe
->set_vertex_elements(r
->pipe
, 8, r
->vertex_elems
);
1376 r
->textures
.individual
.ref
[0] = r
->past
;
1377 r
->textures
.individual
.ref
[1] = r
->future
;
1378 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 5, r
->textures
.all
);
1379 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 5, r
->samplers
.all
);
1380 r
->pipe
->bind_vs_state(r
->pipe
, r
->b_vs
[1]);
1381 r
->pipe
->bind_fs_state(r
->pipe
, r
->b_fs
[1]);
1383 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1384 num_macroblocks
[MACROBLOCK_TYPE_BI_FIELD_PRED
] * 24);
1385 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_BI_FIELD_PRED
] * 24;
/* Flush the render cache, handing r->fence to the pipe, then drop the surface reference taken above. */
1388 r
->pipe
->flush(r
->pipe
, PIPE_FLUSH_RENDER_CACHE
, r
->fence
);
1389 pipe_surface_reference(&r
->fb_state
.cbufs
[0], NULL
);
/* In XFER_ONE mode, mark all three zero-block slots as unset for the next batch. */
1391 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
)
1392 for (i
= 0; i
< 3; ++i
)
1393 r
->zero_block
[i
].x
= ZERO_BLOCK_NIL
;
/* The batch is now empty. */
1395 r
->num_macroblocks
= 0;
/*
 * Copies one block of BLOCK_HEIGHT rows from `src` to `dst`.
 *
 * Source rows are BLOCK_WIDTH shorts apart; destination rows are `dst_pitch`
 * shorts apart. Each row copies BLOCK_WIDTH * 2 bytes, i.e. BLOCK_WIDTH
 * samples assuming sizeof(short) == 2.
 */
1399 grab_frame_coded_block(short *src
, short *dst
, unsigned dst_pitch
)
1406 for (y
= 0; y
< BLOCK_HEIGHT
; ++y
)
1407 memcpy(dst
+ y
* dst_pitch
, src
+ y
* BLOCK_WIDTH
, BLOCK_WIDTH
* 2);
/*
 * Copies one block of BLOCK_HEIGHT rows from `src` to `dst`, writing every
 * second destination row.
 *
 * Source rows are BLOCK_WIDTH shorts apart; destination rows are
 * `dst_pitch * 2` shorts apart, so the rows in between are left untouched.
 * Each row copies BLOCK_WIDTH * 2 bytes, i.e. BLOCK_WIDTH samples assuming
 * sizeof(short) == 2.
 */
1411 grab_field_coded_block(short *src
, short *dst
, unsigned dst_pitch
)
1418 for (y
= 0; y
< BLOCK_HEIGHT
; ++y
)
1419 memcpy(dst
+ y
* dst_pitch
* 2, src
+ y
* BLOCK_WIDTH
, BLOCK_WIDTH
* 2);
/*
 * Zeroes one block of BLOCK_HEIGHT rows at `dst`.
 *
 * Rows are `dst_pitch` shorts apart; each row clears BLOCK_WIDTH * 2 bytes,
 * i.e. BLOCK_WIDTH samples assuming sizeof(short) == 2.
 */
1423 fill_zero_block(short *dst
, unsigned dst_pitch
)
1429 for (y
= 0; y
< BLOCK_HEIGHT
; ++y
)
1430 memset(dst
+ y
* dst_pitch
, 0, BLOCK_WIDTH
* 2);
/*
 * Transfers the sample blocks of one macroblock into the renderer's mapped
 * textures: r->texels[0] for the four luma blocks, r->texels[1] and
 * r->texels[2] for the two chroma blocks.
 *
 *   r        - renderer; supplies the mapped texels, transfers, eb_handling
 *              and the zero_block markers
 *   mbx, mby - macroblock position in macroblock units
 *   dct_type - selects frame or field layout for the luma blocks
 *   cbp      - coded block pattern: bit (5 - n) marks luma block n as coded,
 *              bit (1 - n) marks chroma block n as coded
 *   blocks   - source samples, BLOCK_WIDTH * BLOCK_HEIGHT shorts per block,
 *              indexed by `sb`
 *
 * Blocks that are not coded are zero-filled depending on r->eb_handling:
 * always in XFER_ALL mode; in XFER_ONE mode only while the plane's
 * zero_block marker is still unset, in which case the marker is then set to
 * the location of the block just zeroed.
 *
 * NOTE(review): this listing has lost some short lines. `sb` is presumably
 * advanced after each coded block is copied, but no such statement is
 * visible. The embedded numbering also skips 1480-1483 before the chroma
 * loop; the `<< 1` applied to mbpx/mbpy there suggests they are halved in
 * the missing lines -- confirm against the pristine file.
 */
1434 grab_blocks(struct vl_mpeg12_mc_renderer
*r
, unsigned mbx
, unsigned mby
,
1435 enum pipe_mpeg12_dct_type dct_type
, unsigned cbp
, short *blocks
)
1439 unsigned tb
= 0, sb
= 0;
/* Macroblock origin in pixels. */
1440 unsigned mbpx
= mbx
* MACROBLOCK_WIDTH
, mbpy
= mby
* MACROBLOCK_HEIGHT
;
/* Luma: transfer stride divided by the format's block size, then the address of this macroblock's first texel. */
1446 tex_pitch
= r
->tex_transfer
[0]->stride
/ pf_get_blocksize(r
->tex_transfer
[0]->texture
->format
);
1447 texels
= r
->texels
[0] + mbpy
* tex_pitch
+ mbpx
;
/* Visit the 2x2 luma blocks; tb counts them in row-major order. */
1449 for (y
= 0; y
< 2; ++y
) {
1450 for (x
= 0; x
< 2; ++x
, ++tb
) {
1451 if ((cbp
>> (5 - tb
)) & 1) {
/* Frame DCT: block (x, y) occupies its own quadrant, rows one pitch apart. */
1452 if (dct_type
== PIPE_MPEG12_DCT_TYPE_FRAME
) {
1453 grab_frame_coded_block(blocks
+ sb
* BLOCK_WIDTH
* BLOCK_HEIGHT
,
1454 texels
+ y
* tex_pitch
* BLOCK_WIDTH
+
1455 x
* BLOCK_WIDTH
, tex_pitch
);
/* Otherwise: the block starts on row y and is written to every second row. */
1458 grab_field_coded_block(blocks
+ sb
* BLOCK_WIDTH
* BLOCK_HEIGHT
,
1459 texels
+ y
* tex_pitch
+ x
* BLOCK_WIDTH
,
/* Block not coded: zero-fill unless empty blocks are not transferred at all. */
1465 else if (r
->eb_handling
!= VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE
) {
1466 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL
||
1467 ZERO_BLOCK_IS_NIL(r
->zero_block
[0])) {
1468 fill_zero_block(texels
+ y
* tex_pitch
* BLOCK_WIDTH
+ x
* BLOCK_WIDTH
, tex_pitch
);
/* XFER_ONE: remember where this zeroed luma block lives, scaled by the inverse surface size. */
1469 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
) {
1470 r
->zero_block
[0].x
= (mbpx
+ x
* 8) * r
->surface_tex_inv_size
.x
;
1471 r
->zero_block
[0].y
= (mbpy
+ y
* 8) * r
->surface_tex_inv_size
.y
;
1478 /* TODO: Implement 422, 444 */
1479 assert(r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
);
/* Chroma: one block per plane; tb = 0 and 1 use texels[1] and texels[2]. */
1484 for (tb
= 0; tb
< 2; ++tb
) {
1485 tex_pitch
= r
->tex_transfer
[tb
+ 1]->stride
/ pf_get_blocksize(r
->tex_transfer
[tb
+ 1]->texture
->format
);
1486 texels
= r
->texels
[tb
+ 1] + mbpy
* tex_pitch
+ mbpx
;
1488 if ((cbp
>> (1 - tb
)) & 1) {
1489 grab_frame_coded_block(blocks
+ sb
* BLOCK_WIDTH
* BLOCK_HEIGHT
, texels
, tex_pitch
);
/* Same empty-block policy as for luma, using this plane's zero_block slot. */
1492 else if (r
->eb_handling
!= VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE
) {
1493 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL
||
1494 ZERO_BLOCK_IS_NIL(r
->zero_block
[tb
+ 1])) {
1495 fill_zero_block(texels
, tex_pitch
);
1496 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
) {
1497 r
->zero_block
[tb
+ 1].x
= (mbpx
<< 1) * r
->surface_tex_inv_size
.x
;
1498 r
->zero_block
[tb
+ 1].y
= (mbpy
<< 1) * r
->surface_tex_inv_size
.y
;
/*
 * Appends one macroblock to the current batch.
 *
 * The macroblock descriptor is copied into the next free slot of
 * r->macroblock_buf, its sample blocks are transferred with grab_blocks(),
 * and r->num_macroblocks is incremented. The batch must not already be full
 * (asserted).
 */
1506 grab_macroblock(struct vl_mpeg12_mc_renderer
*r
,
1507 struct pipe_mpeg12_macroblock
*mb
)
1511 assert(r
->num_macroblocks
< r
->macroblocks_per_batch
);
/* Keep a copy of the descriptor for vertex generation at flush time. */
1513 memcpy(&r
->macroblock_buf
[r
->num_macroblocks
], mb
,
1514 sizeof(struct pipe_mpeg12_macroblock
));
1516 grab_blocks(r
, mb
->mbx
, mb
->mby
, mb
->dct_type
, mb
->cbp
, mb
->blocks
);
1518 ++r
->num_macroblocks
;
/*
 * Initialises `renderer` for the given pipe context and picture parameters.
 *
 * The structure is zeroed, the arguments are stored, and pipe state, shaders
 * and buffers are created in that order; when a later step fails, the
 * earlier ones are cleaned up again. Finally the per-surface state is reset
 * and the transfer buffers are mapped.
 *
 * Only VL_MPEG12_MC_RENDERER_BUFFER_PICTURE is accepted for `bufmode`,
 * `eb_handling` must not be VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE, and
 * `pot_buffers` must be true (all asserted).
 *
 * NOTE(review): this listing has lost some short lines; the `pot_buffers`
 * parameter declaration and the return statements are not visible --
 * confirm against the pristine file.
 */
1522 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer
*renderer
,
1523 struct pipe_context
*pipe
,
1524 unsigned picture_width
,
1525 unsigned picture_height
,
1526 enum pipe_video_chroma_format chroma_format
,
1527 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode
,
1528 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling
,
1535 /* TODO: Implement other policies */
1536 assert(bufmode
== VL_MPEG12_MC_RENDERER_BUFFER_PICTURE
);
1537 /* TODO: Implement this */
1538 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1539 assert(eb_handling
!= VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE
);
1540 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1541 assert(pot_buffers
);
/* Start from a fully zeroed structure, then record the configuration. */
1543 memset(renderer
, 0, sizeof(struct vl_mpeg12_mc_renderer
));
1545 renderer
->pipe
= pipe
;
1546 renderer
->picture_width
= picture_width
;
1547 renderer
->picture_height
= picture_height
;
1548 renderer
->chroma_format
= chroma_format
;
1549 renderer
->bufmode
= bufmode
;
1550 renderer
->eb_handling
= eb_handling
;
1551 renderer
->pot_buffers
= pot_buffers
;
/* Create resources step by step, undoing the earlier steps when one fails. */
1553 if (!init_pipe_state(renderer
))
1555 if (!init_shaders(renderer
)) {
1556 cleanup_pipe_state(renderer
);
1559 if (!init_buffers(renderer
)) {
1560 cleanup_shaders(renderer
);
1561 cleanup_pipe_state(renderer
);
/* No target or reference surfaces yet, no zero blocks recorded, empty batch. */
1565 renderer
->surface
= NULL
;
1566 renderer
->past
= NULL
;
1567 renderer
->future
= NULL
;
1568 for (i
= 0; i
< 3; ++i
)
1569 renderer
->zero_block
[i
].x
= ZERO_BLOCK_NIL
;
1570 renderer
->num_macroblocks
= 0;
/* Map the transfer buffers so macroblocks can be written straight away. */
1572 xfer_buffers_map(renderer
);
/*
 * Tears down a renderer set up by vl_mpeg12_mc_renderer_init(): unmaps the
 * transfer buffers, then releases pipe state, shaders and buffers.
 */
1578 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer
*renderer
)
1582 xfer_buffers_unmap(renderer
);
1584 cleanup_pipe_state(renderer
);
1585 cleanup_shaders(renderer
);
1586 cleanup_buffers(renderer
);
/*
 * Queues `num_macroblocks` macroblocks for rendering to `surface`, using
 * `past` and `future` as reference textures, and processes the batch
 * whenever it becomes full.
 *
 *   surface            - target texture; its width0/height0 define
 *                        surface_tex_inv_size
 *   past, future       - reference textures stored on the renderer
 *   num_macroblocks    - number of entries in mpeg12_macroblocks (non-zero)
 *   mpeg12_macroblocks - macroblocks to queue (non-NULL)
 *   fence              - stored on the renderer and passed to the pipe flush
 *
 * NOTE(review): this listing has lost some short lines, including the
 * `renderer` parameter declaration, the calls made between
 * xfer_buffers_unmap() and xfer_buffers_map(), and the lines that set and
 * test `new_surface`. Every pass of the while loop also indexes
 * mpeg12_macroblocks from 0 and no visible line advances the pointer by
 * num_to_submit, so a call spanning more than one batch may resubmit the
 * first entries -- confirm against the pristine file.
 */
1590 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1592 struct pipe_texture
*surface
,
1593 struct pipe_texture
*past
,
1594 struct pipe_texture
*future
,
1595 unsigned num_macroblocks
,
1596 struct pipe_mpeg12_macroblock
1597 *mpeg12_macroblocks
,
1598 struct pipe_fence_handle
**fence
)
1600 bool new_surface
= false;
1604 assert(num_macroblocks
);
1605 assert(mpeg12_macroblocks
);
/* A different target surface while macroblocks are still pending: the transfer buffers are unmapped first. */
1607 if (renderer
->surface
) {
1608 if (surface
!= renderer
->surface
) {
1609 if (renderer
->num_macroblocks
> 0) {
1610 xfer_buffers_unmap(renderer
);
1617 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1618 assert(surface
!= renderer
->surface
|| renderer
->past
== past
);
1619 assert(surface
!= renderer
->surface
|| renderer
->future
== future
);
/* Record the new target, references and fence, and cache the inverse surface size. */
1625 renderer
->surface
= surface
;
1626 renderer
->past
= past
;
1627 renderer
->future
= future
;
1628 renderer
->fence
= fence
;
1629 renderer
->surface_tex_inv_size
.x
= 1.0f
/ surface
->width0
;
1630 renderer
->surface_tex_inv_size
.y
= 1.0f
/ surface
->height0
;
/* Feed the macroblocks in chunks no larger than the free space in the current batch. */
1633 while (num_macroblocks
) {
1634 unsigned left_in_batch
= renderer
->macroblocks_per_batch
- renderer
->num_macroblocks
;
1635 unsigned num_to_submit
= MIN2(num_macroblocks
, left_in_batch
);
1638 for (i
= 0; i
< num_to_submit
; ++i
) {
1639 assert(mpeg12_macroblocks
[i
].base
.codec
== PIPE_VIDEO_CODEC_MPEG12
);
1640 grab_macroblock(renderer
, &mpeg12_macroblocks
[i
]);
1643 num_macroblocks
-= num_to_submit
;
/* Batch full: unmap, (presumably) render, and map again for the next batch. */
1645 if (renderer
->num_macroblocks
== renderer
->macroblocks_per_batch
) {
1646 xfer_buffers_unmap(renderer
);
1648 xfer_buffers_map(renderer
);
1649 /* Next time we get this surface it may have new ref frames */
1650 renderer
->surface
= NULL
;