1 /**************************************************************************
3 * Copyright 2009 Younes Manton.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "vl_mpeg12_mc_renderer.h"
30 #include <pipe/p_context.h>
31 #include <util/u_inlines.h>
32 #include <util/u_format.h>
33 #include <util/u_math.h>
34 #include <util/u_memory.h>
35 #include <tgsi/tgsi_parse.h>
36 #include <tgsi/tgsi_build.h>
37 #include "vl_shader_build.h"
39 #define DEFAULT_BUF_ALIGNMENT 1
40 #define MACROBLOCK_WIDTH 16
41 #define MACROBLOCK_HEIGHT 16
43 #define BLOCK_HEIGHT 8
44 #define ZERO_BLOCK_NIL -1.0f
45 #define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
57 struct vertex_shader_consts
59 struct vertex4f denorm
;
62 struct fragment_shader_consts
64 struct vertex4f multiplier
;
69 * Multiplier renormalizes block samples from 16 bits to 12 bits.
70 * Divider is used when calculating Y % 2 for choosing top or bottom
71 * field for P or B macroblocks.
72 * TODO: Use immediates.
74 static const struct fragment_shader_consts fs_consts
= {
75 {32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 0.0f
},
76 {0.5f
, 2.0f
, 0.0f
, 0.0f
}
82 struct vertex2f luma_tc
;
83 struct vertex2f cb_tc
;
84 struct vertex2f cr_tc
;
89 MACROBLOCK_TYPE_INTRA
,
90 MACROBLOCK_TYPE_FWD_FRAME_PRED
,
91 MACROBLOCK_TYPE_FWD_FIELD_PRED
,
92 MACROBLOCK_TYPE_BKWD_FRAME_PRED
,
93 MACROBLOCK_TYPE_BKWD_FIELD_PRED
,
94 MACROBLOCK_TYPE_BI_FRAME_PRED
,
95 MACROBLOCK_TYPE_BI_FIELD_PRED
,
101 create_intra_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
103 const unsigned max_tokens
= 50;
105 struct pipe_shader_state vs
;
106 struct tgsi_token
*tokens
;
107 struct tgsi_header
*header
;
109 struct tgsi_full_declaration decl
;
110 struct tgsi_full_instruction inst
;
118 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
119 header
= (struct tgsi_header
*) &tokens
[0];
120 *header
= tgsi_build_header();
121 *(struct tgsi_processor
*) &tokens
[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
126 * decl i0 ; Vertex pos
127 * decl i1 ; Luma texcoords
128 * decl i2 ; Chroma Cb texcoords
129 * decl i3 ; Chroma Cr texcoords
131 for (i
= 0; i
< 4; i
++) {
132 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
133 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
137 * decl o0 ; Vertex pos
138 * decl o1 ; Luma texcoords
139 * decl o2 ; Chroma Cb texcoords
140 * decl o3 ; Chroma Cr texcoords
142 for (i
= 0; i
< 4; i
++) {
143 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
144 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
148 * mov o0, i0 ; Move input vertex pos to output
149 * mov o1, i1 ; Move input luma texcoords to output
150 * mov o2, i2 ; Move input chroma Cb texcoords to output
151 * mov o3, i3 ; Move input chroma Cr texcoords to output
153 for (i
= 0; i
< 4; ++i
) {
154 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
155 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
160 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
162 assert(ti
<= max_tokens
);
165 r
->i_vs
= r
->pipe
->create_vs_state(r
->pipe
, &vs
);
170 create_intra_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
172 const unsigned max_tokens
= 100;
174 struct pipe_shader_state fs
;
175 struct tgsi_token
*tokens
;
176 struct tgsi_header
*header
;
178 struct tgsi_full_declaration decl
;
179 struct tgsi_full_instruction inst
;
187 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
188 header
= (struct tgsi_header
*) &tokens
[0];
189 *header
= tgsi_build_header();
190 *(struct tgsi_processor
*) &tokens
[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
195 * decl i0 ; Luma texcoords
196 * decl i1 ; Chroma Cb texcoords
197 * decl i2 ; Chroma Cr texcoords
199 for (i
= 0; i
< 3; ++i
) {
200 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
201 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
204 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
205 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
206 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
208 /* decl o0 ; Fragment color */
209 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
210 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
213 decl
= vl_decl_temps(0, 1);
214 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
217 * decl s0 ; Sampler for luma texture
218 * decl s1 ; Sampler for chroma Cb texture
219 * decl s2 ; Sampler for chroma Cr texture
221 for (i
= 0; i
< 3; ++i
) {
222 decl
= vl_decl_samplers(i
, i
);
223 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
227 * tex2d t1, i0, s0 ; Read texel from luma texture
228 * mov t0.x, t1.x ; Move luma sample into .x component
229 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
230 * mov t0.y, t1.x ; Move Cb sample into .y component
231 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
232 * mov t0.z, t1.x ; Move Cr sample into .z component
234 for (i
= 0; i
< 3; ++i
) {
235 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
236 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
238 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
239 inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
240 inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
241 inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
242 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
243 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
246 /* mul o0, t0, c0 ; Rescale texel to correct range */
247 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
248 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
252 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
254 assert(ti
<= max_tokens
);
257 r
->i_fs
= r
->pipe
->create_fs_state(r
->pipe
, &fs
);
262 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
264 const unsigned max_tokens
= 100;
266 struct pipe_shader_state vs
;
267 struct tgsi_token
*tokens
;
268 struct tgsi_header
*header
;
270 struct tgsi_full_declaration decl
;
271 struct tgsi_full_instruction inst
;
279 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
280 header
= (struct tgsi_header
*) &tokens
[0];
281 *header
= tgsi_build_header();
282 *(struct tgsi_processor
*) &tokens
[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
287 * decl i0 ; Vertex pos
288 * decl i1 ; Luma texcoords
289 * decl i2 ; Chroma Cb texcoords
290 * decl i3 ; Chroma Cr texcoords
291 * decl i4 ; Ref surface top field texcoords
292 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
294 for (i
= 0; i
< 6; i
++) {
295 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
296 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
300 * decl o0 ; Vertex pos
301 * decl o1 ; Luma texcoords
302 * decl o2 ; Chroma Cb texcoords
303 * decl o3 ; Chroma Cr texcoords
304 * decl o4 ; Ref macroblock texcoords
306 for (i
= 0; i
< 5; i
++) {
307 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
308 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
312 * mov o0, i0 ; Move input vertex pos to output
313 * mov o1, i1 ; Move input luma texcoords to output
314 * mov o2, i2 ; Move input chroma Cb texcoords to output
315 * mov o3, i3 ; Move input chroma Cr texcoords to output
317 for (i
= 0; i
< 4; ++i
) {
318 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
319 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
322 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
323 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, 4);
324 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
328 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
330 assert(ti
<= max_tokens
);
333 r
->p_vs
[0] = r
->pipe
->create_vs_state(r
->pipe
, &vs
);
339 create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
346 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
348 const unsigned max_tokens
= 100;
350 struct pipe_shader_state fs
;
351 struct tgsi_token
*tokens
;
352 struct tgsi_header
*header
;
354 struct tgsi_full_declaration decl
;
355 struct tgsi_full_instruction inst
;
363 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
364 header
= (struct tgsi_header
*) &tokens
[0];
365 *header
= tgsi_build_header();
366 *(struct tgsi_processor
*) &tokens
[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
371 * decl i0 ; Luma texcoords
372 * decl i1 ; Chroma Cb texcoords
373 * decl i2 ; Chroma Cr texcoords
374 * decl i3 ; Ref macroblock texcoords
376 for (i
= 0; i
< 4; ++i
) {
377 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
378 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
381 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
382 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
383 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
385 /* decl o0 ; Fragment color */
386 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
387 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
390 decl
= vl_decl_temps(0, 1);
391 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
394 * decl s0 ; Sampler for luma texture
395 * decl s1 ; Sampler for chroma Cb texture
396 * decl s2 ; Sampler for chroma Cr texture
397 * decl s3 ; Sampler for ref surface texture
399 for (i
= 0; i
< 4; ++i
) {
400 decl
= vl_decl_samplers(i
, i
);
401 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
405 * tex2d t1, i0, s0 ; Read texel from luma texture
406 * mov t0.x, t1.x ; Move luma sample into .x component
407 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
408 * mov t0.y, t1.x ; Move Cb sample into .y component
409 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
410 * mov t0.z, t1.x ; Move Cr sample into .z component
412 for (i
= 0; i
< 3; ++i
) {
413 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
414 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
416 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
417 inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
418 inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
419 inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
420 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
421 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
424 /* mul t0, t0, c0 ; Rescale texel to correct range */
425 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
426 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
428 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
429 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, 3, TGSI_FILE_SAMPLER
, 3);
430 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
432 /* add o0, t0, t1 ; Add ref and differential to form final output */
433 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
434 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
438 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
440 assert(ti
<= max_tokens
);
443 r
->p_fs
[0] = r
->pipe
->create_fs_state(r
->pipe
, &fs
);
449 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
456 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
458 const unsigned max_tokens
= 100;
460 struct pipe_shader_state vs
;
461 struct tgsi_token
*tokens
;
462 struct tgsi_header
*header
;
464 struct tgsi_full_declaration decl
;
465 struct tgsi_full_instruction inst
;
473 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
474 header
= (struct tgsi_header
*) &tokens
[0];
475 *header
= tgsi_build_header();
476 *(struct tgsi_processor
*) &tokens
[1] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
481 * decl i0 ; Vertex pos
482 * decl i1 ; Luma texcoords
483 * decl i2 ; Chroma Cb texcoords
484 * decl i3 ; Chroma Cr texcoords
485 * decl i4 ; First ref macroblock top field texcoords
486 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
487 * decl i6 ; Second ref macroblock top field texcoords
488 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
490 for (i
= 0; i
< 8; i
++) {
491 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
492 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
496 * decl o0 ; Vertex pos
497 * decl o1 ; Luma texcoords
498 * decl o2 ; Chroma Cb texcoords
499 * decl o3 ; Chroma Cr texcoords
500 * decl o4 ; First ref macroblock texcoords
501 * decl o5 ; Second ref macroblock texcoords
503 for (i
= 0; i
< 6; i
++) {
504 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
505 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
509 * mov o0, i0 ; Move input vertex pos to output
510 * mov o1, i1 ; Move input luma texcoords to output
511 * mov o2, i2 ; Move input chroma Cb texcoords to output
512 * mov o3, i3 ; Move input chroma Cr texcoords to output
514 for (i
= 0; i
< 4; ++i
) {
515 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
516 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
520 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
521 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
523 for (i
= 0; i
< 2; ++i
) {
524 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, (i
+ 2) * 2);
525 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
530 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
532 assert(ti
<= max_tokens
);
535 r
->b_vs
[0] = r
->pipe
->create_vs_state(r
->pipe
, &vs
);
541 create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
548 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
550 const unsigned max_tokens
= 100;
552 struct pipe_shader_state fs
;
553 struct tgsi_token
*tokens
;
554 struct tgsi_header
*header
;
556 struct tgsi_full_declaration decl
;
557 struct tgsi_full_instruction inst
;
565 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
566 header
= (struct tgsi_header
*) &tokens
[0];
567 *header
= tgsi_build_header();
568 *(struct tgsi_processor
*) &tokens
[1] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
573 * decl i0 ; Luma texcoords
574 * decl i1 ; Chroma Cb texcoords
575 * decl i2 ; Chroma Cr texcoords
576 * decl i3 ; First ref macroblock texcoords
577 * decl i4 ; Second ref macroblock texcoords
579 for (i
= 0; i
< 5; ++i
) {
580 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
581 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
585 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
586 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
588 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
589 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
591 /* decl o0 ; Fragment color */
592 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
593 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
596 decl
= vl_decl_temps(0, 2);
597 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
600 * decl s0 ; Sampler for luma texture
601 * decl s1 ; Sampler for chroma Cb texture
602 * decl s2 ; Sampler for chroma Cr texture
603 * decl s3 ; Sampler for first ref surface texture
604 * decl s4 ; Sampler for second ref surface texture
606 for (i
= 0; i
< 5; ++i
) {
607 decl
= vl_decl_samplers(i
, i
);
608 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
612 * tex2d t1, i0, s0 ; Read texel from luma texture
613 * mov t0.x, t1.x ; Move luma sample into .x component
614 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
615 * mov t0.y, t1.x ; Move Cb sample into .y component
616 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
617 * mov t0.z, t1.x ; Move Cr sample into .z component
619 for (i
= 0; i
< 3; ++i
) {
620 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
621 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
623 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
624 inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
625 inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
626 inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
627 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
628 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
631 /* mul t0, t0, c0 ; Rescale texel to correct range */
632 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
633 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
636 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
637 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
639 for (i
= 0; i
< 2; ++i
) {
640 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, i
+ 3);
641 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
644 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
645 inst
= vl_inst4(TGSI_OPCODE_LRP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_CONSTANT
, 1, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
646 inst
.Src
[0].Register
.SwizzleX
= TGSI_SWIZZLE_X
;
647 inst
.Src
[0].Register
.SwizzleY
= TGSI_SWIZZLE_X
;
648 inst
.Src
[0].Register
.SwizzleZ
= TGSI_SWIZZLE_X
;
649 inst
.Src
[0].Register
.SwizzleW
= TGSI_SWIZZLE_X
;
650 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
652 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
653 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
654 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
658 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
660 assert(ti
<= max_tokens
);
663 r
->b_fs
[0] = r
->pipe
->create_fs_state(r
->pipe
, &fs
);
669 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
676 xfer_buffers_map(struct vl_mpeg12_mc_renderer
*r
)
682 for (i
= 0; i
< 3; ++i
) {
683 r
->tex_transfer
[i
] = r
->pipe
->screen
->get_tex_transfer
685 r
->pipe
->screen
, r
->textures
.all
[i
],
686 0, 0, 0, PIPE_TRANSFER_WRITE
, 0, 0,
687 r
->textures
.all
[i
]->width0
, r
->textures
.all
[i
]->height0
690 r
->texels
[i
] = r
->pipe
->screen
->transfer_map(r
->pipe
->screen
, r
->tex_transfer
[i
]);
695 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer
*r
)
701 for (i
= 0; i
< 3; ++i
) {
702 r
->pipe
->screen
->transfer_unmap(r
->pipe
->screen
, r
->tex_transfer
[i
]);
703 r
->pipe
->screen
->tex_transfer_destroy(r
->tex_transfer
[i
]);
708 init_pipe_state(struct vl_mpeg12_mc_renderer
*r
)
710 struct pipe_sampler_state sampler
;
711 struct pipe_vertex_element vertex_elems
[8];
717 r
->viewport
.scale
[0] = r
->pot_buffers
?
718 util_next_power_of_two(r
->picture_width
) : r
->picture_width
;
719 r
->viewport
.scale
[1] = r
->pot_buffers
?
720 util_next_power_of_two(r
->picture_height
) : r
->picture_height
;
721 r
->viewport
.scale
[2] = 1;
722 r
->viewport
.scale
[3] = 1;
723 r
->viewport
.translate
[0] = 0;
724 r
->viewport
.translate
[1] = 0;
725 r
->viewport
.translate
[2] = 0;
726 r
->viewport
.translate
[3] = 0;
728 r
->scissor
.maxx
= r
->pot_buffers
?
729 util_next_power_of_two(r
->picture_width
) : r
->picture_width
;
730 r
->scissor
.maxy
= r
->pot_buffers
?
731 util_next_power_of_two(r
->picture_height
) : r
->picture_height
;
733 r
->fb_state
.width
= r
->pot_buffers
?
734 util_next_power_of_two(r
->picture_width
) : r
->picture_width
;
735 r
->fb_state
.height
= r
->pot_buffers
?
736 util_next_power_of_two(r
->picture_height
) : r
->picture_height
;
737 r
->fb_state
.nr_cbufs
= 1;
738 r
->fb_state
.zsbuf
= NULL
;
741 filters
[0] = PIPE_TEX_FILTER_NEAREST
;
743 if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_444
||
744 r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
) {
745 filters
[1] = PIPE_TEX_FILTER_NEAREST
;
746 filters
[2] = PIPE_TEX_FILTER_NEAREST
;
749 filters
[1] = PIPE_TEX_FILTER_LINEAR
;
750 filters
[2] = PIPE_TEX_FILTER_LINEAR
;
752 /* Fwd, bkwd ref filters */
753 filters
[3] = PIPE_TEX_FILTER_LINEAR
;
754 filters
[4] = PIPE_TEX_FILTER_LINEAR
;
756 for (i
= 0; i
< 5; ++i
) {
757 sampler
.wrap_s
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
758 sampler
.wrap_t
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
759 sampler
.wrap_r
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
760 sampler
.min_img_filter
= filters
[i
];
761 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
762 sampler
.mag_img_filter
= filters
[i
];
763 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
764 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
765 sampler
.normalized_coords
= 1;
766 /*sampler.shadow_ambient = ; */
767 /*sampler.lod_bias = ; */
769 /*sampler.max_lod = ; */
770 /*sampler.border_color[i] = ; */
771 /*sampler.max_anisotropy = ; */
772 r
->samplers
.all
[i
] = r
->pipe
->create_sampler_state(r
->pipe
, &sampler
);
775 /* Position element */
776 vertex_elems
[0].src_offset
= 0;
777 vertex_elems
[0].instance_divisor
= 0;
778 vertex_elems
[0].vertex_buffer_index
= 0;
779 vertex_elems
[0].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
781 /* Luma texcoord element */
782 vertex_elems
[1].src_offset
= sizeof(struct vertex2f
);
783 vertex_elems
[1].instance_divisor
= 0;
784 vertex_elems
[1].vertex_buffer_index
= 0;
785 vertex_elems
[1].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
787 /* Chroma Cb texcoord element */
788 vertex_elems
[2].src_offset
= sizeof(struct vertex2f
) * 2;
789 vertex_elems
[2].instance_divisor
= 0;
790 vertex_elems
[2].vertex_buffer_index
= 0;
791 vertex_elems
[2].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
793 /* Chroma Cr texcoord element */
794 vertex_elems
[3].src_offset
= sizeof(struct vertex2f
) * 3;
795 vertex_elems
[3].instance_divisor
= 0;
796 vertex_elems
[3].vertex_buffer_index
= 0;
797 vertex_elems
[3].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
799 /* First ref surface top field texcoord element */
800 vertex_elems
[4].src_offset
= 0;
801 vertex_elems
[4].instance_divisor
= 0;
802 vertex_elems
[4].vertex_buffer_index
= 1;
803 vertex_elems
[4].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
805 /* First ref surface bottom field texcoord element */
806 vertex_elems
[5].src_offset
= sizeof(struct vertex2f
);
807 vertex_elems
[5].instance_divisor
= 0;
808 vertex_elems
[5].vertex_buffer_index
= 1;
809 vertex_elems
[5].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
811 /* Second ref surface top field texcoord element */
812 vertex_elems
[6].src_offset
= 0;
813 vertex_elems
[6].instance_divisor
= 0;
814 vertex_elems
[6].vertex_buffer_index
= 2;
815 vertex_elems
[6].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
817 /* Second ref surface bottom field texcoord element */
818 vertex_elems
[7].src_offset
= sizeof(struct vertex2f
);
819 vertex_elems
[7].instance_divisor
= 0;
820 vertex_elems
[7].vertex_buffer_index
= 2;
821 vertex_elems
[7].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
823 /* need versions with 4,6 and 8 vertex elems */
824 r
->vertex_elems
[0] = r
->pipe
->create_vertex_elements_state(r
->pipe
, 4, vertex_elems
);
825 r
->vertex_elems
[1] = r
->pipe
->create_vertex_elements_state(r
->pipe
, 6, vertex_elems
);
826 r
->vertex_elems
[2] = r
->pipe
->create_vertex_elements_state(r
->pipe
, 8, vertex_elems
);
832 cleanup_pipe_state(struct vl_mpeg12_mc_renderer
*r
)
838 for (i
= 0; i
< 5; ++i
)
839 r
->pipe
->delete_sampler_state(r
->pipe
, r
->samplers
.all
[i
]);
840 for (i
= 0; i
< 3; i
++)
841 r
->pipe
->delete_vertex_elements_state(r
->pipe
, r
->vertex_elems
[i
]);
845 init_shaders(struct vl_mpeg12_mc_renderer
*r
)
849 create_intra_vert_shader(r
);
850 create_intra_frag_shader(r
);
851 create_frame_pred_vert_shader(r
);
852 create_frame_pred_frag_shader(r
);
853 create_frame_bi_pred_vert_shader(r
);
854 create_frame_bi_pred_frag_shader(r
);
860 cleanup_shaders(struct vl_mpeg12_mc_renderer
*r
)
864 r
->pipe
->delete_vs_state(r
->pipe
, r
->i_vs
);
865 r
->pipe
->delete_fs_state(r
->pipe
, r
->i_fs
);
866 r
->pipe
->delete_vs_state(r
->pipe
, r
->p_vs
[0]);
867 r
->pipe
->delete_fs_state(r
->pipe
, r
->p_fs
[0]);
868 r
->pipe
->delete_vs_state(r
->pipe
, r
->b_vs
[0]);
869 r
->pipe
->delete_fs_state(r
->pipe
, r
->b_fs
[0]);
873 init_buffers(struct vl_mpeg12_mc_renderer
*r
)
875 struct pipe_texture
template;
878 align(r
->picture_width
, MACROBLOCK_WIDTH
) / MACROBLOCK_WIDTH
;
880 align(r
->picture_height
, MACROBLOCK_HEIGHT
) / MACROBLOCK_HEIGHT
;
886 r
->macroblocks_per_batch
=
887 mbw
* (r
->bufmode
== VL_MPEG12_MC_RENDERER_BUFFER_PICTURE
? mbh
: 1);
888 r
->num_macroblocks
= 0;
889 r
->macroblock_buf
= MALLOC(r
->macroblocks_per_batch
* sizeof(struct pipe_mpeg12_macroblock
));
891 memset(&template, 0, sizeof(struct pipe_texture
));
892 template.target
= PIPE_TEXTURE_2D
;
893 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
894 template.format
= PIPE_FORMAT_R16_SNORM
;
895 template.last_level
= 0;
896 template.width0
= r
->pot_buffers
?
897 util_next_power_of_two(r
->picture_width
) : r
->picture_width
;
898 template.height0
= r
->pot_buffers
?
899 util_next_power_of_two(r
->picture_height
) : r
->picture_height
;
901 template.tex_usage
= PIPE_TEXTURE_USAGE_SAMPLER
| PIPE_TEXTURE_USAGE_DYNAMIC
;
903 r
->textures
.individual
.y
= r
->pipe
->screen
->texture_create(r
->pipe
->screen
, &template);
905 if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
) {
906 template.width0
= r
->pot_buffers
?
907 util_next_power_of_two(r
->picture_width
/ 2) :
908 r
->picture_width
/ 2;
909 template.height0
= r
->pot_buffers
?
910 util_next_power_of_two(r
->picture_height
/ 2) :
911 r
->picture_height
/ 2;
913 else if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_422
)
914 template.height0
= r
->pot_buffers
?
915 util_next_power_of_two(r
->picture_height
/ 2) :
916 r
->picture_height
/ 2;
918 r
->textures
.individual
.cb
=
919 r
->pipe
->screen
->texture_create(r
->pipe
->screen
, &template);
920 r
->textures
.individual
.cr
=
921 r
->pipe
->screen
->texture_create(r
->pipe
->screen
, &template);
923 r
->vertex_bufs
.individual
.ycbcr
.stride
= sizeof(struct vertex2f
) * 4;
924 r
->vertex_bufs
.individual
.ycbcr
.max_index
= 24 * r
->macroblocks_per_batch
- 1;
925 r
->vertex_bufs
.individual
.ycbcr
.buffer_offset
= 0;
926 r
->vertex_bufs
.individual
.ycbcr
.buffer
= pipe_buffer_create
929 DEFAULT_BUF_ALIGNMENT
,
930 PIPE_BUFFER_USAGE_VERTEX
| PIPE_BUFFER_USAGE_DISCARD
,
931 sizeof(struct vertex2f
) * 4 * 24 * r
->macroblocks_per_batch
934 for (i
= 1; i
< 3; ++i
) {
935 r
->vertex_bufs
.all
[i
].stride
= sizeof(struct vertex2f
) * 2;
936 r
->vertex_bufs
.all
[i
].max_index
= 24 * r
->macroblocks_per_batch
- 1;
937 r
->vertex_bufs
.all
[i
].buffer_offset
= 0;
938 r
->vertex_bufs
.all
[i
].buffer
= pipe_buffer_create
941 DEFAULT_BUF_ALIGNMENT
,
942 PIPE_BUFFER_USAGE_VERTEX
| PIPE_BUFFER_USAGE_DISCARD
,
943 sizeof(struct vertex2f
) * 2 * 24 * r
->macroblocks_per_batch
947 r
->vs_const_buf
= pipe_buffer_create
950 DEFAULT_BUF_ALIGNMENT
,
951 PIPE_BUFFER_USAGE_CONSTANT
| PIPE_BUFFER_USAGE_DISCARD
,
952 sizeof(struct vertex_shader_consts
)
955 r
->fs_const_buf
= pipe_buffer_create
958 DEFAULT_BUF_ALIGNMENT
,
959 PIPE_BUFFER_USAGE_CONSTANT
, sizeof(struct fragment_shader_consts
)
964 pipe_buffer_map(r
->pipe
->screen
, r
->fs_const_buf
, PIPE_BUFFER_USAGE_CPU_WRITE
),
965 &fs_consts
, sizeof(struct fragment_shader_consts
)
968 pipe_buffer_unmap(r
->pipe
->screen
, r
->fs_const_buf
);
974 cleanup_buffers(struct vl_mpeg12_mc_renderer
*r
)
980 pipe_buffer_reference(&r
->vs_const_buf
, NULL
);
981 pipe_buffer_reference(&r
->fs_const_buf
, NULL
);
983 for (i
= 0; i
< 3; ++i
)
984 pipe_buffer_reference(&r
->vertex_bufs
.all
[i
].buffer
, NULL
);
986 for (i
= 0; i
< 3; ++i
)
987 pipe_texture_reference(&r
->textures
.all
[i
], NULL
);
989 FREE(r
->macroblock_buf
);
992 static enum MACROBLOCK_TYPE
993 get_macroblock_type(struct pipe_mpeg12_macroblock
*mb
)
997 switch (mb
->mb_type
) {
998 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA
:
999 return MACROBLOCK_TYPE_INTRA
;
1000 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD
:
1001 return mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
?
1002 MACROBLOCK_TYPE_FWD_FRAME_PRED
: MACROBLOCK_TYPE_FWD_FIELD_PRED
;
1003 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
:
1004 return mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
?
1005 MACROBLOCK_TYPE_BKWD_FRAME_PRED
: MACROBLOCK_TYPE_BKWD_FIELD_PRED
;
1006 case PIPE_MPEG12_MACROBLOCK_TYPE_BI
:
1007 return mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
?
1008 MACROBLOCK_TYPE_BI_FRAME_PRED
: MACROBLOCK_TYPE_BI_FIELD_PRED
;
/*
 * Writes the six vertices (two triangles: 0-1-2 and 3-4-5) of one axis-aligned
 * quad into member `field` of vb[0..5]. (x0, y0) is the quad's origin and
 * (hx, hy) its extent. Arguments are evaluated several times.
 */
#define SET_BLOCK_QUAD(vb, field, x0, y0, hx, hy) \
   do { \
      (vb)[0].field.x = (x0);        (vb)[0].field.y = (y0); \
      (vb)[1].field.x = (x0);        (vb)[1].field.y = (y0) + (hy); \
      (vb)[2].field.x = (x0) + (hx); (vb)[2].field.y = (y0); \
      (vb)[3].field.x = (x0) + (hx); (vb)[3].field.y = (y0); \
      (vb)[4].field.x = (x0);        (vb)[4].field.y = (y0) + (hy); \
      (vb)[5].field.x = (x0) + (hx); (vb)[5].field.y = (y0) + (hy); \
   } while (0)

/* XXX: One of these days this will have to be killed with fire */
/*
 * Fills the six vertices at vb for one block of a macroblock.
 *
 * The position is always (mbx * unitx + ofsx, mby * unity + ofsy) with extent
 * (hx, hy). Each of the three texcoord sets (luma, Cb, Cr) uses that same
 * rectangle, unless use_zb is set and the block's bit (lm / cbm / crm) is
 * clear in cbp. In that case the texcoords point at the shared zero block
 * zb[0] / zb[1] / zb[2] instead.
 *
 * Arguments are evaluated more than once; do not pass expressions with side
 * effects.
 */
#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \
   do { \
      SET_BLOCK_QUAD(vb, pos, \
                     (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      \
      if (!(use_zb) || ((cbp) & (lm))) \
         SET_BLOCK_QUAD(vb, luma_tc, \
                        (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      else \
         SET_BLOCK_QUAD(vb, luma_tc, (zb)[0].x, (zb)[0].y, hx, hy); \
      \
      if (!(use_zb) || ((cbp) & (cbm))) \
         SET_BLOCK_QUAD(vb, cb_tc, \
                        (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      else \
         SET_BLOCK_QUAD(vb, cb_tc, (zb)[1].x, (zb)[1].y, hx, hy); \
      \
      if (!(use_zb) || ((cbp) & (crm))) \
         SET_BLOCK_QUAD(vb, cr_tc, \
                        (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      else \
         SET_BLOCK_QUAD(vb, cr_tc, (zb)[2].x, (zb)[2].y, hx, hy); \
   } while (0)
1086 gen_macroblock_verts(struct vl_mpeg12_mc_renderer
*r
,
1087 struct pipe_mpeg12_macroblock
*mb
, unsigned pos
,
1088 struct vert_stream_0
*ycbcr_vb
, struct vertex2f
**ref_vb
)
1090 struct vertex2f mo_vec
[2];
1097 assert(pos
< r
->macroblocks_per_batch
);
1102 switch (mb
->mb_type
) {
1103 case PIPE_MPEG12_MACROBLOCK_TYPE_BI
:
1105 struct vertex2f
*vb
;
1107 assert(ref_vb
&& ref_vb
[1]);
1109 vb
= ref_vb
[1] + pos
* 2 * 24;
1111 mo_vec
[0].x
= mb
->pmv
[0][1][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1112 mo_vec
[0].y
= mb
->pmv
[0][1][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1114 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
) {
1115 for (i
= 0; i
< 24 * 2; i
+= 2) {
1116 vb
[i
].x
= mo_vec
[0].x
;
1117 vb
[i
].y
= mo_vec
[0].y
;
1121 mo_vec
[1].x
= mb
->pmv
[1][1][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1122 mo_vec
[1].y
= mb
->pmv
[1][1][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1124 for (i
= 0; i
< 24 * 2; i
+= 2) {
1125 vb
[i
].x
= mo_vec
[0].x
;
1126 vb
[i
].y
= mo_vec
[0].y
;
1127 vb
[i
+ 1].x
= mo_vec
[1].x
;
1128 vb
[i
+ 1].y
= mo_vec
[1].y
;
1134 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD
:
1135 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
:
1137 struct vertex2f
*vb
;
1139 assert(ref_vb
&& ref_vb
[0]);
1141 vb
= ref_vb
[0] + pos
* 2 * 24;
1143 if (mb
->mb_type
== PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
) {
1144 mo_vec
[0].x
= mb
->pmv
[0][1][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1145 mo_vec
[0].y
= mb
->pmv
[0][1][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1147 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FIELD
) {
1148 mo_vec
[1].x
= mb
->pmv
[1][1][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1149 mo_vec
[1].y
= mb
->pmv
[1][1][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1153 mo_vec
[0].x
= mb
->pmv
[0][0][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1154 mo_vec
[0].y
= mb
->pmv
[0][0][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1156 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FIELD
) {
1157 mo_vec
[1].x
= mb
->pmv
[1][0][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1158 mo_vec
[1].y
= mb
->pmv
[1][0][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1162 if (mb
->mb_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
) {
1163 for (i
= 0; i
< 24 * 2; i
+= 2) {
1164 vb
[i
].x
= mo_vec
[0].x
;
1165 vb
[i
].y
= mo_vec
[0].y
;
1169 for (i
= 0; i
< 24 * 2; i
+= 2) {
1170 vb
[i
].x
= mo_vec
[0].x
;
1171 vb
[i
].y
= mo_vec
[0].y
;
1172 vb
[i
+ 1].x
= mo_vec
[1].x
;
1173 vb
[i
+ 1].y
= mo_vec
[1].y
;
1179 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA
:
1181 const struct vertex2f unit
=
1183 r
->surface_tex_inv_size
.x
* MACROBLOCK_WIDTH
,
1184 r
->surface_tex_inv_size
.y
* MACROBLOCK_HEIGHT
1186 const struct vertex2f half
=
1188 r
->surface_tex_inv_size
.x
* (MACROBLOCK_WIDTH
/ 2),
1189 r
->surface_tex_inv_size
.y
* (MACROBLOCK_HEIGHT
/ 2)
1191 const bool use_zb
= r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
;
1193 struct vert_stream_0
*vb
= ycbcr_vb
+ pos
* 24;
1195 SET_BLOCK(vb
, mb
->cbp
, mb
->mbx
, mb
->mby
,
1196 unit
.x
, unit
.y
, 0, 0, half
.x
, half
.y
,
1197 32, 2, 1, use_zb
, r
->zero_block
);
1199 SET_BLOCK(vb
+ 6, mb
->cbp
, mb
->mbx
, mb
->mby
,
1200 unit
.x
, unit
.y
, half
.x
, 0, half
.x
, half
.y
,
1201 16, 2, 1, use_zb
, r
->zero_block
);
1203 SET_BLOCK(vb
+ 12, mb
->cbp
, mb
->mbx
, mb
->mby
,
1204 unit
.x
, unit
.y
, 0, half
.y
, half
.x
, half
.y
,
1205 8, 2, 1, use_zb
, r
->zero_block
);
1207 SET_BLOCK(vb
+ 18, mb
->cbp
, mb
->mbx
, mb
->mby
,
1208 unit
.x
, unit
.y
, half
.x
, half
.y
, half
.x
, half
.y
,
1209 4, 2, 1, use_zb
, r
->zero_block
);
1219 gen_macroblock_stream(struct vl_mpeg12_mc_renderer
*r
,
1220 unsigned *num_macroblocks
)
1222 unsigned offset
[NUM_MACROBLOCK_TYPES
];
1223 struct vert_stream_0
*ycbcr_vb
;
1224 struct vertex2f
*ref_vb
[2];
1228 assert(num_macroblocks
);
1230 for (i
= 0; i
< r
->num_macroblocks
; ++i
) {
1231 enum MACROBLOCK_TYPE mb_type
= get_macroblock_type(&r
->macroblock_buf
[i
]);
1232 ++num_macroblocks
[mb_type
];
1237 for (i
= 1; i
< NUM_MACROBLOCK_TYPES
; ++i
)
1238 offset
[i
] = offset
[i
- 1] + num_macroblocks
[i
- 1];
1240 ycbcr_vb
= (struct vert_stream_0
*)pipe_buffer_map
1243 r
->vertex_bufs
.individual
.ycbcr
.buffer
,
1244 PIPE_BUFFER_USAGE_CPU_WRITE
| PIPE_BUFFER_USAGE_DISCARD
1247 for (i
= 0; i
< 2; ++i
)
1248 ref_vb
[i
] = (struct vertex2f
*)pipe_buffer_map
1251 r
->vertex_bufs
.individual
.ref
[i
].buffer
,
1252 PIPE_BUFFER_USAGE_CPU_WRITE
| PIPE_BUFFER_USAGE_DISCARD
1255 for (i
= 0; i
< r
->num_macroblocks
; ++i
) {
1256 enum MACROBLOCK_TYPE mb_type
= get_macroblock_type(&r
->macroblock_buf
[i
]);
1258 gen_macroblock_verts(r
, &r
->macroblock_buf
[i
], offset
[mb_type
],
1264 pipe_buffer_unmap(r
->pipe
->screen
, r
->vertex_bufs
.individual
.ycbcr
.buffer
);
1265 for (i
= 0; i
< 2; ++i
)
1266 pipe_buffer_unmap(r
->pipe
->screen
, r
->vertex_bufs
.individual
.ref
[i
].buffer
);
1270 flush(struct vl_mpeg12_mc_renderer
*r
)
1272 unsigned num_macroblocks
[NUM_MACROBLOCK_TYPES
] = { 0 };
1273 unsigned vb_start
= 0;
1274 struct vertex_shader_consts
*vs_consts
;
1278 assert(r
->num_macroblocks
== r
->macroblocks_per_batch
);
1280 gen_macroblock_stream(r
, num_macroblocks
);
1282 r
->fb_state
.cbufs
[0] = r
->pipe
->screen
->get_tex_surface
1284 r
->pipe
->screen
, r
->surface
,
1285 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
1288 r
->pipe
->set_framebuffer_state(r
->pipe
, &r
->fb_state
);
1289 r
->pipe
->set_viewport_state(r
->pipe
, &r
->viewport
);
1290 r
->pipe
->set_scissor_state(r
->pipe
, &r
->scissor
);
1292 vs_consts
= pipe_buffer_map
1294 r
->pipe
->screen
, r
->vs_const_buf
,
1295 PIPE_BUFFER_USAGE_CPU_WRITE
| PIPE_BUFFER_USAGE_DISCARD
1298 vs_consts
->denorm
.x
= r
->surface
->width0
;
1299 vs_consts
->denorm
.y
= r
->surface
->height0
;
1301 pipe_buffer_unmap(r
->pipe
->screen
, r
->vs_const_buf
);
1303 r
->pipe
->set_constant_buffer(r
->pipe
, PIPE_SHADER_VERTEX
, 0,
1305 r
->pipe
->set_constant_buffer(r
->pipe
, PIPE_SHADER_FRAGMENT
, 0,
1308 if (num_macroblocks
[MACROBLOCK_TYPE_INTRA
] > 0) {
1309 r
->pipe
->set_vertex_buffers(r
->pipe
, 1, r
->vertex_bufs
.all
);
1310 r
->pipe
->bind_vertex_elements_state(r
->pipe
, r
->vertex_elems
[0]);
1311 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 3, r
->textures
.all
);
1312 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 3, r
->samplers
.all
);
1313 r
->pipe
->bind_vs_state(r
->pipe
, r
->i_vs
);
1314 r
->pipe
->bind_fs_state(r
->pipe
, r
->i_fs
);
1316 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1317 num_macroblocks
[MACROBLOCK_TYPE_INTRA
] * 24);
1318 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_INTRA
] * 24;
1321 if (num_macroblocks
[MACROBLOCK_TYPE_FWD_FRAME_PRED
] > 0) {
1322 r
->pipe
->set_vertex_buffers(r
->pipe
, 2, r
->vertex_bufs
.all
);
1323 r
->pipe
->bind_vertex_elements_state(r
->pipe
, r
->vertex_elems
[1]);
1324 r
->textures
.individual
.ref
[0] = r
->past
;
1325 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 4, r
->textures
.all
);
1326 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 4, r
->samplers
.all
);
1327 r
->pipe
->bind_vs_state(r
->pipe
, r
->p_vs
[0]);
1328 r
->pipe
->bind_fs_state(r
->pipe
, r
->p_fs
[0]);
1330 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1331 num_macroblocks
[MACROBLOCK_TYPE_FWD_FRAME_PRED
] * 24);
1332 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_FWD_FRAME_PRED
] * 24;
1335 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
1336 r
->pipe
->set_vertex_buffers(r
->pipe
, 2, r
->vertex_bufs
.all
);
1337 r
->pipe
->bind_vertex_elements_state(r
->pipe
, r
->vertex_elems
[1]);
1338 r
->textures
.individual
.ref
[0] = r
->past
;
1339 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 4, r
->textures
.all
);
1340 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 4, r
->samplers
.all
);
1341 r
->pipe
->bind_vs_state(r
->pipe
, r
->p_vs
[1]);
1342 r
->pipe
->bind_fs_state(r
->pipe
, r
->p_fs
[1]);
1344 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1345 num_macroblocks
[MACROBLOCK_TYPE_FWD_FIELD_PRED
] * 24);
1346 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_FWD_FIELD_PRED
] * 24;
1349 if (num_macroblocks
[MACROBLOCK_TYPE_BKWD_FRAME_PRED
] > 0) {
1350 r
->pipe
->set_vertex_buffers(r
->pipe
, 2, r
->vertex_bufs
.all
);
1351 r
->pipe
->bind_vertex_elements_state(r
->pipe
, r
->vertex_elems
[1]);
1352 r
->textures
.individual
.ref
[0] = r
->future
;
1353 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 4, r
->textures
.all
);
1354 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 4, r
->samplers
.all
);
1355 r
->pipe
->bind_vs_state(r
->pipe
, r
->p_vs
[0]);
1356 r
->pipe
->bind_fs_state(r
->pipe
, r
->p_fs
[0]);
1358 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1359 num_macroblocks
[MACROBLOCK_TYPE_BKWD_FRAME_PRED
] * 24);
1360 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_BKWD_FRAME_PRED
] * 24;
1363 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
1364 r
->pipe
->set_vertex_buffers(r
->pipe
, 2, r
->vertex_bufs
.all
);
1365 r
->pipe
->bind_vertex_elements_state(r
->pipe
, r
->vertex_elems
[1]);
1366 r
->textures
.individual
.ref
[0] = r
->future
;
1367 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 4, r
->textures
.all
);
1368 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 4, r
->samplers
.all
);
1369 r
->pipe
->bind_vs_state(r
->pipe
, r
->p_vs
[1]);
1370 r
->pipe
->bind_fs_state(r
->pipe
, r
->p_fs
[1]);
1372 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1373 num_macroblocks
[MACROBLOCK_TYPE_BKWD_FIELD_PRED
] * 24);
1374 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_BKWD_FIELD_PRED
] * 24;
1377 if (num_macroblocks
[MACROBLOCK_TYPE_BI_FRAME_PRED
] > 0) {
1378 r
->pipe
->set_vertex_buffers(r
->pipe
, 3, r
->vertex_bufs
.all
);
1379 r
->pipe
->bind_vertex_elements_state(r
->pipe
, r
->vertex_elems
[2]);
1380 r
->textures
.individual
.ref
[0] = r
->past
;
1381 r
->textures
.individual
.ref
[1] = r
->future
;
1382 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 5, r
->textures
.all
);
1383 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 5, r
->samplers
.all
);
1384 r
->pipe
->bind_vs_state(r
->pipe
, r
->b_vs
[0]);
1385 r
->pipe
->bind_fs_state(r
->pipe
, r
->b_fs
[0]);
1387 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1388 num_macroblocks
[MACROBLOCK_TYPE_BI_FRAME_PRED
] * 24);
1389 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_BI_FRAME_PRED
] * 24;
1392 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
1393 r
->pipe
->set_vertex_buffers(r
->pipe
, 3, r
->vertex_bufs
.all
);
1394 r
->pipe
->bind_vertex_elements_state(r
->pipe
, r
->vertex_elems
[2]);
1395 r
->textures
.individual
.ref
[0] = r
->past
;
1396 r
->textures
.individual
.ref
[1] = r
->future
;
1397 r
->pipe
->set_fragment_sampler_textures(r
->pipe
, 5, r
->textures
.all
);
1398 r
->pipe
->bind_fragment_sampler_states(r
->pipe
, 5, r
->samplers
.all
);
1399 r
->pipe
->bind_vs_state(r
->pipe
, r
->b_vs
[1]);
1400 r
->pipe
->bind_fs_state(r
->pipe
, r
->b_fs
[1]);
1402 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1403 num_macroblocks
[MACROBLOCK_TYPE_BI_FIELD_PRED
] * 24);
1404 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_BI_FIELD_PRED
] * 24;
1407 r
->pipe
->flush(r
->pipe
, PIPE_FLUSH_RENDER_CACHE
, r
->fence
);
1408 pipe_surface_reference(&r
->fb_state
.cbufs
[0], NULL
);
1410 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
)
1411 for (i
= 0; i
< 3; ++i
)
1412 r
->zero_block
[i
].x
= ZERO_BLOCK_NIL
;
1414 r
->num_macroblocks
= 0;
1418 grab_frame_coded_block(short *src
, short *dst
, unsigned dst_pitch
)
1425 for (y
= 0; y
< BLOCK_HEIGHT
; ++y
)
1426 memcpy(dst
+ y
* dst_pitch
, src
+ y
* BLOCK_WIDTH
, BLOCK_WIDTH
* 2);
1430 grab_field_coded_block(short *src
, short *dst
, unsigned dst_pitch
)
1437 for (y
= 0; y
< BLOCK_HEIGHT
; ++y
)
1438 memcpy(dst
+ y
* dst_pitch
* 2, src
+ y
* BLOCK_WIDTH
, BLOCK_WIDTH
* 2);
1442 fill_zero_block(short *dst
, unsigned dst_pitch
)
1448 for (y
= 0; y
< BLOCK_HEIGHT
; ++y
)
1449 memset(dst
+ y
* dst_pitch
, 0, BLOCK_WIDTH
* 2);
1453 grab_blocks(struct vl_mpeg12_mc_renderer
*r
, unsigned mbx
, unsigned mby
,
1454 enum pipe_mpeg12_dct_type dct_type
, unsigned cbp
, short *blocks
)
1458 unsigned tb
= 0, sb
= 0;
1459 unsigned mbpx
= mbx
* MACROBLOCK_WIDTH
, mbpy
= mby
* MACROBLOCK_HEIGHT
;
1465 tex_pitch
= r
->tex_transfer
[0]->stride
/ util_format_get_blocksize(r
->tex_transfer
[0]->texture
->format
);
1466 texels
= r
->texels
[0] + mbpy
* tex_pitch
+ mbpx
;
1468 for (y
= 0; y
< 2; ++y
) {
1469 for (x
= 0; x
< 2; ++x
, ++tb
) {
1470 if ((cbp
>> (5 - tb
)) & 1) {
1471 if (dct_type
== PIPE_MPEG12_DCT_TYPE_FRAME
) {
1472 grab_frame_coded_block(blocks
+ sb
* BLOCK_WIDTH
* BLOCK_HEIGHT
,
1473 texels
+ y
* tex_pitch
* BLOCK_WIDTH
+
1474 x
* BLOCK_WIDTH
, tex_pitch
);
1477 grab_field_coded_block(blocks
+ sb
* BLOCK_WIDTH
* BLOCK_HEIGHT
,
1478 texels
+ y
* tex_pitch
+ x
* BLOCK_WIDTH
,
1484 else if (r
->eb_handling
!= VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE
) {
1485 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL
||
1486 ZERO_BLOCK_IS_NIL(r
->zero_block
[0])) {
1487 fill_zero_block(texels
+ y
* tex_pitch
* BLOCK_WIDTH
+ x
* BLOCK_WIDTH
, tex_pitch
);
1488 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
) {
1489 r
->zero_block
[0].x
= (mbpx
+ x
* 8) * r
->surface_tex_inv_size
.x
;
1490 r
->zero_block
[0].y
= (mbpy
+ y
* 8) * r
->surface_tex_inv_size
.y
;
1497 /* TODO: Implement 422, 444 */
1498 assert(r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
);
1503 for (tb
= 0; tb
< 2; ++tb
) {
1504 tex_pitch
= r
->tex_transfer
[tb
+ 1]->stride
/ util_format_get_blocksize(r
->tex_transfer
[tb
+ 1]->texture
->format
);
1505 texels
= r
->texels
[tb
+ 1] + mbpy
* tex_pitch
+ mbpx
;
1507 if ((cbp
>> (1 - tb
)) & 1) {
1508 grab_frame_coded_block(blocks
+ sb
* BLOCK_WIDTH
* BLOCK_HEIGHT
, texels
, tex_pitch
);
1511 else if (r
->eb_handling
!= VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE
) {
1512 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL
||
1513 ZERO_BLOCK_IS_NIL(r
->zero_block
[tb
+ 1])) {
1514 fill_zero_block(texels
, tex_pitch
);
1515 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
) {
1516 r
->zero_block
[tb
+ 1].x
= (mbpx
<< 1) * r
->surface_tex_inv_size
.x
;
1517 r
->zero_block
[tb
+ 1].y
= (mbpy
<< 1) * r
->surface_tex_inv_size
.y
;
1525 grab_macroblock(struct vl_mpeg12_mc_renderer
*r
,
1526 struct pipe_mpeg12_macroblock
*mb
)
1530 assert(r
->num_macroblocks
< r
->macroblocks_per_batch
);
1532 memcpy(&r
->macroblock_buf
[r
->num_macroblocks
], mb
,
1533 sizeof(struct pipe_mpeg12_macroblock
));
1535 grab_blocks(r
, mb
->mbx
, mb
->mby
, mb
->dct_type
, mb
->cbp
, mb
->blocks
);
1537 ++r
->num_macroblocks
;
1541 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer
*renderer
,
1542 struct pipe_context
*pipe
,
1543 unsigned picture_width
,
1544 unsigned picture_height
,
1545 enum pipe_video_chroma_format chroma_format
,
1546 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode
,
1547 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling
,
1554 /* TODO: Implement other policies */
1555 assert(bufmode
== VL_MPEG12_MC_RENDERER_BUFFER_PICTURE
);
1556 /* TODO: Implement this */
1557 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1558 assert(eb_handling
!= VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE
);
1559 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1560 assert(pot_buffers
);
1562 memset(renderer
, 0, sizeof(struct vl_mpeg12_mc_renderer
));
1564 renderer
->pipe
= pipe
;
1565 renderer
->picture_width
= picture_width
;
1566 renderer
->picture_height
= picture_height
;
1567 renderer
->chroma_format
= chroma_format
;
1568 renderer
->bufmode
= bufmode
;
1569 renderer
->eb_handling
= eb_handling
;
1570 renderer
->pot_buffers
= pot_buffers
;
1572 if (!init_pipe_state(renderer
))
1574 if (!init_shaders(renderer
)) {
1575 cleanup_pipe_state(renderer
);
1578 if (!init_buffers(renderer
)) {
1579 cleanup_shaders(renderer
);
1580 cleanup_pipe_state(renderer
);
1584 renderer
->surface
= NULL
;
1585 renderer
->past
= NULL
;
1586 renderer
->future
= NULL
;
1587 for (i
= 0; i
< 3; ++i
)
1588 renderer
->zero_block
[i
].x
= ZERO_BLOCK_NIL
;
1589 renderer
->num_macroblocks
= 0;
1591 xfer_buffers_map(renderer
);
/**
 * Tears down a renderer set up by vl_mpeg12_mc_renderer_init(): unmaps the
 * transfer buffers, then destroys pipe state, shaders and buffers.
 */
void
vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
{
   /* The transfer buffers are unmapped before anything is destroyed. */
   xfer_buffers_unmap(renderer);

   cleanup_pipe_state(renderer);
   cleanup_shaders(renderer);
   cleanup_buffers(renderer);
}
1609 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1611 struct pipe_texture
*surface
,
1612 struct pipe_texture
*past
,
1613 struct pipe_texture
*future
,
1614 unsigned num_macroblocks
,
1615 struct pipe_mpeg12_macroblock
1616 *mpeg12_macroblocks
,
1617 struct pipe_fence_handle
**fence
)
1619 bool new_surface
= false;
1623 assert(num_macroblocks
);
1624 assert(mpeg12_macroblocks
);
1626 if (renderer
->surface
) {
1627 if (surface
!= renderer
->surface
) {
1628 if (renderer
->num_macroblocks
> 0) {
1629 xfer_buffers_unmap(renderer
);
1636 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1637 assert(surface
!= renderer
->surface
|| renderer
->past
== past
);
1638 assert(surface
!= renderer
->surface
|| renderer
->future
== future
);
1644 renderer
->surface
= surface
;
1645 renderer
->past
= past
;
1646 renderer
->future
= future
;
1647 renderer
->fence
= fence
;
1648 renderer
->surface_tex_inv_size
.x
= 1.0f
/ surface
->width0
;
1649 renderer
->surface_tex_inv_size
.y
= 1.0f
/ surface
->height0
;
1652 while (num_macroblocks
) {
1653 unsigned left_in_batch
= renderer
->macroblocks_per_batch
- renderer
->num_macroblocks
;
1654 unsigned num_to_submit
= MIN2(num_macroblocks
, left_in_batch
);
1657 for (i
= 0; i
< num_to_submit
; ++i
) {
1658 assert(mpeg12_macroblocks
[i
].base
.codec
== PIPE_VIDEO_CODEC_MPEG12
);
1659 grab_macroblock(renderer
, &mpeg12_macroblocks
[i
]);
1662 num_macroblocks
-= num_to_submit
;
1664 if (renderer
->num_macroblocks
== renderer
->macroblocks_per_batch
) {
1665 xfer_buffers_unmap(renderer
);
1667 xfer_buffers_map(renderer
);
1668 /* Next time we get this surface it may have new ref frames */
1669 renderer
->surface
= NULL
;