1 #include "vl_mpeg12_mc_renderer.h"
3 #include <pipe/p_context.h>
4 #include <pipe/p_inlines.h>
5 #include <util/u_math.h>
6 #include <util/u_memory.h>
7 #include <tgsi/tgsi_parse.h>
8 #include <tgsi/tgsi_build.h>
9 #include "vl_shader_build.h"
/* Alignment handed to pipe_buffer_create() for every buffer created here. */
#define DEFAULT_BUF_ALIGNMENT   1

/* Macroblock dimensions, in the same units as picture_width/picture_height. */
#define MACROBLOCK_WIDTH        16
#define MACROBLOCK_HEIGHT       16

/* Height of one block inside a macroblock. */
#define BLOCK_HEIGHT            8

/*
 * Sentinel for "no zero block": any zero-block coordinate whose x component
 * is negative is treated as absent by ZERO_BLOCK_IS_NIL().
 */
#define ZERO_BLOCK_NIL          -1.0f
#define ZERO_BLOCK_IS_NIL(zb)   ((zb).x < 0.0f)
29 struct vertex_shader_consts
31 struct vertex4f denorm
;
34 struct fragment_shader_consts
36 struct vertex4f multiplier
;
41 * Muliplier renormalizes block samples from 16 bits to 12 bits.
42 * Divider is used when calculating Y % 2 for choosing top or bottom
43 * field for P or B macroblocks.
44 * TODO: Use immediates.
46 static const struct fragment_shader_consts fs_consts
= {
47 {32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 0.0f
},
48 {0.5f
, 2.0f
, 0.0f
, 0.0f
}
54 struct vertex2f luma_tc
;
55 struct vertex2f cb_tc
;
56 struct vertex2f cr_tc
;
61 MACROBLOCK_TYPE_INTRA
,
62 MACROBLOCK_TYPE_FWD_FRAME_PRED
,
63 MACROBLOCK_TYPE_FWD_FIELD_PRED
,
64 MACROBLOCK_TYPE_BKWD_FRAME_PRED
,
65 MACROBLOCK_TYPE_BKWD_FIELD_PRED
,
66 MACROBLOCK_TYPE_BI_FRAME_PRED
,
67 MACROBLOCK_TYPE_BI_FIELD_PRED
,
73 create_intra_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
75 const unsigned max_tokens
= 50;
77 struct pipe_shader_state vs
;
78 struct tgsi_token
*tokens
;
79 struct tgsi_header
*header
;
81 struct tgsi_full_declaration decl
;
82 struct tgsi_full_instruction inst
;
88 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
89 *(struct tgsi_version
*) &tokens
[0] = tgsi_build_version();
90 header
= (struct tgsi_header
*) &tokens
[1];
91 *header
= tgsi_build_header();
92 *(struct tgsi_processor
*) &tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
97 * decl i0 ; Vertex pos
98 * decl i1 ; Luma texcoords
99 * decl i2 ; Chroma Cb texcoords
100 * decl i3 ; Chroma Cr texcoords
102 for (unsigned i
= 0; i
< 4; i
++)
104 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
105 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
109 * decl o0 ; Vertex pos
110 * decl o1 ; Luma texcoords
111 * decl o2 ; Chroma Cb texcoords
112 * decl o3 ; Chroma Cr texcoords
114 for (unsigned i
= 0; i
< 4; i
++)
116 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
117 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
121 * mov o0, i0 ; Move input vertex pos to output
122 * mov o1, i1 ; Move input luma texcoords to output
123 * mov o2, i2 ; Move input chroma Cb texcoords to output
124 * mov o3, i3 ; Move input chroma Cr texcoords to output
126 for (unsigned i
= 0; i
< 4; ++i
)
128 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
129 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
134 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
136 assert(ti
<= max_tokens
);
139 r
->i_vs
= r
->pipe
->create_vs_state(r
->pipe
, &vs
);
144 create_intra_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
146 const unsigned max_tokens
= 100;
148 struct pipe_shader_state fs
;
149 struct tgsi_token
*tokens
;
150 struct tgsi_header
*header
;
152 struct tgsi_full_declaration decl
;
153 struct tgsi_full_instruction inst
;
159 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
160 *(struct tgsi_version
*) &tokens
[0] = tgsi_build_version();
161 header
= (struct tgsi_header
*) &tokens
[1];
162 *header
= tgsi_build_header();
163 *(struct tgsi_processor
*) &tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
168 * decl i0 ; Luma texcoords
169 * decl i1 ; Chroma Cb texcoords
170 * decl i2 ; Chroma Cr texcoords
172 for (unsigned i
= 0; i
< 3; ++i
)
174 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
175 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
178 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
179 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
180 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
182 /* decl o0 ; Fragment color */
183 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
184 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
187 decl
= vl_decl_temps(0, 1);
188 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
191 * decl s0 ; Sampler for luma texture
192 * decl s1 ; Sampler for chroma Cb texture
193 * decl s2 ; Sampler for chroma Cr texture
195 for (unsigned i
= 0; i
< 3; ++i
)
197 decl
= vl_decl_samplers(i
, i
);
198 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
202 * tex2d t1, i0, s0 ; Read texel from luma texture
203 * mov t0.x, t1.x ; Move luma sample into .x component
204 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
205 * mov t0.y, t1.x ; Move Cb sample into .y component
206 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
207 * mov t0.z, t1.x ; Move Cr sample into .z component
209 for (unsigned i
= 0; i
< 3; ++i
)
211 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
212 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
214 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
215 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
216 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
217 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
218 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
219 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
222 /* mul o0, t0, c0 ; Rescale texel to correct range */
223 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
224 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
228 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
230 assert(ti
<= max_tokens
);
233 r
->i_fs
= r
->pipe
->create_fs_state(r
->pipe
, &fs
);
238 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
240 const unsigned max_tokens
= 100;
242 struct pipe_shader_state vs
;
243 struct tgsi_token
*tokens
;
244 struct tgsi_header
*header
;
246 struct tgsi_full_declaration decl
;
247 struct tgsi_full_instruction inst
;
253 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
254 *(struct tgsi_version
*) &tokens
[0] = tgsi_build_version();
255 header
= (struct tgsi_header
*) &tokens
[1];
256 *header
= tgsi_build_header();
257 *(struct tgsi_processor
*) &tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
262 * decl i0 ; Vertex pos
263 * decl i1 ; Luma texcoords
264 * decl i2 ; Chroma Cb texcoords
265 * decl i3 ; Chroma Cr texcoords
266 * decl i4 ; Ref surface top field texcoords
267 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
269 for (unsigned i
= 0; i
< 6; i
++)
271 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
272 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
276 * decl o0 ; Vertex pos
277 * decl o1 ; Luma texcoords
278 * decl o2 ; Chroma Cb texcoords
279 * decl o3 ; Chroma Cr texcoords
280 * decl o4 ; Ref macroblock texcoords
282 for (unsigned i
= 0; i
< 5; i
++)
284 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
285 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
289 * mov o0, i0 ; Move input vertex pos to output
290 * mov o1, i1 ; Move input luma texcoords to output
291 * mov o2, i2 ; Move input chroma Cb texcoords to output
292 * mov o3, i3 ; Move input chroma Cr texcoords to output
294 for (unsigned i
= 0; i
< 4; ++i
)
296 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
297 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
300 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
301 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, 4);
302 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
306 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
308 assert(ti
<= max_tokens
);
311 r
->p_vs
[0] = r
->pipe
->create_vs_state(r
->pipe
, &vs
);
316 create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
322 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
324 const unsigned max_tokens
= 100;
326 struct pipe_shader_state fs
;
327 struct tgsi_token
*tokens
;
328 struct tgsi_header
*header
;
330 struct tgsi_full_declaration decl
;
331 struct tgsi_full_instruction inst
;
337 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
338 *(struct tgsi_version
*) &tokens
[0] = tgsi_build_version();
339 header
= (struct tgsi_header
*) &tokens
[1];
340 *header
= tgsi_build_header();
341 *(struct tgsi_processor
*) &tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
346 * decl i0 ; Luma texcoords
347 * decl i1 ; Chroma Cb texcoords
348 * decl i2 ; Chroma Cr texcoords
349 * decl i3 ; Ref macroblock texcoords
351 for (unsigned i
= 0; i
< 4; ++i
)
353 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
354 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
357 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
358 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
359 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
361 /* decl o0 ; Fragment color */
362 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
363 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
366 decl
= vl_decl_temps(0, 1);
367 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
370 * decl s0 ; Sampler for luma texture
371 * decl s1 ; Sampler for chroma Cb texture
372 * decl s2 ; Sampler for chroma Cr texture
373 * decl s3 ; Sampler for ref surface texture
375 for (unsigned i
= 0; i
< 4; ++i
)
377 decl
= vl_decl_samplers(i
, i
);
378 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
382 * tex2d t1, i0, s0 ; Read texel from luma texture
383 * mov t0.x, t1.x ; Move luma sample into .x component
384 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
385 * mov t0.y, t1.x ; Move Cb sample into .y component
386 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
387 * mov t0.z, t1.x ; Move Cr sample into .z component
389 for (unsigned i
= 0; i
< 3; ++i
)
391 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
392 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
394 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
395 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
396 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
397 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
398 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
399 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
402 /* mul t0, t0, c0 ; Rescale texel to correct range */
403 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
404 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
406 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
407 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, 3, TGSI_FILE_SAMPLER
, 3);
408 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
410 /* add o0, t0, t1 ; Add ref and differential to form final output */
411 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
412 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
416 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
418 assert(ti
<= max_tokens
);
421 r
->p_fs
[0] = r
->pipe
->create_fs_state(r
->pipe
, &fs
);
426 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
432 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
434 const unsigned max_tokens
= 100;
436 struct pipe_shader_state vs
;
437 struct tgsi_token
*tokens
;
438 struct tgsi_header
*header
;
440 struct tgsi_full_declaration decl
;
441 struct tgsi_full_instruction inst
;
447 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
448 *(struct tgsi_version
*) &tokens
[0] = tgsi_build_version();
449 header
= (struct tgsi_header
*) &tokens
[1];
450 *header
= tgsi_build_header();
451 *(struct tgsi_processor
*) &tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
456 * decl i0 ; Vertex pos
457 * decl i1 ; Luma texcoords
458 * decl i2 ; Chroma Cb texcoords
459 * decl i3 ; Chroma Cr texcoords
460 * decl i4 ; First ref macroblock top field texcoords
461 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
462 * decl i6 ; Second ref macroblock top field texcoords
463 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
465 for (unsigned i
= 0; i
< 8; i
++)
467 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
468 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
472 * decl o0 ; Vertex pos
473 * decl o1 ; Luma texcoords
474 * decl o2 ; Chroma Cb texcoords
475 * decl o3 ; Chroma Cr texcoords
476 * decl o4 ; First ref macroblock texcoords
477 * decl o5 ; Second ref macroblock texcoords
479 for (unsigned i
= 0; i
< 6; i
++)
481 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
482 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
486 * mov o0, i0 ; Move input vertex pos to output
487 * mov o1, i1 ; Move input luma texcoords to output
488 * mov o2, i2 ; Move input chroma Cb texcoords to output
489 * mov o3, i3 ; Move input chroma Cr texcoords to output
491 for (unsigned i
= 0; i
< 4; ++i
)
493 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
494 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
498 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
499 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
501 for (unsigned i
= 0; i
< 2; ++i
)
503 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, (i
+ 2) * 2);
504 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
509 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
511 assert(ti
<= max_tokens
);
514 r
->b_vs
[0] = r
->pipe
->create_vs_state(r
->pipe
, &vs
);
519 create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer
*r
)
525 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
527 const unsigned max_tokens
= 100;
529 struct pipe_shader_state fs
;
530 struct tgsi_token
*tokens
;
531 struct tgsi_header
*header
;
533 struct tgsi_full_declaration decl
;
534 struct tgsi_full_instruction inst
;
540 tokens
= (struct tgsi_token
*) malloc(max_tokens
* sizeof(struct tgsi_token
));
541 *(struct tgsi_version
*) &tokens
[0] = tgsi_build_version();
542 header
= (struct tgsi_header
*) &tokens
[1];
543 *header
= tgsi_build_header();
544 *(struct tgsi_processor
*) &tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
549 * decl i0 ; Luma texcoords
550 * decl i1 ; Chroma Cb texcoords
551 * decl i2 ; Chroma Cr texcoords
552 * decl i3 ; First ref macroblock texcoords
553 * decl i4 ; Second ref macroblock texcoords
555 for (unsigned i
= 0; i
< 5; ++i
)
557 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
558 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
562 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
563 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
565 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
566 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
568 /* decl o0 ; Fragment color */
569 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
570 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
573 decl
= vl_decl_temps(0, 2);
574 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
577 * decl s0 ; Sampler for luma texture
578 * decl s1 ; Sampler for chroma Cb texture
579 * decl s2 ; Sampler for chroma Cr texture
580 * decl s3 ; Sampler for first ref surface texture
581 * decl s4 ; Sampler for second ref surface texture
583 for (unsigned i
= 0; i
< 5; ++i
)
585 decl
= vl_decl_samplers(i
, i
);
586 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
590 * tex2d t1, i0, s0 ; Read texel from luma texture
591 * mov t0.x, t1.x ; Move luma sample into .x component
592 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
593 * mov t0.y, t1.x ; Move Cb sample into .y component
594 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
595 * mov t0.z, t1.x ; Move Cr sample into .z component
597 for (unsigned i
= 0; i
< 3; ++i
)
599 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
600 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
602 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
603 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
604 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
605 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
606 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
607 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
610 /* mul t0, t0, c0 ; Rescale texel to correct range */
611 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
612 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
615 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
616 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
618 for (unsigned i
= 0; i
< 2; ++i
)
620 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, i
+ 3);
621 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
624 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
625 inst
= vl_inst4(TGSI_OPCODE_LRP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_CONSTANT
, 1, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
626 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
627 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
628 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
629 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
630 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
632 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
633 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
634 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
638 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
640 assert(ti
<= max_tokens
);
643 r
->b_fs
[0] = r
->pipe
->create_fs_state(r
->pipe
, &fs
);
648 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer
*r
)
654 xfer_buffers_map(struct vl_mpeg12_mc_renderer
*r
)
658 for (unsigned i
= 0; i
< 3; ++i
)
660 r
->tex_transfer
[i
] = r
->pipe
->screen
->get_tex_transfer
662 r
->pipe
->screen
, r
->textures
.all
[i
],
663 0, 0, 0, PIPE_TRANSFER_WRITE
, 0, 0,
664 r
->textures
.all
[i
]->width
[0], r
->textures
.all
[i
]->height
[0]
667 r
->texels
[i
] = r
->pipe
->screen
->transfer_map(r
->pipe
->screen
, r
->tex_transfer
[i
]);
672 xfer_buffers_unmap(struct vl_mpeg12_mc_renderer
*r
)
676 for (unsigned i
= 0; i
< 3; ++i
)
678 r
->pipe
->screen
->transfer_unmap(r
->pipe
->screen
, r
->tex_transfer
[i
]);
679 r
->pipe
->screen
->tex_transfer_destroy(r
->tex_transfer
[i
]);
684 init_pipe_state(struct vl_mpeg12_mc_renderer
*r
)
686 struct pipe_sampler_state sampler
;
691 r
->viewport
.scale
[0] = r
->pot_buffers
?
692 util_next_power_of_two(r
->picture_width
) : r
->picture_width
;
693 r
->viewport
.scale
[1] = r
->pot_buffers
?
694 util_next_power_of_two(r
->picture_height
) : r
->picture_height
;
695 r
->viewport
.scale
[2] = 1;
696 r
->viewport
.scale
[3] = 1;
697 r
->viewport
.translate
[0] = 0;
698 r
->viewport
.translate
[1] = 0;
699 r
->viewport
.translate
[2] = 0;
700 r
->viewport
.translate
[3] = 0;
702 r
->fb_state
.width
= r
->pot_buffers
?
703 util_next_power_of_two(r
->picture_width
) : r
->picture_width
;
704 r
->fb_state
.height
= r
->pot_buffers
?
705 util_next_power_of_two(r
->picture_height
) : r
->picture_height
;
706 r
->fb_state
.nr_cbufs
= 1;
707 r
->fb_state
.zsbuf
= NULL
;
710 filters
[0] = PIPE_TEX_FILTER_NEAREST
;
712 if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_444
||
713 r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
)
715 filters
[1] = PIPE_TEX_FILTER_NEAREST
;
716 filters
[2] = PIPE_TEX_FILTER_NEAREST
;
720 filters
[1] = PIPE_TEX_FILTER_LINEAR
;
721 filters
[2] = PIPE_TEX_FILTER_LINEAR
;
723 /* Fwd, bkwd ref filters */
724 filters
[3] = PIPE_TEX_FILTER_LINEAR
;
725 filters
[4] = PIPE_TEX_FILTER_LINEAR
;
727 for (unsigned i
= 0; i
< 5; ++i
)
729 sampler
.wrap_s
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
730 sampler
.wrap_t
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
731 sampler
.wrap_r
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
732 sampler
.min_img_filter
= filters
[i
];
733 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
734 sampler
.mag_img_filter
= filters
[i
];
735 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
736 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
737 sampler
.normalized_coords
= 1;
738 /*sampler.prefilter = ; */
739 /*sampler.shadow_ambient = ; */
740 /*sampler.lod_bias = ; */
742 /*sampler.max_lod = ; */
743 /*sampler.border_color[i] = ; */
744 /*sampler.max_anisotropy = ; */
745 r
->samplers
.all
[i
] = r
->pipe
->create_sampler_state(r
->pipe
, &sampler
);
752 cleanup_pipe_state(struct vl_mpeg12_mc_renderer
*r
)
756 for (unsigned i
= 0; i
< 5; ++i
)
757 r
->pipe
->delete_sampler_state(r
->pipe
, r
->samplers
.all
[i
]);
/**
 * Create every shader the renderer uses: the intra pair, the single-reference
 * frame-prediction pair and the bi-directional frame-prediction pair.  The
 * field-prediction variants are not created here.
 *
 * NOTE(review): the return-type line and the final return are not visible in
 * the reviewed listing; `static bool` / `return true` are assumed -- confirm.
 *
 * @return true (no failure is detected by this function).
 */
static bool
init_shaders(struct vl_mpeg12_mc_renderer *r)
{
   assert(r);

   /* Intra-coded macroblocks */
   create_intra_vert_shader(r);
   create_intra_frag_shader(r);

   /* One reference surface, frame prediction */
   create_frame_pred_vert_shader(r);
   create_frame_pred_frag_shader(r);

   /* Two reference surfaces, frame prediction */
   create_frame_bi_pred_vert_shader(r);
   create_frame_bi_pred_frag_shader(r);

   return true;
}
776 cleanup_shaders(struct vl_mpeg12_mc_renderer
*r
)
780 r
->pipe
->delete_vs_state(r
->pipe
, r
->i_vs
);
781 r
->pipe
->delete_fs_state(r
->pipe
, r
->i_fs
);
782 r
->pipe
->delete_vs_state(r
->pipe
, r
->p_vs
[0]);
783 r
->pipe
->delete_fs_state(r
->pipe
, r
->p_fs
[0]);
784 r
->pipe
->delete_vs_state(r
->pipe
, r
->b_vs
[0]);
785 r
->pipe
->delete_fs_state(r
->pipe
, r
->b_fs
[0]);
789 init_buffers(struct vl_mpeg12_mc_renderer
*r
)
791 struct pipe_texture
template;
794 align(r
->picture_width
, MACROBLOCK_WIDTH
) / MACROBLOCK_WIDTH
;
796 align(r
->picture_height
, MACROBLOCK_HEIGHT
) / MACROBLOCK_HEIGHT
;
800 r
->macroblocks_per_batch
=
801 mbw
* (r
->bufmode
== VL_MPEG12_MC_RENDERER_BUFFER_PICTURE
? mbh
: 1);
802 r
->num_macroblocks
= 0;
803 r
->macroblock_buf
= MALLOC(r
->macroblocks_per_batch
* sizeof(struct pipe_mpeg12_macroblock
));
805 memset(&template, 0, sizeof(struct pipe_texture
));
806 template.target
= PIPE_TEXTURE_2D
;
807 /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
808 template.format
= PIPE_FORMAT_R16_SNORM
;
809 template.last_level
= 0;
810 template.width
[0] = r
->pot_buffers
?
811 util_next_power_of_two(r
->picture_width
) : r
->picture_width
;
812 template.height
[0] = r
->pot_buffers
?
813 util_next_power_of_two(r
->picture_height
) : r
->picture_height
;
814 template.depth
[0] = 1;
815 pf_get_block(template.format
, &template.block
);
816 template.tex_usage
= PIPE_TEXTURE_USAGE_SAMPLER
| PIPE_TEXTURE_USAGE_DYNAMIC
;
818 r
->textures
.individual
.y
= r
->pipe
->screen
->texture_create(r
->pipe
->screen
, &template);
820 if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
)
822 template.width
[0] = r
->pot_buffers
?
823 util_next_power_of_two(r
->picture_width
/ 2) :
824 r
->picture_width
/ 2;
825 template.height
[0] = r
->pot_buffers
?
826 util_next_power_of_two(r
->picture_height
/ 2) :
827 r
->picture_height
/ 2;
829 else if (r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_422
)
830 template.height
[0] = r
->pot_buffers
?
831 util_next_power_of_two(r
->picture_height
/ 2) :
832 r
->picture_height
/ 2;
834 r
->textures
.individual
.cb
=
835 r
->pipe
->screen
->texture_create(r
->pipe
->screen
, &template);
836 r
->textures
.individual
.cr
=
837 r
->pipe
->screen
->texture_create(r
->pipe
->screen
, &template);
839 r
->vertex_bufs
.individual
.ycbcr
.stride
= sizeof(struct vertex2f
) * 4;
840 r
->vertex_bufs
.individual
.ycbcr
.max_index
= 24 * r
->macroblocks_per_batch
- 1;
841 r
->vertex_bufs
.individual
.ycbcr
.buffer_offset
= 0;
842 r
->vertex_bufs
.individual
.ycbcr
.buffer
= pipe_buffer_create
845 DEFAULT_BUF_ALIGNMENT
,
846 PIPE_BUFFER_USAGE_VERTEX
| PIPE_BUFFER_USAGE_DISCARD
,
847 sizeof(struct vertex2f
) * 4 * 24 * r
->macroblocks_per_batch
850 for (unsigned i
= 1; i
< 3; ++i
)
852 r
->vertex_bufs
.all
[i
].stride
= sizeof(struct vertex2f
) * 2;
853 r
->vertex_bufs
.all
[i
].max_index
= 24 * r
->macroblocks_per_batch
- 1;
854 r
->vertex_bufs
.all
[i
].buffer_offset
= 0;
855 r
->vertex_bufs
.all
[i
].buffer
= pipe_buffer_create
858 DEFAULT_BUF_ALIGNMENT
,
859 PIPE_BUFFER_USAGE_VERTEX
| PIPE_BUFFER_USAGE_DISCARD
,
860 sizeof(struct vertex2f
) * 2 * 24 * r
->macroblocks_per_batch
864 /* Position element */
865 r
->vertex_elems
[0].src_offset
= 0;
866 r
->vertex_elems
[0].vertex_buffer_index
= 0;
867 r
->vertex_elems
[0].nr_components
= 2;
868 r
->vertex_elems
[0].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
870 /* Luma, texcoord element */
871 r
->vertex_elems
[1].src_offset
= sizeof(struct vertex2f
);
872 r
->vertex_elems
[1].vertex_buffer_index
= 0;
873 r
->vertex_elems
[1].nr_components
= 2;
874 r
->vertex_elems
[1].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
876 /* Chroma Cr texcoord element */
877 r
->vertex_elems
[2].src_offset
= sizeof(struct vertex2f
) * 2;
878 r
->vertex_elems
[2].vertex_buffer_index
= 0;
879 r
->vertex_elems
[2].nr_components
= 2;
880 r
->vertex_elems
[2].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
882 /* Chroma Cb texcoord element */
883 r
->vertex_elems
[3].src_offset
= sizeof(struct vertex2f
) * 3;
884 r
->vertex_elems
[3].vertex_buffer_index
= 0;
885 r
->vertex_elems
[3].nr_components
= 2;
886 r
->vertex_elems
[3].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
888 /* First ref surface top field texcoord element */
889 r
->vertex_elems
[4].src_offset
= 0;
890 r
->vertex_elems
[4].vertex_buffer_index
= 1;
891 r
->vertex_elems
[4].nr_components
= 2;
892 r
->vertex_elems
[4].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
894 /* First ref surface bottom field texcoord element */
895 r
->vertex_elems
[5].src_offset
= sizeof(struct vertex2f
);
896 r
->vertex_elems
[5].vertex_buffer_index
= 1;
897 r
->vertex_elems
[5].nr_components
= 2;
898 r
->vertex_elems
[5].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
900 /* Second ref surface top field texcoord element */
901 r
->vertex_elems
[6].src_offset
= 0;
902 r
->vertex_elems
[6].vertex_buffer_index
= 2;
903 r
->vertex_elems
[6].nr_components
= 2;
904 r
->vertex_elems
[6].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
906 /* Second ref surface bottom field texcoord element */
907 r
->vertex_elems
[7].src_offset
= sizeof(struct vertex2f
);
908 r
->vertex_elems
[7].vertex_buffer_index
= 2;
909 r
->vertex_elems
[7].nr_components
= 2;
910 r
->vertex_elems
[7].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
912 r
->vs_const_buf
.buffer
= pipe_buffer_create
915 DEFAULT_BUF_ALIGNMENT
,
916 PIPE_BUFFER_USAGE_CONSTANT
| PIPE_BUFFER_USAGE_DISCARD
,
917 sizeof(struct vertex_shader_consts
)
920 r
->fs_const_buf
.buffer
= pipe_buffer_create
923 DEFAULT_BUF_ALIGNMENT
,
924 PIPE_BUFFER_USAGE_CONSTANT
, sizeof(struct fragment_shader_consts
)
929 pipe_buffer_map(r
->pipe
->screen
, r
->fs_const_buf
.buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
),
930 &fs_consts
, sizeof(struct fragment_shader_consts
)
933 pipe_buffer_unmap(r
->pipe
->screen
, r
->fs_const_buf
.buffer
);
939 cleanup_buffers(struct vl_mpeg12_mc_renderer
*r
)
943 pipe_buffer_reference(&r
->vs_const_buf
.buffer
, NULL
);
944 pipe_buffer_reference(&r
->fs_const_buf
.buffer
, NULL
);
946 for (unsigned i
= 0; i
< 3; ++i
)
947 pipe_buffer_reference(&r
->vertex_bufs
.all
[i
].buffer
, NULL
);
949 for (unsigned i
= 0; i
< 3; ++i
)
950 pipe_texture_reference(&r
->textures
.all
[i
], NULL
);
952 FREE(r
->macroblock_buf
);
955 static enum MACROBLOCK_TYPE
956 get_macroblock_type(struct pipe_mpeg12_macroblock
*mb
)
962 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA
:
963 return MACROBLOCK_TYPE_INTRA
;
964 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD
:
965 return mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
?
966 MACROBLOCK_TYPE_FWD_FRAME_PRED
: MACROBLOCK_TYPE_FWD_FIELD_PRED
;
967 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
:
968 return mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
?
969 MACROBLOCK_TYPE_BKWD_FRAME_PRED
: MACROBLOCK_TYPE_BKWD_FIELD_PRED
;
970 case PIPE_MPEG12_MACROBLOCK_TYPE_BI
:
971 return mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
?
972 MACROBLOCK_TYPE_BI_FRAME_PRED
: MACROBLOCK_TYPE_BI_FIELD_PRED
;
981 /* XXX: One of these days this will have to be killed with fire */
/*
 * SET_BLOCK_QUAD writes one two-component vertex attribute (`field`, e.g.
 * pos or luma_tc) for the six vertices of a quad made of two triangles:
 *
 *    v0 = (x0,      y0)        v3 = (x0 + hx, y0)
 *    v1 = (x0,      y0 + hy)   v4 = (x0,      y0 + hy)
 *    v2 = (x0 + hx, y0)        v5 = (x0 + hx, y0 + hy)
 *
 * Arguments are evaluated several times, so they must be free of side
 * effects.
 */
#define SET_BLOCK_QUAD(vb, field, x0, y0, hx, hy) \
   do { \
      (vb)[0].field.x = (x0);        (vb)[0].field.y = (y0); \
      (vb)[1].field.x = (x0);        (vb)[1].field.y = (y0) + (hy); \
      (vb)[2].field.x = (x0) + (hx); (vb)[2].field.y = (y0); \
      (vb)[3].field.x = (x0) + (hx); (vb)[3].field.y = (y0); \
      (vb)[4].field.x = (x0);        (vb)[4].field.y = (y0) + (hy); \
      (vb)[5].field.x = (x0) + (hx); (vb)[5].field.y = (y0) + (hy); \
   } while (0)

/*
 * SET_BLOCK fills the six vertices at `vb` for one block of a macroblock.
 *
 *   vb            first of six consecutive vertices (pos, luma_tc, cb_tc,
 *                 cr_tc members are written)
 *   cbp           coded block pattern of the macroblock
 *   mbx, mby      macroblock coordinates
 *   unitx, unity  size of one macroblock in the target coordinate space
 *   ofsx, ofsy    offset of this block inside the macroblock
 *   hx, hy        extent of the block
 *   lm, cbm, crm  cbp bit masks for this luma block, the Cb block and the
 *                 Cr block
 *   use_zb        when true, planes whose cbp bit is clear take their
 *                 texcoords from the shared zero block instead
 *   zb            array of three zero-block origins: [0] luma, [1] Cb, [2] Cr
 *
 * The position always covers the block itself. Each plane's texcoords cover
 * the block when use_zb is false or the plane's cbp bit is set, and the
 * matching zero block otherwise.
 *
 * use_zb is parenthesized before it is negated so that a compound
 * expression (e.g. `a && b`) passed as the argument is negated as a whole.
 */
#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb) \
   do { \
      SET_BLOCK_QUAD(vb, pos, (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      \
      if (!(use_zb) || ((cbp) & (lm))) \
      { \
         SET_BLOCK_QUAD(vb, luma_tc, (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      } \
      else \
      { \
         SET_BLOCK_QUAD(vb, luma_tc, (zb)[0].x, (zb)[0].y, hx, hy); \
      } \
      \
      if (!(use_zb) || ((cbp) & (cbm))) \
      { \
         SET_BLOCK_QUAD(vb, cb_tc, (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      } \
      else \
      { \
         SET_BLOCK_QUAD(vb, cb_tc, (zb)[1].x, (zb)[1].y, hx, hy); \
      } \
      \
      if (!(use_zb) || ((cbp) & (crm))) \
      { \
         SET_BLOCK_QUAD(vb, cr_tc, (mbx) * (unitx) + (ofsx), (mby) * (unity) + (ofsy), hx, hy); \
      } \
      else \
      { \
         SET_BLOCK_QUAD(vb, cr_tc, (zb)[2].x, (zb)[2].y, hx, hy); \
      } \
   } while (0)
1050 gen_macroblock_verts(struct vl_mpeg12_mc_renderer
*r
,
1051 struct pipe_mpeg12_macroblock
*mb
, unsigned pos
,
1052 struct vert_stream_0
*ycbcr_vb
, struct vertex2f
**ref_vb
)
1054 struct vertex2f mo_vec
[2];
1059 assert(pos
< r
->macroblocks_per_batch
);
1061 switch (mb
->mb_type
)
1063 case PIPE_MPEG12_MACROBLOCK_TYPE_BI
:
1065 struct vertex2f
*vb
;
1067 assert(ref_vb
&& ref_vb
[1]);
1069 vb
= ref_vb
[1] + pos
* 2 * 24;
1071 mo_vec
[0].x
= mb
->pmv
[0][1][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1072 mo_vec
[0].y
= mb
->pmv
[0][1][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1074 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
)
1076 for (unsigned i
= 0; i
< 24 * 2; i
+= 2)
1078 vb
[i
].x
= mo_vec
[0].x
;
1079 vb
[i
].y
= mo_vec
[0].y
;
1084 mo_vec
[1].x
= mb
->pmv
[1][1][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1085 mo_vec
[1].y
= mb
->pmv
[1][1][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1087 for (unsigned i
= 0; i
< 24 * 2; i
+= 2)
1089 vb
[i
].x
= mo_vec
[0].x
;
1090 vb
[i
].y
= mo_vec
[0].y
;
1091 vb
[i
+ 1].x
= mo_vec
[1].x
;
1092 vb
[i
+ 1].y
= mo_vec
[1].y
;
1098 case PIPE_MPEG12_MACROBLOCK_TYPE_FWD
:
1099 case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
:
1101 struct vertex2f
*vb
;
1103 assert(ref_vb
&& ref_vb
[0]);
1105 vb
= ref_vb
[0] + pos
* 2 * 24;
1107 if (mb
->mb_type
== PIPE_MPEG12_MACROBLOCK_TYPE_BKWD
)
1109 mo_vec
[0].x
= mb
->pmv
[0][1][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1110 mo_vec
[0].y
= mb
->pmv
[0][1][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1112 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FIELD
)
1114 mo_vec
[1].x
= mb
->pmv
[1][1][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1115 mo_vec
[1].y
= mb
->pmv
[1][1][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1120 mo_vec
[0].x
= mb
->pmv
[0][0][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1121 mo_vec
[0].y
= mb
->pmv
[0][0][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1123 if (mb
->mo_type
== PIPE_MPEG12_MOTION_TYPE_FIELD
)
1125 mo_vec
[1].x
= mb
->pmv
[1][0][0] * 0.5f
* r
->surface_tex_inv_size
.x
;
1126 mo_vec
[1].y
= mb
->pmv
[1][0][1] * 0.5f
* r
->surface_tex_inv_size
.y
;
1130 if (mb
->mb_type
== PIPE_MPEG12_MOTION_TYPE_FRAME
)
1132 for (unsigned i
= 0; i
< 24 * 2; i
+= 2)
1134 vb
[i
].x
= mo_vec
[0].x
;
1135 vb
[i
].y
= mo_vec
[0].y
;
1140 for (unsigned i
= 0; i
< 24 * 2; i
+= 2)
1142 vb
[i
].x
= mo_vec
[0].x
;
1143 vb
[i
].y
= mo_vec
[0].y
;
1144 vb
[i
+ 1].x
= mo_vec
[1].x
;
1145 vb
[i
+ 1].y
= mo_vec
[1].y
;
1151 case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA
:
1153 const struct vertex2f unit
=
1155 r
->surface_tex_inv_size
.x
* MACROBLOCK_WIDTH
,
1156 r
->surface_tex_inv_size
.y
* MACROBLOCK_HEIGHT
1158 const struct vertex2f half
=
1160 r
->surface_tex_inv_size
.x
* (MACROBLOCK_WIDTH
/ 2),
1161 r
->surface_tex_inv_size
.y
* (MACROBLOCK_HEIGHT
/ 2)
1163 const bool use_zb
= r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
;
1165 struct vert_stream_0
*vb
= ycbcr_vb
+ pos
* 24;
1167 SET_BLOCK(vb
, mb
->cbp
, mb
->mbx
, mb
->mby
,
1168 unit
.x
, unit
.y
, 0, 0, half
.x
, half
.y
,
1169 32, 2, 1, use_zb
, r
->zero_block
);
1171 SET_BLOCK(vb
+ 6, mb
->cbp
, mb
->mbx
, mb
->mby
,
1172 unit
.x
, unit
.y
, half
.x
, 0, half
.x
, half
.y
,
1173 16, 2, 1, use_zb
, r
->zero_block
);
1175 SET_BLOCK(vb
+ 12, mb
->cbp
, mb
->mbx
, mb
->mby
,
1176 unit
.x
, unit
.y
, 0, half
.y
, half
.x
, half
.y
,
1177 8, 2, 1, use_zb
, r
->zero_block
);
1179 SET_BLOCK(vb
+ 18, mb
->cbp
, mb
->mbx
, mb
->mby
,
1180 unit
.x
, unit
.y
, half
.x
, half
.y
, half
.x
, half
.y
,
1181 4, 2, 1, use_zb
, r
->zero_block
);
/*
 * Builds the vertex streams for every macroblock currently buffered in
 * r->macroblock_buf.
 *
 * num_macroblocks is an array indexed by MACROBLOCK_TYPE; each entry is
 * incremented once per buffered macroblock of that type. The per-type start
 * offsets are the running sums of those counts, so all macroblocks of one
 * type end up contiguous in the vertex buffers.
 *
 * The ycbcr vertex buffer and both reference vertex buffers are mapped for
 * CPU write with the discard flag, filled through gen_macroblock_verts(),
 * and unmapped again.
 *
 * NOTE(review): the embedded line numbers skip 1205-1208, 1213-1214,
 * 1221-1222 and 1232-1236, so the initialisation of offset[0], the first
 * argument of both pipe_buffer_map() calls, the remaining arguments of
 * gen_macroblock_verts() and any per-macroblock advance of offset[mb_type]
 * are not visible in this listing -- confirm against the complete file.
 */
1191 gen_macroblock_stream(struct vl_mpeg12_mc_renderer
*r
,
1192 unsigned *num_macroblocks
)
1194 unsigned offset
[NUM_MACROBLOCK_TYPES
];
1195 struct vert_stream_0
*ycbcr_vb
;
1196 struct vertex2f
*ref_vb
[2];
1199 assert(num_macroblocks
);
/* Pass 1: count the buffered macroblocks per type. */
1201 for (unsigned i
= 0; i
< r
->num_macroblocks
; ++i
)
1203 enum MACROBLOCK_TYPE mb_type
= get_macroblock_type(&r
->macroblock_buf
[i
]);
1204 ++num_macroblocks
[mb_type
];
/* Start slot of each type = start slot of the previous type + its count. */
1209 for (unsigned i
= 1; i
< NUM_MACROBLOCK_TYPES
; ++i
)
1210 offset
[i
] = offset
[i
- 1] + num_macroblocks
[i
- 1];
/* Map the position/texcoord stream. */
1212 ycbcr_vb
= (struct vert_stream_0
*)pipe_buffer_map
1215 r
->vertex_bufs
.individual
.ycbcr
.buffer
,
1216 PIPE_BUFFER_USAGE_CPU_WRITE
| PIPE_BUFFER_USAGE_DISCARD
/* Map the two motion-vector streams. */
1219 for (unsigned i
= 0; i
< 2; ++i
)
1220 ref_vb
[i
] = (struct vertex2f
*)pipe_buffer_map
1223 r
->vertex_bufs
.individual
.ref
[i
].buffer
,
1224 PIPE_BUFFER_USAGE_CPU_WRITE
| PIPE_BUFFER_USAGE_DISCARD
/* Pass 2: emit the vertices of each macroblock at its type's current slot. */
1227 for (unsigned i
= 0; i
< r
->num_macroblocks
; ++i
)
1229 enum MACROBLOCK_TYPE mb_type
= get_macroblock_type(&r
->macroblock_buf
[i
]);
1231 gen_macroblock_verts(r
, &r
->macroblock_buf
[i
], offset
[mb_type
],
/* Release all three mappings. */
1237 pipe_buffer_unmap(r
->pipe
->screen
, r
->vertex_bufs
.individual
.ycbcr
.buffer
);
1238 for (unsigned i
= 0; i
< 2; ++i
)
1239 pipe_buffer_unmap(r
->pipe
->screen
, r
->vertex_bufs
.individual
.ref
[i
].buffer
);
1243 flush(struct vl_mpeg12_mc_renderer
*r
)
1245 unsigned num_macroblocks
[NUM_MACROBLOCK_TYPES
] = { 0 };
1246 unsigned vb_start
= 0;
1247 struct vertex_shader_consts
*vs_consts
;
1250 assert(r
->num_macroblocks
== r
->macroblocks_per_batch
);
1252 gen_macroblock_stream(r
, num_macroblocks
);
1254 r
->fb_state
.cbufs
[0] = r
->pipe
->screen
->get_tex_surface
1256 r
->pipe
->screen
, r
->surface
,
1257 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
1260 r
->pipe
->set_framebuffer_state(r
->pipe
, &r
->fb_state
);
1261 r
->pipe
->set_viewport_state(r
->pipe
, &r
->viewport
);
1263 vs_consts
= pipe_buffer_map
1265 r
->pipe
->screen
, r
->vs_const_buf
.buffer
,
1266 PIPE_BUFFER_USAGE_CPU_WRITE
| PIPE_BUFFER_USAGE_DISCARD
1269 vs_consts
->denorm
.x
= r
->surface
->width
[0];
1270 vs_consts
->denorm
.y
= r
->surface
->height
[0];
1272 pipe_buffer_unmap(r
->pipe
->screen
, r
->vs_const_buf
.buffer
);
1274 r
->pipe
->set_constant_buffer(r
->pipe
, PIPE_SHADER_VERTEX
, 0,
1276 r
->pipe
->set_constant_buffer(r
->pipe
, PIPE_SHADER_FRAGMENT
, 0,
1279 if (num_macroblocks
[MACROBLOCK_TYPE_INTRA
] > 0)
1281 r
->pipe
->set_vertex_buffers(r
->pipe
, 1, r
->vertex_bufs
.all
);
1282 r
->pipe
->set_vertex_elements(r
->pipe
, 4, r
->vertex_elems
);
1283 r
->pipe
->set_sampler_textures(r
->pipe
, 3, r
->textures
.all
);
1284 r
->pipe
->bind_sampler_states(r
->pipe
, 3, r
->samplers
.all
);
1285 r
->pipe
->bind_vs_state(r
->pipe
, r
->i_vs
);
1286 r
->pipe
->bind_fs_state(r
->pipe
, r
->i_fs
);
1288 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1289 num_macroblocks
[MACROBLOCK_TYPE_INTRA
] * 24);
1290 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_INTRA
] * 24;
1293 if (num_macroblocks
[MACROBLOCK_TYPE_FWD_FRAME_PRED
] > 0)
1295 r
->pipe
->set_vertex_buffers(r
->pipe
, 2, r
->vertex_bufs
.all
);
1296 r
->pipe
->set_vertex_elements(r
->pipe
, 6, r
->vertex_elems
);
1297 r
->textures
.individual
.ref
[0] = r
->past
;
1298 r
->pipe
->set_sampler_textures(r
->pipe
, 4, r
->textures
.all
);
1299 r
->pipe
->bind_sampler_states(r
->pipe
, 4, r
->samplers
.all
);
1300 r
->pipe
->bind_vs_state(r
->pipe
, r
->p_vs
[0]);
1301 r
->pipe
->bind_fs_state(r
->pipe
, r
->p_fs
[0]);
1303 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1304 num_macroblocks
[MACROBLOCK_TYPE_FWD_FRAME_PRED
] * 24);
1305 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_FWD_FRAME_PRED
] * 24;
1308 if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ )
1310 r
->pipe
->set_vertex_buffers(r
->pipe
, 2, r
->vertex_bufs
.all
);
1311 r
->pipe
->set_vertex_elements(r
->pipe
, 6, r
->vertex_elems
);
1312 r
->textures
.individual
.ref
[0] = r
->past
;
1313 r
->pipe
->set_sampler_textures(r
->pipe
, 4, r
->textures
.all
);
1314 r
->pipe
->bind_sampler_states(r
->pipe
, 4, r
->samplers
.all
);
1315 r
->pipe
->bind_vs_state(r
->pipe
, r
->p_vs
[1]);
1316 r
->pipe
->bind_fs_state(r
->pipe
, r
->p_fs
[1]);
1318 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1319 num_macroblocks
[MACROBLOCK_TYPE_FWD_FIELD_PRED
] * 24);
1320 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_FWD_FIELD_PRED
] * 24;
1323 if (num_macroblocks
[MACROBLOCK_TYPE_BKWD_FRAME_PRED
] > 0)
1325 r
->pipe
->set_vertex_buffers(r
->pipe
, 2, r
->vertex_bufs
.all
);
1326 r
->pipe
->set_vertex_elements(r
->pipe
, 6, r
->vertex_elems
);
1327 r
->textures
.individual
.ref
[0] = r
->future
;
1328 r
->pipe
->set_sampler_textures(r
->pipe
, 4, r
->textures
.all
);
1329 r
->pipe
->bind_sampler_states(r
->pipe
, 4, r
->samplers
.all
);
1330 r
->pipe
->bind_vs_state(r
->pipe
, r
->p_vs
[0]);
1331 r
->pipe
->bind_fs_state(r
->pipe
, r
->p_fs
[0]);
1333 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1334 num_macroblocks
[MACROBLOCK_TYPE_BKWD_FRAME_PRED
] * 24);
1335 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_BKWD_FRAME_PRED
] * 24;
1338 if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ )
1340 r
->pipe
->set_vertex_buffers(r
->pipe
, 2, r
->vertex_bufs
.all
);
1341 r
->pipe
->set_vertex_elements(r
->pipe
, 6, r
->vertex_elems
);
1342 r
->textures
.individual
.ref
[0] = r
->future
;
1343 r
->pipe
->set_sampler_textures(r
->pipe
, 4, r
->textures
.all
);
1344 r
->pipe
->bind_sampler_states(r
->pipe
, 4, r
->samplers
.all
);
1345 r
->pipe
->bind_vs_state(r
->pipe
, r
->p_vs
[1]);
1346 r
->pipe
->bind_fs_state(r
->pipe
, r
->p_fs
[1]);
1348 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1349 num_macroblocks
[MACROBLOCK_TYPE_BKWD_FIELD_PRED
] * 24);
1350 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_BKWD_FIELD_PRED
] * 24;
1353 if (num_macroblocks
[MACROBLOCK_TYPE_BI_FRAME_PRED
] > 0)
1355 r
->pipe
->set_vertex_buffers(r
->pipe
, 3, r
->vertex_bufs
.all
);
1356 r
->pipe
->set_vertex_elements(r
->pipe
, 8, r
->vertex_elems
);
1357 r
->textures
.individual
.ref
[0] = r
->past
;
1358 r
->textures
.individual
.ref
[1] = r
->future
;
1359 r
->pipe
->set_sampler_textures(r
->pipe
, 5, r
->textures
.all
);
1360 r
->pipe
->bind_sampler_states(r
->pipe
, 5, r
->samplers
.all
);
1361 r
->pipe
->bind_vs_state(r
->pipe
, r
->b_vs
[0]);
1362 r
->pipe
->bind_fs_state(r
->pipe
, r
->b_fs
[0]);
1364 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1365 num_macroblocks
[MACROBLOCK_TYPE_BI_FRAME_PRED
] * 24);
1366 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_BI_FRAME_PRED
] * 24;
1369 if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ )
1371 r
->pipe
->set_vertex_buffers(r
->pipe
, 3, r
->vertex_bufs
.all
);
1372 r
->pipe
->set_vertex_elements(r
->pipe
, 8, r
->vertex_elems
);
1373 r
->textures
.individual
.ref
[0] = r
->past
;
1374 r
->textures
.individual
.ref
[1] = r
->future
;
1375 r
->pipe
->set_sampler_textures(r
->pipe
, 5, r
->textures
.all
);
1376 r
->pipe
->bind_sampler_states(r
->pipe
, 5, r
->samplers
.all
);
1377 r
->pipe
->bind_vs_state(r
->pipe
, r
->b_vs
[1]);
1378 r
->pipe
->bind_fs_state(r
->pipe
, r
->b_fs
[1]);
1380 r
->pipe
->draw_arrays(r
->pipe
, PIPE_PRIM_TRIANGLES
, vb_start
,
1381 num_macroblocks
[MACROBLOCK_TYPE_BI_FIELD_PRED
] * 24);
1382 vb_start
+= num_macroblocks
[MACROBLOCK_TYPE_BI_FIELD_PRED
] * 24;
1385 r
->pipe
->flush(r
->pipe
, PIPE_FLUSH_RENDER_CACHE
, r
->fence
);
1386 pipe_surface_reference(&r
->fb_state
.cbufs
[0], NULL
);
1388 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
)
1389 for (unsigned i
= 0; i
< 3; ++i
)
1390 r
->zero_block
[i
].x
= ZERO_BLOCK_NIL
;
1392 r
->num_macroblocks
= 0;
1396 grab_frame_coded_block(short *src
, short *dst
, unsigned dst_pitch
)
1401 for (unsigned y
= 0; y
< BLOCK_HEIGHT
; ++y
)
1402 memcpy(dst
+ y
* dst_pitch
, src
+ y
* BLOCK_WIDTH
, BLOCK_WIDTH
* 2);
1406 grab_field_coded_block(short *src
, short *dst
, unsigned dst_pitch
)
1411 for (unsigned y
= 0; y
< BLOCK_HEIGHT
; ++y
)
1412 memcpy(dst
+ y
* dst_pitch
* 2, src
+ y
* BLOCK_WIDTH
, BLOCK_WIDTH
* 2);
1416 fill_zero_block(short *dst
, unsigned dst_pitch
)
1420 for (unsigned y
= 0; y
< BLOCK_HEIGHT
; ++y
)
1421 memset(dst
+ y
* dst_pitch
, 0, BLOCK_WIDTH
* 2);
/*
 * Transfers the coded blocks of one macroblock from `blocks` into the mapped
 * textures (r->texels[0] for luma, r->texels[1] and r->texels[2] for the two
 * chroma planes).
 *
 *   mbx, mby  macroblock coordinates; multiplied by the macroblock size to
 *             get the sample position (mbpx, mbpy)
 *   dct_type  selects the frame or field copy routine for luma blocks
 *   cbp       coded block pattern; bits 5..2 are tested for the four luma
 *             blocks (5 - tb for tb = 0..3), bits 1..0 for the chroma blocks
 *   blocks    packed source blocks; block number `sb` starts at
 *             blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT
 *
 * A block whose cbp bit is clear is handled according to r->eb_handling:
 * nothing for XFER_NONE; otherwise the destination is zero-filled when the
 * policy is XFER_ALL or when no zero block has been recorded yet for that
 * plane, and under XFER_ONE the location of that zero-filled block is
 * recorded in r->zero_block[] in normalized texture coordinates.
 *
 * NOTE(review): the embedded line numbers skip several ranges (1427-1429,
 * 1432-1435, 1455-1459, 1470-1475, 1478-1481, 1490-1491), so the
 * declarations of tex_pitch/texels, the last argument of the field copy, any
 * advance of `sb`, and whatever adjusts mbpx/mbpy before the chroma loop are
 * not visible in this listing -- confirm against the complete file.
 */
1425 grab_blocks(struct vl_mpeg12_mc_renderer
*r
, unsigned mbx
, unsigned mby
,
1426 enum pipe_mpeg12_dct_type dct_type
, unsigned cbp
, short *blocks
)
1430 unsigned tb
= 0, sb
= 0;
1431 unsigned mbpx
= mbx
* MACROBLOCK_WIDTH
, mbpy
= mby
* MACROBLOCK_HEIGHT
;
/* Luma plane: row pitch in texels and pointer to the macroblock's origin. */
1436 tex_pitch
= r
->tex_transfer
[0]->stride
/ r
->tex_transfer
[0]->block
.size
;
1437 texels
= r
->texels
[0] + mbpy
* tex_pitch
+ mbpx
;
/* Four luma blocks in a 2x2 arrangement; tb runs 0..3 across both loops. */
1439 for (unsigned y
= 0; y
< 2; ++y
)
1441 for (unsigned x
= 0; x
< 2; ++x
, ++tb
)
1443 if ((cbp
>> (5 - tb
)) & 1)
1445 if (dct_type
== PIPE_MPEG12_DCT_TYPE_FRAME
)
/* Frame DCT: the lower block row starts BLOCK_WIDTH texture rows down. */
1447 grab_frame_coded_block(blocks
+ sb
* BLOCK_WIDTH
* BLOCK_HEIGHT
,
1448 texels
+ y
* tex_pitch
* BLOCK_WIDTH
+
1449 x
* BLOCK_WIDTH
, tex_pitch
);
/* Field DCT: the second block row starts one texture row down. */
1453 grab_field_coded_block(blocks
+ sb
* BLOCK_WIDTH
* BLOCK_HEIGHT
,
1454 texels
+ y
* tex_pitch
+ x
* BLOCK_WIDTH
,
/* Uncoded luma block: apply the empty-block policy. */
1460 else if (r
->eb_handling
!= VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE
)
1462 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL
||
1463 ZERO_BLOCK_IS_NIL(r
->zero_block
[0]))
1465 fill_zero_block(texels
+ y
* tex_pitch
* BLOCK_WIDTH
+ x
* BLOCK_WIDTH
, tex_pitch
);
1466 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
)
/* Remember where the luma zero block lives, in normalized coordinates. */
1468 r
->zero_block
[0].x
= (mbpx
+ x
* 8) * r
->surface_tex_inv_size
.x
;
1469 r
->zero_block
[0].y
= (mbpy
+ y
* 8) * r
->surface_tex_inv_size
.y
;
1476 /* TODO: Implement 422, 444 */
1477 assert(r
->chroma_format
== PIPE_VIDEO_CHROMA_FORMAT_420
);
/* Chroma planes: tb = 0 and 1 address texels[1] and texels[2]. */
1482 for (tb
= 0; tb
< 2; ++tb
)
1484 tex_pitch
= r
->tex_transfer
[tb
+ 1]->stride
/ r
->tex_transfer
[tb
+ 1]->block
.size
;
1485 texels
= r
->texels
[tb
+ 1] + mbpy
* tex_pitch
+ mbpx
;
1487 if ((cbp
>> (1 - tb
)) & 1)
1489 grab_frame_coded_block(blocks
+ sb
* BLOCK_WIDTH
* BLOCK_HEIGHT
, texels
, tex_pitch
);
/* Uncoded chroma block: same empty-block policy, per-plane zero block. */
1492 else if (r
->eb_handling
!= VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE
)
1494 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL
||
1495 ZERO_BLOCK_IS_NIL(r
->zero_block
[tb
+ 1]))
1497 fill_zero_block(texels
, tex_pitch
);
1498 if (r
->eb_handling
== VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE
)
1500 r
->zero_block
[tb
+ 1].x
= (mbpx
<< 1) * r
->surface_tex_inv_size
.x
;
1501 r
->zero_block
[tb
+ 1].y
= (mbpy
<< 1) * r
->surface_tex_inv_size
.y
;
/*
 * Appends one macroblock to the current batch: copies its description into
 * the next free slot of r->macroblock_buf, transfers its blocks into the
 * mapped textures via grab_blocks(), and increments r->num_macroblocks.
 * The batch must not already be full.
 *
 * NOTE(review): the embedded line numbers skip 1511-1513, so any further
 * checks at the top of the function are not visible in this listing.
 */
1509 grab_macroblock(struct vl_mpeg12_mc_renderer
*r
,
1510 struct pipe_mpeg12_macroblock
*mb
)
1514 assert(r
->num_macroblocks
< r
->macroblocks_per_batch
);
/* Keep a copy of the description; the vertex generation reads it later. */
1516 memcpy(&r
->macroblock_buf
[r
->num_macroblocks
], mb
,
1517 sizeof(struct pipe_mpeg12_macroblock
));
1519 grab_blocks(r
, mb
->mbx
, mb
->mby
, mb
->dct_type
, mb
->cbp
, mb
->blocks
);
1521 ++r
->num_macroblocks
;
/*
 * Initialises a renderer: zeroes the structure, stores the configuration,
 * sets up pipe state, shaders and buffers (undoing the earlier steps when a
 * later one fails), clears the surface/reference pointers, marks all three
 * zero blocks as unset, resets the macroblock count and maps the transfer
 * buffers.
 *
 * Only VL_MPEG12_MC_RENDERER_BUFFER_PICTURE is accepted for bufmode,
 * eb_handling must not be VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE, and
 * pot_buffers must be true; all three are enforced with assert().
 *
 * NOTE(review): the embedded line numbers skip 1532-1535 and the lines after
 * each failed init step, so the end of the parameter list (including the
 * declaration of pot_buffers), the return type and the return statements are
 * not visible in this listing -- confirm against the complete file.
 */
1525 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer
*renderer
,
1526 struct pipe_context
*pipe
,
1527 unsigned picture_width
,
1528 unsigned picture_height
,
1529 enum pipe_video_chroma_format chroma_format
,
1530 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode
,
1531 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling
,
1536 /* TODO: Implement other policies */
1537 assert(bufmode
== VL_MPEG12_MC_RENDERER_BUFFER_PICTURE
);
1538 /* TODO: Implement this */
1539 /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
1540 assert(eb_handling
!= VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE
);
1541 /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
1542 assert(pot_buffers
);
1544 memset(renderer
, 0, sizeof(struct vl_mpeg12_mc_renderer
));
/* Record the configuration. */
1546 renderer
->pipe
= pipe
;
1547 renderer
->picture_width
= picture_width
;
1548 renderer
->picture_height
= picture_height
;
1549 renderer
->chroma_format
= chroma_format
;
1550 renderer
->bufmode
= bufmode
;
1551 renderer
->eb_handling
= eb_handling
;
1552 renderer
->pot_buffers
= pot_buffers
;
/* Staged set-up; each failure path releases what was created before it. */
1554 if (!init_pipe_state(renderer
))
1556 if (!init_shaders(renderer
))
1558 cleanup_pipe_state(renderer
);
1561 if (!init_buffers(renderer
))
1563 cleanup_shaders(renderer
);
1564 cleanup_pipe_state(renderer
);
/* No target or reference frames yet, no zero block recorded, empty batch. */
1568 renderer
->surface
= NULL
;
1569 renderer
->past
= NULL
;
1570 renderer
->future
= NULL
;
1571 for (unsigned i
= 0; i
< 3; ++i
)
1572 renderer
->zero_block
[i
].x
= ZERO_BLOCK_NIL
;
1573 renderer
->num_macroblocks
= 0;
1575 xfer_buffers_map(renderer
);
/*
 * Tears a renderer down: unmaps the transfer buffers, then releases the
 * pipe state, the shaders and the buffers.
 */
1581 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer
*renderer
)
1585 xfer_buffers_unmap(renderer
);
1587 cleanup_pipe_state(renderer
);
1588 cleanup_shaders(renderer
);
1589 cleanup_buffers(renderer
);
1593 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
1595 struct pipe_texture
*surface
,
1596 struct pipe_texture
*past
,
1597 struct pipe_texture
*future
,
1598 unsigned num_macroblocks
,
1599 struct pipe_mpeg12_macroblock
1600 *mpeg12_macroblocks
,
1601 struct pipe_fence_handle
**fence
)
1603 bool new_surface
= false;
1607 assert(num_macroblocks
);
1608 assert(mpeg12_macroblocks
);
1610 if (renderer
->surface
)
1612 if (surface
!= renderer
->surface
)
1614 if (renderer
->num_macroblocks
> 0)
1616 xfer_buffers_unmap(renderer
);
1623 /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
1624 assert(surface
!= renderer
->surface
|| renderer
->past
== past
);
1625 assert(surface
!= renderer
->surface
|| renderer
->future
== future
);
1632 renderer
->surface
= surface
;
1633 renderer
->past
= past
;
1634 renderer
->future
= future
;
1635 renderer
->fence
= fence
;
1636 renderer
->surface_tex_inv_size
.x
= 1.0f
/ surface
->width
[0];
1637 renderer
->surface_tex_inv_size
.y
= 1.0f
/ surface
->height
[0];
1640 while (num_macroblocks
)
1642 unsigned left_in_batch
= renderer
->macroblocks_per_batch
- renderer
->num_macroblocks
;
1643 unsigned num_to_submit
= MIN2(num_macroblocks
, left_in_batch
);
1645 for (unsigned i
= 0; i
< num_to_submit
; ++i
)
1647 assert(mpeg12_macroblocks
[i
].base
.codec
== PIPE_VIDEO_CODEC_MPEG12
);
1648 grab_macroblock(renderer
, &mpeg12_macroblocks
[i
]);
1651 num_macroblocks
-= num_to_submit
;
1653 if (renderer
->num_macroblocks
== renderer
->macroblocks_per_batch
)
1655 xfer_buffers_unmap(renderer
);
1657 xfer_buffers_map(renderer
);
1658 /* Next time we get this surface it may have new ref frames */
1659 renderer
->surface
= NULL
;