2 #include "vl_r16snorm_mc_buf.h"
5 #include <pipe/p_context.h>
6 #include <pipe/p_winsys.h>
7 #include <pipe/p_screen.h>
8 #include <pipe/p_state.h>
9 #include <pipe/p_util.h>
10 #include <pipe/p_inlines.h>
11 #include <tgsi/tgsi_parse.h>
12 #include <tgsi/tgsi_build.h>
13 #include "vl_render.h"
14 #include "vl_shader_build.h"
15 #include "vl_surface.h"
21 * TODO: Dynamically determine number of buf sets to use, based on
22 * video size and available mem, since we can easily run out of memory
23 * for high res videos.
24 * Note: Destroying previous frame's buffers and creating new ones
25 * doesn't work, since the buffers are not actually destroyed until their
26 * fence is signalled, and if we render fast enough we will create faster
29 #define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */
/*
 * Extended macroblock classification: the intra type plus the forward-,
 * backward- and bi-predicted types, each split into a frame and a field
 * variant. vlNumMacroBlockExTypes is the trailing enumerator and is used
 * elsewhere in this file to size per-type counter/offset arrays.
 */
31 enum vlMacroBlockTypeEx
33 vlMacroBlockExTypeIntra
,
34 vlMacroBlockExTypeFwdPredictedFrame
,
35 vlMacroBlockExTypeFwdPredictedField
,
36 vlMacroBlockExTypeBkwdPredictedFrame
,
37 vlMacroBlockExTypeBkwdPredictedField
,
38 vlMacroBlockExTypeBiPredictedFrame
,
39 vlMacroBlockExTypeBiPredictedField
,
41 vlNumMacroBlockExTypes
/*
 * Layout of the vertex shader constant buffer. Elsewhere in this file
 * denorm.x / denorm.y are written with the buffered surface texture's
 * width[0] / height[0].
 */
44 struct vlVertexShaderConsts
46 struct vlVertex4f denorm
;
/*
 * Layout of the fragment shader constant buffer: a sample multiplier
 * followed by a divider vector. The values are supplied by the static
 * fs_consts initializer later in this file.
 */
49 struct vlFragmentShaderConsts
51 struct vlVertex4f multiplier
;
52 struct vlVertex4f div
;
/*
 * State of the buffered R16 SNORM motion-compensation renderer: picture
 * description, the surfaces currently being rendered to / referenced,
 * the queue of grabbed macroblocks, and the pipe objects (samplers,
 * textures, shaders, vertex and constant buffers) used to draw them.
 * Textures and vertex buffers exist once per rotating buffer set
 * (NUM_BUF_SETS).
 * NOTE(review): code in this file reads mc->cur_buf and casts a
 * struct vlRender * to this type, but neither the cur_buf member nor a
 * leading base member is visible in this listing - confirm against the
 * full file.
 */
55 struct vlR16SnormBufferedMC
59 unsigned int picture_width
, picture_height
;
60 enum vlFormat picture_format
;
/* Surface the queued macroblocks will be rendered to. */
63 struct vlSurface
*buffered_surface
;
/* Reference surfaces, copied from the incoming macroblock batch. */
64 struct vlSurface
*past_surface
, *future_surface
;
/* 1.0f / width[0] and 1.0f / height[0] of the target surface's texture. */
65 struct vlVertex2f surface_tex_inv_size
;
/*
 * Texcoords of a block recorded by vlGrabBlocks for non-coded blocks:
 * index 0 belongs to texture 0, indices 1 and 2 to textures 1 and 2.
 * A negative .x means "not recorded yet".
 */
66 struct vlVertex2f zero_block
[3];
/* Number of valid entries in macroblocks. */
67 unsigned int num_macroblocks
;
68 struct vlMpeg2MacroBlock
*macroblocks
;
70 struct pipe_context
*pipe
;
71 struct pipe_viewport_state viewport
;
72 struct pipe_framebuffer_state render_target
;
73 struct pipe_sampler_state
*samplers
[5];
/*
 * Per buffer set: slots 0-2 receive block data in vlGrabBlocks; slots 3
 * and 4 are assigned the past/future surface textures before drawing.
 */
74 struct pipe_texture
*textures
[NUM_BUF_SETS
][5];
/* Shader handles: i_* for intra, p_*[2] and b_*[2] for predicted and
 * bi-predicted macroblocks (index 0 is bound for frame types, index 1
 * for field types). */
75 void *i_vs
, *p_vs
[2], *b_vs
[2];
76 void *i_fs
, *p_fs
[2], *b_fs
[2];
/*
 * Per buffer set: buffer 0 is mapped as position/texcoord vertices,
 * buffers 1 and 2 are mapped as vlVertex2f motion vectors
 * (see vlGrabMacroBlockVB).
 */
77 struct pipe_vertex_buffer vertex_bufs
[NUM_BUF_SETS
][3];
78 struct pipe_vertex_element vertex_elems
[8];
79 struct pipe_constant_buffer vs_const_buf
, fs_const_buf
;
84 struct vlRender
*render
/*
 * Handles one frame-coded block: walks the VL_BLOCK_HEIGHT rows of the
 * block, reading row y of the source at src + y * VL_BLOCK_WIDTH.
 * NOTE(review): the call that consumes the row pointer, the destination
 * expression and the return statement are not visible in this listing -
 * presumably each row is copied to dst using dst_pitch; confirm.
 */
92 static inline int vlGrabFrameCodedBlock(short *src
, short *dst
, unsigned int dst_pitch
)
96 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
100 src
+ y
* VL_BLOCK_WIDTH
,
/*
 * Handles one field-coded block in two passes. The first pass covers
 * source rows 0 .. VL_BLOCK_HEIGHT / 2 - 1 and addresses the destination
 * at dst + y * dst_pitch * 2 (every second destination row). dst is then
 * advanced by VL_BLOCK_HEIGHT * dst_pitch and the second pass continues
 * with the remaining source rows using the same addressing expressions.
 * NOTE(review): the call that consumes the two pointers and the return
 * statement are not visible in this listing.
 */
107 static inline int vlGrabFieldCodedBlock(short *src
, short *dst
, unsigned int dst_pitch
)
/* First half of the source rows. */
111 for (y
= 0; y
< VL_BLOCK_HEIGHT
/ 2; ++y
)
114 dst
+ y
* dst_pitch
* 2,
115 src
+ y
* VL_BLOCK_WIDTH
,
/* Rebase dst before processing the second half; y keeps counting. */
119 dst
+= VL_BLOCK_HEIGHT
* dst_pitch
;
121 for (; y
< VL_BLOCK_HEIGHT
; ++y
)
124 dst
+ y
* dst_pitch
* 2,
125 src
+ y
* VL_BLOCK_WIDTH
,
/*
 * Called by vlGrabBlocks for a block that is not coded when no zero
 * block has been recorded yet. Iterates over the VL_BLOCK_HEIGHT rows
 * of the destination.
 * NOTE(review): the loop body is not visible in this listing -
 * presumably it clears each row of dst; confirm.
 */
132 static inline int vlGrabNoBlock(short *dst
, unsigned int dst_pitch
)
136 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
/*
 * Uploads the blocks of one macroblock into textures 0-2 of the current
 * buffer set (mc->cur_buf % NUM_BUF_SETS).
 *
 * Texture 0: the surface is mapped for CPU write, the pitch is computed
 * in texels (stride / block.size), and the 2x2 blocks of the macroblock
 * are visited. Bit (5 - tb) of coded_block_pattern decides whether block
 * tb is coded; coded blocks go through vlGrabFrameCodedBlock or
 * vlGrabFieldCodedBlock depending on dct_type. For a non-coded block,
 * if mc->zero_block[0] is still unset (x < 0) vlGrabNoBlock is called
 * and the block's texcoords are recorded in mc->zero_block[0].
 *
 * Textures 1 and 2: same scheme with one block each, using bit (1 - tb)
 * of coded_block_pattern and mc->zero_block[tb + 1].
 *
 * NOTE(review): several original lines (remaining parameters, the
 * declarations of texels/blocks/mbx/mby, the updates of sb, any
 * adjustment of mbpx/mbpy between the two phases, and the return) are
 * not visible in this listing.
 */
147 static inline int vlGrabBlocks
149 struct vlR16SnormBufferedMC
*mc
,
152 enum vlDCTType dct_type
,
153 unsigned int coded_block_pattern
,
157 struct pipe_surface
*tex_surface
;
159 unsigned int tex_pitch
;
160 unsigned int x
, y
, tb
= 0, sb
= 0;
/* Macroblock origin in pixels. */
161 unsigned int mbpx
= mbx
* VL_MACROBLOCK_WIDTH
, mbpy
= mby
* VL_MACROBLOCK_HEIGHT
;
166 tex_surface
= mc
->pipe
->screen
->get_tex_surface
169 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][0],
170 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
173 texels
= pipe_surface_map(tex_surface
, PIPE_BUFFER_USAGE_CPU_WRITE
);
/* Pitch expressed in texels rather than bytes. */
174 tex_pitch
= tex_surface
->stride
/ tex_surface
->block
.size
;
/* Move to the first texel of this macroblock. */
176 texels
+= mbpy
* tex_pitch
+ mbpx
;
178 for (y
= 0; y
< 2; ++y
)
180 for (x
= 0; x
< 2; ++x
, ++tb
)
/* Bits 5..2 of the pattern correspond to blocks tb = 0..3. */
182 if ((coded_block_pattern
>> (5 - tb
)) & 1)
184 short *cur_block
= blocks
+ sb
* VL_BLOCK_WIDTH
* VL_BLOCK_HEIGHT
;
186 if (dct_type
== vlDCTTypeFrameCoded
)
188 vlGrabFrameCodedBlock
191 texels
+ y
* tex_pitch
* VL_BLOCK_HEIGHT
+ x
* VL_BLOCK_WIDTH
,
197 vlGrabFieldCodedBlock
200 texels
+ y
* tex_pitch
+ x
* VL_BLOCK_WIDTH
,
/* Non-coded block and no zero block recorded yet for texture 0. */
207 else if (mc
->zero_block
[0].x
< 0.0f
)
209 vlGrabNoBlock(texels
+ y
* tex_pitch
* VL_BLOCK_HEIGHT
+ x
* VL_BLOCK_WIDTH
, tex_pitch
);
211 mc
->zero_block
[0].x
= (mbpx
+ x
* 8) * mc
->surface_tex_inv_size
.x
;
212 mc
->zero_block
[0].y
= (mbpy
+ y
* 8) * mc
->surface_tex_inv_size
.y
;
217 pipe_surface_unmap(tex_surface
);
219 /* TODO: Implement 422, 444 */
/* Second phase: textures 1 and 2 of the current buffer set. */
223 for (tb
= 0; tb
< 2; ++tb
)
225 tex_surface
= mc
->pipe
->screen
->get_tex_surface
228 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][tb
+ 1],
229 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
232 texels
= pipe_surface_map(tex_surface
, PIPE_BUFFER_USAGE_CPU_WRITE
);
233 tex_pitch
= tex_surface
->stride
/ tex_surface
->block
.size
;
235 texels
+= mbpy
* tex_pitch
+ mbpx
;
/* Bits 1 and 0 of the pattern correspond to tb = 0 and tb = 1. */
237 if ((coded_block_pattern
>> (1 - tb
)) & 1)
239 short *cur_block
= blocks
+ sb
* VL_BLOCK_WIDTH
* VL_BLOCK_HEIGHT
;
241 vlGrabFrameCodedBlock
250 else if (mc
->zero_block
[tb
+ 1].x
< 0.0f
)
252 vlGrabNoBlock(texels
, tex_pitch
);
254 mc
->zero_block
[tb
+ 1].x
= (mbpx
<< 1) * mc
->surface_tex_inv_size
.x
;
255 mc
->zero_block
[tb
+ 1].y
= (mbpy
<< 1) * mc
->surface_tex_inv_size
.y
;
258 pipe_surface_unmap(tex_surface
);
/*
 * Maps a macroblock to its extended type. Intra macroblocks map to
 * vlMacroBlockExTypeIntra; forward-, backward- and bi-predicted
 * macroblocks map to the corresponding Frame variant when
 * mb->mo_type == vlMotionTypeFrame and to the Field variant otherwise.
 * NOTE(review): the switch statement itself and any default/fallback
 * path are not visible in this listing - presumably the switch is on
 * mb->mb_type; confirm.
 */
264 static inline enum vlMacroBlockTypeEx
vlGetMacroBlockTypeEx(struct vlMpeg2MacroBlock
*mb
)
270 case vlMacroBlockTypeIntra
:
271 return vlMacroBlockExTypeIntra
;
272 case vlMacroBlockTypeFwdPredicted
:
273 return mb
->mo_type
== vlMotionTypeFrame
?
274 vlMacroBlockExTypeFwdPredictedFrame
: vlMacroBlockExTypeFwdPredictedField
;
275 case vlMacroBlockTypeBkwdPredicted
:
276 return mb
->mo_type
== vlMotionTypeFrame
?
277 vlMacroBlockExTypeBkwdPredictedFrame
: vlMacroBlockExTypeBkwdPredictedField
;
278 case vlMacroBlockTypeBiPredicted
:
279 return mb
->mo_type
== vlMotionTypeFrame
?
280 vlMacroBlockExTypeBiPredictedFrame
: vlMacroBlockExTypeBiPredictedField
;
/*
 * Appends one macroblock to the renderer's queue: copies the position,
 * types, all eight PMV components, cbp and the blocks pointer of
 * *macroblock into mc->macroblocks[mc->num_macroblocks], then increments
 * mc->num_macroblocks.
 * NOTE(review): a call taking macroblock->dct_type as an argument sits
 * between the copy and the increment, but its name and other arguments
 * are not visible in this listing - presumably vlGrabBlocks; confirm.
 * NOTE(review): no check of num_macroblocks against the capacity of
 * mc->macroblocks is visible in this listing.
 */
289 static inline int vlGrabMacroBlock
291 struct vlR16SnormBufferedMC
*mc
,
292 struct vlMpeg2MacroBlock
*macroblock
298 mc
->macroblocks
[mc
->num_macroblocks
].mbx
= macroblock
->mbx
;
299 mc
->macroblocks
[mc
->num_macroblocks
].mby
= macroblock
->mby
;
300 mc
->macroblocks
[mc
->num_macroblocks
].mb_type
= macroblock
->mb_type
;
301 mc
->macroblocks
[mc
->num_macroblocks
].mo_type
= macroblock
->mo_type
;
302 mc
->macroblocks
[mc
->num_macroblocks
].dct_type
= macroblock
->dct_type
;
303 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[0][0][0] = macroblock
->PMV
[0][0][0];
304 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[0][0][1] = macroblock
->PMV
[0][0][1];
305 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[0][1][0] = macroblock
->PMV
[0][1][0];
306 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[0][1][1] = macroblock
->PMV
[0][1][1];
307 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[1][0][0] = macroblock
->PMV
[1][0][0];
308 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[1][0][1] = macroblock
->PMV
[1][0][1];
309 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[1][1][0] = macroblock
->PMV
[1][1][0];
310 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[1][1][1] = macroblock
->PMV
[1][1][1];
311 mc
->macroblocks
[mc
->num_macroblocks
].cbp
= macroblock
->cbp
;
/* Only the pointer is copied here, not the block data it points to. */
312 mc
->macroblocks
[mc
->num_macroblocks
].blocks
= macroblock
->blocks
;
319 macroblock
->dct_type
,
324 mc
->num_macroblocks
++;
/*
 * SET_BLOCK fills six vertices vb[0..5] (pos, luma_tc, cb_tc, cr_tc) for
 * one rectangular block. The corner order is (x, y), (x, y + hy),
 * (x + hx, y), (x + hx, y), (x, y + hy), (x + hx, y + hy), where
 * x = mbx * unitx + ofsx and y = mby * unity + ofsy.
 * Each of the three texcoord sets appears twice: once derived from the
 * macroblock position like pos, and once derived from zb[0], zb[1] or
 * zb[2] respectively.
 * NOTE(review): the conditions choosing between the two variants are not
 * visible in this listing - presumably cbp is tested against lm, cbm
 * and crm; confirm.
 */
329 #define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zb) \
330 (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \
331 (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \
332 (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \
333 (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \
334 (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \
335 (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \
339 (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \
340 (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
341 (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \
342 (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \
343 (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
344 (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
348 (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \
349 (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \
350 (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \
351 (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \
352 (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \
353 (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \
358 (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \
359 (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
360 (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \
361 (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \
362 (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
363 (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
367 (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \
368 (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \
369 (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \
370 (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \
371 (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \
372 (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \
377 (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \
378 (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
379 (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \
380 (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \
381 (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
382 (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
386 (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \
387 (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \
388 (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \
389 (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \
390 (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \
391 (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \
/*
 * Writes the vertex data for one macroblock into the vertex buffers of
 * the current buffer set (mc->cur_buf % NUM_BUF_SETS), selected by
 * macroblock->mb_type:
 *   - bi-predicted: maps vertex buffer 2 and stores motion vectors built
 *     from PMV[0][1] (and PMV[1][1] for non-frame motion);
 *   - forward/backward predicted: maps vertex buffer 1; backward
 *     prediction reads PMV[*][1], otherwise PMV[*][0] is read;
 *   - intra: maps vertex buffer 0 as vlMacroBlockVertexStream0 and fills
 *     position/texcoord vertices for the four quadrants of the
 *     macroblock.
 * Motion vectors are scaled by 0.5f and by mc->surface_tex_inv_size.
 * For frame motion only every second vlVertex2f slot is written; for
 * field motion slot i receives mo_vec[0] and slot i + 1 mo_vec[1].
 * NOTE(review): no break statements, the third parameter, the offset
 * applied to the mapped pointers, and the names of the four invocations
 * in the intra case are visible in this listing. Whether the cases fall
 * through cannot be determined from here; the four invocations
 * presumably are SET_BLOCK (argument lists match its parameters).
 */
394 static inline int vlGrabMacroBlockVB
396 struct vlR16SnormBufferedMC
*mc
,
397 struct vlMpeg2MacroBlock
*macroblock
,
401 struct vlVertex2f mo_vec
[2];
407 switch (macroblock
->mb_type
)
409 case vlMacroBlockTypeBiPredicted
:
411 struct vlVertex2f
*vb
;
413 vb
= (struct vlVertex2f
*)mc
->pipe
->winsys
->buffer_map
416 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][2].buffer
,
417 PIPE_BUFFER_USAGE_CPU_WRITE
420 mo_vec
[0].x
= macroblock
->PMV
[0][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
421 mo_vec
[0].y
= macroblock
->PMV
[0][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
423 if (macroblock
->mo_type
== vlMotionTypeFrame
)
/* Frame motion: only the even slots are written. */
425 for (i
= 0; i
< 24 * 2; i
+= 2)
427 vb
[i
].x
= mo_vec
[0].x
;
428 vb
[i
].y
= mo_vec
[0].y
;
433 mo_vec
[1].x
= macroblock
->PMV
[1][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
434 mo_vec
[1].y
= macroblock
->PMV
[1][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
/* Field motion: even slots get mo_vec[0], odd slots mo_vec[1]. */
436 for (i
= 0; i
< 24 * 2; i
+= 2)
438 vb
[i
].x
= mo_vec
[0].x
;
439 vb
[i
].y
= mo_vec
[0].y
;
440 vb
[i
+ 1].x
= mo_vec
[1].x
;
441 vb
[i
+ 1].y
= mo_vec
[1].y
;
445 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][2].buffer
);
449 case vlMacroBlockTypeFwdPredicted
:
450 case vlMacroBlockTypeBkwdPredicted
:
452 struct vlVertex2f
*vb
;
454 vb
= (struct vlVertex2f
*)mc
->pipe
->winsys
->buffer_map
457 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][1].buffer
,
458 PIPE_BUFFER_USAGE_CPU_WRITE
/* Backward prediction reads PMV[*][1]; the other branch PMV[*][0]. */
461 if (macroblock
->mb_type
== vlMacroBlockTypeBkwdPredicted
)
463 mo_vec
[0].x
= macroblock
->PMV
[0][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
464 mo_vec
[0].y
= macroblock
->PMV
[0][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
466 if (macroblock
->mo_type
== vlMotionTypeField
)
468 mo_vec
[1].x
= macroblock
->PMV
[1][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
469 mo_vec
[1].y
= macroblock
->PMV
[1][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
474 mo_vec
[0].x
= macroblock
->PMV
[0][0][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
475 mo_vec
[0].y
= macroblock
->PMV
[0][0][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
477 if (macroblock
->mo_type
== vlMotionTypeField
)
479 mo_vec
[1].x
= macroblock
->PMV
[1][0][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
480 mo_vec
[1].y
= macroblock
->PMV
[1][0][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
484 if (macroblock
->mo_type
== vlMotionTypeFrame
)
486 for (i
= 0; i
< 24 * 2; i
+= 2)
488 vb
[i
].x
= mo_vec
[0].x
;
489 vb
[i
].y
= mo_vec
[0].y
;
494 for (i
= 0; i
< 24 * 2; i
+= 2)
496 vb
[i
].x
= mo_vec
[0].x
;
497 vb
[i
].y
= mo_vec
[0].y
;
498 vb
[i
+ 1].x
= mo_vec
[1].x
;
499 vb
[i
+ 1].y
= mo_vec
[1].y
;
503 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][1].buffer
);
507 case vlMacroBlockTypeIntra
:
/* Size of one macroblock, and of half a macroblock, in texcoord units. */
509 const struct vlVertex2f unit
=
511 mc
->surface_tex_inv_size
.x
* VL_MACROBLOCK_WIDTH
,
512 mc
->surface_tex_inv_size
.y
* VL_MACROBLOCK_HEIGHT
514 const struct vlVertex2f half
=
516 mc
->surface_tex_inv_size
.x
* (VL_MACROBLOCK_WIDTH
/ 2),
517 mc
->surface_tex_inv_size
.y
* (VL_MACROBLOCK_HEIGHT
/ 2)
520 struct vlMacroBlockVertexStream0
522 struct vlVertex2f pos
;
523 struct vlVertex2f luma_tc
;
524 struct vlVertex2f cb_tc
;
525 struct vlVertex2f cr_tc
;
528 vb
= (struct vlMacroBlockVertexStream0
*)mc
->pipe
->winsys
->buffer_map
531 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][0].buffer
,
532 PIPE_BUFFER_USAGE_CPU_WRITE
/* Quadrant at offset (0, 0); mask values 32, 2, 1. */
538 macroblock
->cbp
, macroblock
->mbx
, macroblock
->mby
,
539 unit
.x
, unit
.y
, 0, 0, half
.x
, half
.y
,
540 32, 2, 1, mc
->zero_block
/* Quadrant at offset (half.x, 0); mask values 16, 2, 1. */
546 macroblock
->cbp
, macroblock
->mbx
, macroblock
->mby
,
547 unit
.x
, unit
.y
, half
.x
, 0, half
.x
, half
.y
,
548 16, 2, 1, mc
->zero_block
/* Quadrant at offset (0, half.y); mask values 8, 2, 1. */
554 macroblock
->cbp
, macroblock
->mbx
, macroblock
->mby
,
555 unit
.x
, unit
.y
, 0, half
.y
, half
.x
, half
.y
,
556 8, 2, 1, mc
->zero_block
/* Quadrant at offset (half.x, half.y); mask values 4, 2, 1. */
562 macroblock
->cbp
, macroblock
->mbx
, macroblock
->mby
,
563 unit
.x
, unit
.y
, half
.x
, half
.y
, half
.x
, half
.y
,
564 4, 2, 1, mc
->zero_block
567 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][0].buffer
);
580 struct vlRender
*render
583 struct vlR16SnormBufferedMC
*mc
;
584 struct pipe_context
*pipe
;
585 struct vlVertexShaderConsts
*vs_consts
;
586 unsigned int num_macroblocks
[vlNumMacroBlockExTypes
] = {0};
587 unsigned int offset
[vlNumMacroBlockExTypes
];
588 unsigned int vb_start
= 0;
593 mc
= (struct vlR16SnormBufferedMC
*)render
;
596 for (i
= 0; i
< mc
->num_macroblocks
; ++i
)
598 enum vlMacroBlockTypeEx mb_type_ex
= vlGetMacroBlockTypeEx(&mc
->macroblocks
[i
]);
600 num_macroblocks
[mb_type_ex
]++;
605 for (i
= 1; i
< vlNumMacroBlockExTypes
; ++i
)
606 offset
[i
] = offset
[i
- 1] + num_macroblocks
[i
- 1];
608 for (i
= 0; i
< mc
->num_macroblocks
; ++i
)
610 enum vlMacroBlockTypeEx mb_type_ex
= vlGetMacroBlockTypeEx(&mc
->macroblocks
[i
]);
612 vlGrabMacroBlockVB(mc
, &mc
->macroblocks
[i
], offset
[mb_type_ex
]);
614 offset
[mb_type_ex
]++;
617 mc
->render_target
.cbufs
[0] = pipe
->screen
->get_tex_surface
620 mc
->buffered_surface
->texture
,
621 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ
| PIPE_BUFFER_USAGE_GPU_WRITE
624 pipe
->set_framebuffer_state(pipe
, &mc
->render_target
);
625 pipe
->set_viewport_state(pipe
, &mc
->viewport
);
626 vs_consts
= pipe
->winsys
->buffer_map
629 mc
->vs_const_buf
.buffer
,
630 PIPE_BUFFER_USAGE_CPU_WRITE
633 vs_consts
->denorm
.x
= mc
->buffered_surface
->texture
->width
[0];
634 vs_consts
->denorm
.y
= mc
->buffered_surface
->texture
->height
[0];
636 pipe
->winsys
->buffer_unmap(pipe
->winsys
, mc
->vs_const_buf
.buffer
);
637 pipe
->set_constant_buffer(pipe
, PIPE_SHADER_VERTEX
, 0, &mc
->vs_const_buf
);
638 pipe
->set_constant_buffer(pipe
, PIPE_SHADER_FRAGMENT
, 0, &mc
->fs_const_buf
);
640 if (num_macroblocks
[vlMacroBlockExTypeIntra
] > 0)
642 pipe
->set_vertex_buffers(pipe
, 1, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
643 pipe
->set_vertex_elements(pipe
, 4, mc
->vertex_elems
);
644 pipe
->set_sampler_textures(pipe
, 3, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
645 pipe
->bind_sampler_states(pipe
, 3, (void**)mc
->samplers
);
646 pipe
->bind_vs_state(pipe
, mc
->i_vs
);
647 pipe
->bind_fs_state(pipe
, mc
->i_fs
);
649 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeIntra
] * 24);
650 vb_start
+= num_macroblocks
[vlMacroBlockExTypeIntra
] * 24;
653 if (num_macroblocks
[vlMacroBlockExTypeFwdPredictedFrame
] > 0)
655 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
656 pipe
->set_vertex_elements(pipe
, 6, mc
->vertex_elems
);
657 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
658 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
659 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
660 pipe
->bind_vs_state(pipe
, mc
->p_vs
[0]);
661 pipe
->bind_fs_state(pipe
, mc
->p_fs
[0]);
663 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeFwdPredictedFrame
] * 24);
664 vb_start
+= num_macroblocks
[vlMacroBlockExTypeFwdPredictedFrame
] * 24;
667 if (num_macroblocks
[vlMacroBlockExTypeFwdPredictedField
] > 0)
669 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
670 pipe
->set_vertex_elements(pipe
, 6, mc
->vertex_elems
);
671 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
672 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
673 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
674 pipe
->bind_vs_state(pipe
, mc
->p_vs
[1]);
675 pipe
->bind_fs_state(pipe
, mc
->p_fs
[1]);
677 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeFwdPredictedField
] * 24);
678 vb_start
+= num_macroblocks
[vlMacroBlockExTypeFwdPredictedField
] * 24;
681 if (num_macroblocks
[vlMacroBlockExTypeBkwdPredictedFrame
] > 0)
683 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
684 pipe
->set_vertex_elements(pipe
, 6, mc
->vertex_elems
);
685 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->future_surface
->texture
;
686 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
687 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
688 pipe
->bind_vs_state(pipe
, mc
->p_vs
[0]);
689 pipe
->bind_fs_state(pipe
, mc
->p_fs
[0]);
691 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeBkwdPredictedFrame
] * 24);
692 vb_start
+= num_macroblocks
[vlMacroBlockExTypeBkwdPredictedFrame
] * 24;
695 if (num_macroblocks
[vlMacroBlockExTypeBkwdPredictedField
] > 0)
697 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
698 pipe
->set_vertex_elements(pipe
, 6, mc
->vertex_elems
);
699 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->future_surface
->texture
;
700 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
701 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
702 pipe
->bind_vs_state(pipe
, mc
->p_vs
[1]);
703 pipe
->bind_fs_state(pipe
, mc
->p_fs
[1]);
705 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeBkwdPredictedField
] * 24);
706 vb_start
+= num_macroblocks
[vlMacroBlockExTypeBkwdPredictedField
] * 24;
709 if (num_macroblocks
[vlMacroBlockExTypeBiPredictedFrame
] > 0)
711 pipe
->set_vertex_buffers(pipe
, 3, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
712 pipe
->set_vertex_elements(pipe
, 8, mc
->vertex_elems
);
713 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
714 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][4] = mc
->future_surface
->texture
;
715 pipe
->set_sampler_textures(pipe
, 5, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
716 pipe
->bind_sampler_states(pipe
, 5, (void**)mc
->samplers
);
717 pipe
->bind_vs_state(pipe
, mc
->b_vs
[0]);
718 pipe
->bind_fs_state(pipe
, mc
->b_fs
[0]);
720 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeBiPredictedFrame
] * 24);
721 vb_start
+= num_macroblocks
[vlMacroBlockExTypeBiPredictedFrame
] * 24;
724 if (num_macroblocks
[vlMacroBlockExTypeBiPredictedField
] > 0)
726 pipe
->set_vertex_buffers(pipe
, 3, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
727 pipe
->set_vertex_elements(pipe
, 8, mc
->vertex_elems
);
728 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
729 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][4] = mc
->future_surface
->texture
;
730 pipe
->set_sampler_textures(pipe
, 5, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
731 pipe
->bind_sampler_states(pipe
, 5, (void**)mc
->samplers
);
732 pipe
->bind_vs_state(pipe
, mc
->b_vs
[1]);
733 pipe
->bind_fs_state(pipe
, mc
->b_fs
[1]);
735 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeBiPredictedField
] * 24);
736 vb_start
+= num_macroblocks
[vlMacroBlockExTypeBiPredictedField
] * 24;
739 for (i
= 0; i
< 3; ++i
)
740 mc
->zero_block
[i
].x
= -1.0f
;
741 mc
->num_macroblocks
= 0;
/*
 * Queues a batch of MPEG-2 macroblocks for rendering to surface.
 * If a surface is already buffered, it is compared against the incoming
 * surface (the past/future comparisons are commented out). On both
 * visible paths the renderer then records surface and the batch's
 * past/future surfaces and recomputes surface_tex_inv_size as the
 * reciprocal of the surface texture's width[0] / height[0]. Finally
 * every macroblock of the batch is passed to vlGrabMacroBlock.
 * NOTE(review): the statement executed when the buffered surface
 * differs is not visible in this listing - presumably the pending
 * macroblocks are flushed first; confirm. The return value is not
 * visible either.
 */
747 static int vlRenderMacroBlocksMpeg2R16SnormBuffered
749 struct vlRender
*render
,
750 struct vlMpeg2MacroBlockBatch
*batch
,
751 struct vlSurface
*surface
754 struct vlR16SnormBufferedMC
*mc
;
759 mc
= (struct vlR16SnormBufferedMC
*)render
;
761 if (mc
->buffered_surface
)
765 mc
->buffered_surface
!= surface
/*||
766 mc->past_surface != batch->past_surface ||
767 mc->future_surface != batch->future_surface*/
771 mc
->buffered_surface
= surface
;
772 mc
->past_surface
= batch
->past_surface
;
773 mc
->future_surface
= batch
->future_surface
;
774 mc
->surface_tex_inv_size
.x
= 1.0f
/ surface
->texture
->width
[0];
775 mc
->surface_tex_inv_size
.y
= 1.0f
/ surface
->texture
->height
[0];
/* Same bookkeeping on the other visible path. */
780 mc
->buffered_surface
= surface
;
781 mc
->past_surface
= batch
->past_surface
;
782 mc
->future_surface
= batch
->future_surface
;
783 mc
->surface_tex_inv_size
.x
= 1.0f
/ surface
->texture
->width
[0];
784 mc
->surface_tex_inv_size
.y
= 1.0f
/ surface
->texture
->height
[0];
/* Queue every macroblock of the batch. */
787 for (i
= 0; i
< batch
->num_macroblocks
; ++i
)
788 vlGrabMacroBlock(mc
, &batch
->macroblocks
[i
]);
795 struct vlRender
*render
805 struct vlRender
*render
808 struct vlR16SnormBufferedMC
*mc
;
809 struct pipe_context
*pipe
;
814 mc
= (struct vlR16SnormBufferedMC
*)render
;
817 for (i
= 0; i
< 5; ++i
)
818 pipe
->delete_sampler_state(pipe
, mc
->samplers
[i
]);
820 for (h
= 0; h
< NUM_BUF_SETS
; ++h
)
821 for (i
= 0; i
< 3; ++i
)
822 pipe
->winsys
->buffer_destroy(pipe
->winsys
, mc
->vertex_bufs
[h
][i
].buffer
);
824 /* Textures 3 & 4 are not created directly, no need to release them here */
825 for (i
= 0; i
< NUM_BUF_SETS
; ++i
)
827 pipe_texture_release(&mc
->textures
[i
][0]);
828 pipe_texture_release(&mc
->textures
[i
][1]);
829 pipe_texture_release(&mc
->textures
[i
][2]);
832 pipe
->delete_vs_state(pipe
, mc
->i_vs
);
833 pipe
->delete_fs_state(pipe
, mc
->i_fs
);
835 for (i
= 0; i
< 2; ++i
)
837 pipe
->delete_vs_state(pipe
, mc
->p_vs
[i
]);
838 pipe
->delete_fs_state(pipe
, mc
->p_fs
[i
]);
839 pipe
->delete_vs_state(pipe
, mc
->b_vs
[i
]);
840 pipe
->delete_fs_state(pipe
, mc
->b_fs
[i
]);
843 pipe
->winsys
->buffer_destroy(pipe
->winsys
, mc
->vs_const_buf
.buffer
);
844 pipe
->winsys
->buffer_destroy(pipe
->winsys
, mc
->fs_const_buf
.buffer
);
846 free(mc
->macroblocks
);
853 * Multiplier renormalizes block samples from 16 bits to 12 bits.
854 * Divider is used when calculating Y % 2 for choosing top or bottom
855 * field for P or B macroblocks.
856 * TODO: Use immediates.
/*
 * Constant values for struct vlFragmentShaderConsts, in member order:
 * multiplier = (32767/255, 32767/255, 32767/255, 0) and
 * div = (0.5, 2, 0, 0).
 */
858 static const struct vlFragmentShaderConsts fs_consts
=
860 {32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 0.0f
},
861 {0.5f
, 2.0f
, 0.0f
, 0.0f
}
/*
 * Builds the TGSI vertex shader for intra macroblocks and stores the
 * created state in mc->i_vs. The token stream is assembled in a buffer
 * of max_tokens (50) tokens: version, header and processor tokens, four
 * input declarations, four output declarations, four MOVs copying
 * input i to output i, and one more instruction.
 * NOTE(review): the construction of the last instruction, the
 * initialisation of pipe, ti and vs, any NULL check on the malloc result
 * and any release of tokens are not visible in this listing - confirm
 * against the full file.
 */
864 static int vlCreateVertexShaderIMB
866 struct vlR16SnormBufferedMC
*mc
869 const unsigned int max_tokens
= 50;
871 struct pipe_context
*pipe
;
872 struct pipe_shader_state vs
;
873 struct tgsi_token
*tokens
;
874 struct tgsi_header
*header
;
876 struct tgsi_full_declaration decl
;
877 struct tgsi_full_instruction inst
;
885 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* Fixed preamble: tokens[0] version, tokens[1] header, tokens[2] processor. */
888 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
890 header
= (struct tgsi_header
*)&tokens
[1];
891 *header
= tgsi_build_header();
893 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
898 * decl i0 ; Vertex pos
899 * decl i1 ; Luma texcoords
900 * decl i2 ; Chroma Cb texcoords
901 * decl i3 ; Chroma Cr texcoords
903 for (i
= 0; i
< 4; i
++)
905 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
906 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
910 * decl o0 ; Vertex pos
911 * decl o1 ; Luma texcoords
912 * decl o2 ; Chroma Cb texcoords
913 * decl o3 ; Chroma Cr texcoords
915 for (i
= 0; i
< 4; i
++)
917 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
918 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
922 * mov o0, i0 ; Move input vertex pos to output
923 * mov o1, i1 ; Move input luma texcoords to output
924 * mov o2, i2 ; Move input chroma Cb texcoords to output
925 * mov o3, i3 ; Move input chroma Cr texcoords to output
927 for (i
= 0; i
< 4; ++i
)
929 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
930 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
935 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
938 mc
->i_vs
= pipe
->create_vs_state(pipe
, &vs
);
/*
 * Builds the TGSI fragment shader for intra macroblocks and stores the
 * created state in mc->i_fs. The token stream (max_tokens = 100) holds:
 * version/header/processor tokens, three linearly interpolated inputs,
 * one constant (c0), one colour output, temporaries, three samplers,
 * then for each of the three planes a texture fetch into t1 followed by
 * a MOV of t1.x into one component of t0 (write mask X << i), a MUL of
 * t0 by c0 into the output, and one more instruction.
 * NOTE(review): the construction of the last instruction, the
 * initialisation of pipe, ti and fs, any NULL check on the malloc result
 * and any release of tokens are not visible in this listing - confirm
 * against the full file.
 */
944 static int vlCreateFragmentShaderIMB
946 struct vlR16SnormBufferedMC
*mc
949 const unsigned int max_tokens
= 100;
951 struct pipe_context
*pipe
;
952 struct pipe_shader_state fs
;
953 struct tgsi_token
*tokens
;
954 struct tgsi_header
*header
;
956 struct tgsi_full_declaration decl
;
957 struct tgsi_full_instruction inst
;
965 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* Fixed preamble: tokens[0] version, tokens[1] header, tokens[2] processor. */
968 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
970 header
= (struct tgsi_header
*)&tokens
[1];
971 *header
= tgsi_build_header();
973 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
978 * decl i0 ; Luma texcoords
979 * decl i1 ; Chroma Cb texcoords
980 * decl i2 ; Chroma Cr texcoords
982 for (i
= 0; i
< 3; ++i
)
984 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
985 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
988 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
989 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
990 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
992 /* decl o0 ; Fragment color */
993 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
994 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
997 decl
= vl_decl_temps(0, 1);
998 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1001 * decl s0 ; Sampler for luma texture
1002 * decl s1 ; Sampler for chroma Cb texture
1003 * decl s2 ; Sampler for chroma Cr texture
1005 for (i
= 0; i
< 3; ++i
)
1007 decl
= vl_decl_samplers(i
, i
);
1008 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
,max_tokens
- ti
);
1012 * tex2d t1, i0, s0 ; Read texel from luma texture
1013 * mov t0.x, t1.x ; Move luma sample into .x component
1014 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1015 * mov t0.y, t1.x ; Move Cb sample into .y component
1016 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1017 * mov t0.z, t1.x ; Move Cr sample into .z component
1019 for (i
= 0; i
< 3; ++i
)
1021 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1022 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Source swizzle is forced to .x; the write mask picks component i of t0. */
1024 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1025 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1026 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1027 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1028 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1029 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1033 /* mul o0, t0, c0 ; Rescale texel to correct range */
1034 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1035 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1039 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1042 mc
->i_fs
= pipe
->create_fs_state(pipe
, &fs
);
/*
 * Builds the TGSI vertex shader for frame-predicted macroblocks and
 * stores the created state in mc->p_vs[0]. The token stream
 * (max_tokens = 100) holds: version/header/processor tokens, six input
 * declarations, five output declarations, four MOVs copying input i to
 * output i, an ADD of inputs 0 and 4 into output 4 (vertex position
 * translated by the motion vector), and one more instruction.
 * NOTE(review): the construction of the last instruction, the
 * initialisation of pipe, ti and vs, any NULL check on the malloc result
 * and any release of tokens are not visible in this listing - confirm
 * against the full file.
 */
1048 static int vlCreateVertexShaderFramePMB
1050 struct vlR16SnormBufferedMC
*mc
1053 const unsigned int max_tokens
= 100;
1055 struct pipe_context
*pipe
;
1056 struct pipe_shader_state vs
;
1057 struct tgsi_token
*tokens
;
1058 struct tgsi_header
*header
;
1060 struct tgsi_full_declaration decl
;
1061 struct tgsi_full_instruction inst
;
1069 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* Fixed preamble: tokens[0] version, tokens[1] header, tokens[2] processor. */
1072 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1074 header
= (struct tgsi_header
*)&tokens
[1];
1075 *header
= tgsi_build_header();
1077 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1082 * decl i0 ; Vertex pos
1083 * decl i1 ; Luma texcoords
1084 * decl i2 ; Chroma Cb texcoords
1085 * decl i3 ; Chroma Cr texcoords
1086 * decl i4 ; Ref surface top field texcoords
1087 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
1089 for (i
= 0; i
< 6; i
++)
1091 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1092 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1096 * decl o0 ; Vertex pos
1097 * decl o1 ; Luma texcoords
1098 * decl o2 ; Chroma Cb texcoords
1099 * decl o3 ; Chroma Cr texcoords
1100 * decl o4 ; Ref macroblock texcoords
1102 for (i
= 0; i
< 5; i
++)
1104 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1105 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1109 * mov o0, i0 ; Move input vertex pos to output
1110 * mov o1, i1 ; Move input luma texcoords to output
1111 * mov o2, i2 ; Move input chroma Cb texcoords to output
1112 * mov o3, i3 ; Move input chroma Cr texcoords to output
1114 for (i
= 0; i
< 4; ++i
)
1116 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
1117 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1120 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
1121 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, 4);
1122 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1126 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1129 mc
->p_vs
[0] = pipe
->create_vs_state(pipe
, &vs
);
/*
 * vlCreateVertexShaderFieldPMB
 *
 * Assembles a TGSI vertex-shader token stream and passes it to
 * pipe->create_vs_state(); the resulting state is stored in mc->p_vs[1].
 *
 * Token layout: tokens[0..2] receive the version, header and processor
 * (TGSI_PROCESSOR_VERTEX) tokens.  Every later declaration/instruction is
 * written at &tokens[ti], with max_tokens - ti passed as the remaining
 * space, and ti advances by the value the tgsi_build_full_* call returns.
 *
 * Registers declared: inputs i0-i5 (i0 POSITION, the rest GENERIC),
 * constant c0, outputs o0-o6 (o0 and o6 POSITION, the rest GENERIC).
 * Instructions emitted: o0..o3 = i0..i3, o4 = i0 + i4, o5 = i0 + i5,
 * o6 = i0 * c0.
 *
 * mc: context whose p_vs[1] slot receives the created shader state.
 *
 * NOTE(review): the initial value of ti, the assignment of pipe, the
 * hand-off of tokens to vs and the return statement are not visible in
 * this text -- confirm them against the complete file.
 */
1135 static int vlCreateVertexShaderFieldPMB
1137 struct vlR16SnormBufferedMC
*mc
1140 const unsigned int max_tokens
= 100;
1142 struct pipe_context
*pipe
;
1143 struct pipe_shader_state vs
;
1144 struct tgsi_token
*tokens
;
1145 struct tgsi_header
*header
;
1147 struct tgsi_full_declaration decl
;
1148 struct tgsi_full_instruction inst
;
1156 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* NOTE(review): no NULL check on the malloc() result is visible before tokens[] is written below -- confirm */
1159 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1161 header
= (struct tgsi_header
*)&tokens
[1];
1162 *header
= tgsi_build_header();
1164 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1169 * decl i0 ; Vertex pos
1170 * decl i1 ; Luma texcoords
1171 * decl i2 ; Chroma Cb texcoords
1172 * decl i3 ; Chroma Cr texcoords
1173 * decl i4 ; Ref macroblock top field texcoords
1174 * decl i5 ; Ref macroblock bottom field texcoords
1176 for (i
= 0; i
< 6; i
++)
1178 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1179 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1182 /* decl c0 ; Texcoord denorm coefficients */
1183 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
1184 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1187 * decl o0 ; Vertex pos
1188 * decl o1 ; Luma texcoords
1189 * decl o2 ; Chroma Cb texcoords
1190 * decl o3 ; Chroma Cr texcoords
1191 * decl o4 ; Ref macroblock top field texcoords
1192 * decl o5 ; Ref macroblock bottom field texcoords
1193 * decl o6 ; Denormalized vertex pos
1195 for (i
= 0; i
< 7; i
++)
1197 decl
= vl_decl_output((i
== 0 || i
== 6) ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1198 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1202 * mov o0, i0 ; Move input vertex pos to output
1203 * mov o1, i1 ; Move input luma texcoords to output
1204 * mov o2, i2 ; Move input chroma Cb texcoords to output
1205 * mov o3, i3 ; Move input chroma Cr texcoords to output
1207 for (i
= 0; i
< 4; ++i
)
1209 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
1210 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1214 * add o4, i0, i4 ; Translate vertex pos by motion vec to form top field macroblock texcoords
1215 * add o5, i0, i5 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords
1217 for (i
= 0; i
< 2; ++i
)
1219 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, i
+ 4);
1220 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1223 /* mul o6, i0, c0 ; Denorm vertex pos */
1224 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 6, TGSI_FILE_INPUT
, 0, TGSI_FILE_CONSTANT
, 0);
1225 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no assignment to inst is visible before this final build call; presumably a program-terminating instruction is set up here -- confirm */
1229 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Create the vertex shader state and keep it in slot 1 of p_vs */
1232 mc
->p_vs
[1] = pipe
->create_vs_state(pipe
, &vs
);
/*
 * vlCreateFragmentShaderFramePMB
 *
 * Assembles a TGSI fragment-shader token stream and passes it to
 * pipe->create_fs_state(); the resulting state is stored in mc->p_fs[0].
 *
 * Token layout: tokens[0..2] receive the version, header and processor
 * (TGSI_PROCESSOR_FRAGMENT) tokens.  Every later declaration/instruction
 * is written at &tokens[ti], with max_tokens - ti passed as the remaining
 * space, and ti advances by the value the tgsi_build_full_* call returns.
 *
 * Registers declared: inputs i0-i3 (GENERIC, semantic index i + 1, linear
 * interpolation), constant c0, colour output o0, temporaries via
 * vl_decl_temps(0, 1), samplers s0-s3.
 * Instructions emitted: for each of the three planes a texture fetch into
 * t1 followed by a mov of t1.x into one component of t0 (write mask
 * TGSI_WRITEMASK_X << i); then t0 = t0 * c0, t1 = tex(i3, s3),
 * o0 = t0 + t1.
 *
 * mc: context whose p_fs[0] slot receives the created shader state.
 *
 * NOTE(review): the initial value of ti, the assignment of pipe, the
 * hand-off of tokens to fs and the return statement are not visible in
 * this text -- confirm them against the complete file.
 */
1238 static int vlCreateFragmentShaderFramePMB
1240 struct vlR16SnormBufferedMC
*mc
1243 const unsigned int max_tokens
= 100;
1245 struct pipe_context
*pipe
;
1246 struct pipe_shader_state fs
;
1247 struct tgsi_token
*tokens
;
1248 struct tgsi_header
*header
;
1250 struct tgsi_full_declaration decl
;
1251 struct tgsi_full_instruction inst
;
1259 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* NOTE(review): no NULL check on the malloc() result is visible before tokens[] is written below -- confirm */
1262 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1264 header
= (struct tgsi_header
*)&tokens
[1];
1265 *header
= tgsi_build_header();
1267 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1272 * decl i0 ; Luma texcoords
1273 * decl i1 ; Chroma Cb texcoords
1274 * decl i2 ; Chroma Cr texcoords
1275 * decl i3 ; Ref macroblock texcoords
1277 for (i
= 0; i
< 4; ++i
)
1279 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1280 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1283 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
1284 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
1285 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1287 /* decl o0 ; Fragment color */
1288 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1289 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1292 decl
= vl_decl_temps(0, 1);
1293 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1296 * decl s0 ; Sampler for luma texture
1297 * decl s1 ; Sampler for chroma Cb texture
1298 * decl s2 ; Sampler for chroma Cr texture
1299 * decl s3 ; Sampler for ref surface texture
1301 for (i
= 0; i
< 4; ++i
)
1303 decl
= vl_decl_samplers(i
, i
);
1304 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1308 * tex2d t1, i0, s0 ; Read texel from luma texture
1309 * mov t0.x, t1.x ; Move luma sample into .x component
1310 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1311 * mov t0.y, t1.x ; Move Cb sample into .y component
1312 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1313 * mov t0.z, t1.x ; Move Cr sample into .z component
1315 for (i
= 0; i
< 3; ++i
)
1317 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1318 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1320 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1321 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1322 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1323 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1324 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1325 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1329 /* mul t0, t0, c0 ; Rescale texel to correct range */
1330 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1331 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1333 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
1334 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, 3, TGSI_FILE_SAMPLER
, 3);
1335 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1337 /* add o0, t0, t1 ; Add ref and differential to form final output */
1338 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1339 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no assignment to inst is visible before this final build call; presumably a program-terminating instruction is set up here -- confirm */
1343 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Create the fragment shader state and keep it in slot 0 of p_fs */
1346 mc
->p_fs
[0] = pipe
->create_fs_state(pipe
, &fs
);
/*
 * vlCreateFragmentShaderFieldPMB
 *
 * Assembles a TGSI fragment-shader token stream and passes it to
 * pipe->create_fs_state(); the resulting state is stored in mc->p_fs[1].
 * The token buffer holds max_tokens = 200 entries here.
 *
 * Token layout: tokens[0..2] receive the version, header and processor
 * (TGSI_PROCESSOR_FRAGMENT) tokens.  Every later declaration/instruction
 * is written at &tokens[ti], with max_tokens - ti passed as the remaining
 * space, and ti advances by the value the tgsi_build_full_* call returns.
 *
 * Registers declared: inputs i0-i5 (GENERIC, semantic index i + 1, linear
 * interpolation), constants c0-c1, colour output o0, temporaries via
 * vl_decl_temps(0, 4), samplers s0-s3.
 * Instructions emitted, in order:
 *   - three texture fetches into t1, each followed by a mov of t1.x into
 *     one component of t0 (write mask TGSI_WRITEMASK_X << i)
 *   - t0 = t0 * c0
 *   - t1 = tex(i3, s3), t2 = tex(i4, s3)
 *   - t4 = i5.yyyy - c1.xxxx
 *   - t3 = t4 * c1.xxxx, t3 = floor(t3), t3 = t3 * c1.yyyy, t3 = t4 - t3
 *   - lerp with operands (t3, t1, t2) written to t1
 *   - o0 = t0 + t1
 *
 * mc: context whose p_fs[1] slot receives the created shader state.
 *
 * NOTE(review): the initial value of ti, the assignment of pipe, the
 * hand-off of tokens to fs and the return statement are not visible in
 * this text -- confirm them against the complete file.
 */
1352 static int vlCreateFragmentShaderFieldPMB
1354 struct vlR16SnormBufferedMC
*mc
1357 const unsigned int max_tokens
= 200;
1359 struct pipe_context
*pipe
;
1360 struct pipe_shader_state fs
;
1361 struct tgsi_token
*tokens
;
1362 struct tgsi_header
*header
;
1364 struct tgsi_full_declaration decl
;
1365 struct tgsi_full_instruction inst
;
1373 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* NOTE(review): no NULL check on the malloc() result is visible before tokens[] is written below -- confirm */
1376 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1378 header
= (struct tgsi_header
*)&tokens
[1];
1379 *header
= tgsi_build_header();
1381 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1386 * decl i0 ; Luma texcoords
1387 * decl i1 ; Chroma Cb texcoords
1388 * decl i2 ; Chroma Cr texcoords
1389 * decl i3 ; Ref macroblock top field texcoords
1390 * decl i4 ; Ref macroblock bottom field texcoords
1391 * decl i5 ; Denormalized vertex pos
1393 for (i
= 0; i
< 6; ++i
)
1395 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1396 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1400 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1401 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
1403 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
1404 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1406 /* decl o0 ; Fragment color */
1407 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1408 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1411 decl
= vl_decl_temps(0, 4);
1412 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1415 * decl s0 ; Sampler for luma texture
1416 * decl s1 ; Sampler for chroma Cb texture
1417 * decl s2 ; Sampler for chroma Cr texture
1418 * decl s3 ; Sampler for ref surface texture
1420 for (i
= 0; i
< 4; ++i
)
1422 decl
= vl_decl_samplers(i
, i
);
1423 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1427 * tex2d t1, i0, s0 ; Read texel from luma texture
1428 * mov t0.x, t1.x ; Move luma sample into .x component
1429 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1430 * mov t0.y, t1.x ; Move Cb sample into .y component
1431 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1432 * mov t0.z, t1.x ; Move Cr sample into .z component
1434 for (i
= 0; i
< 3; ++i
)
1436 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1437 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1439 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1440 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1441 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1442 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1443 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1444 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1448 /* mul t0, t0, c0 ; Rescale texel to correct range */
1449 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1450 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1453 * tex2d t1, i3, s3 ; Read texel from ref macroblock top field
1454 * tex2d t2, i4, s3 ; Read texel from ref macroblock bottom field
1456 for (i
= 0; i
< 2; ++i
)
1458 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, 3);
1459 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1462 /* XXX: Pos values off by 0.5? */
1463 /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */
1464 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_INPUT
, 5, TGSI_FILE_CONSTANT
, 1);
1465 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1466 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1467 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1468 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1469 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1470 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1471 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1472 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1473 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1475 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1476 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_CONSTANT
, 1);
1477 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1478 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1479 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1480 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1481 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1483 /* floor t3, t3 ; Get rid of fractional part */
1484 inst
= vl_inst2(TGSI_OPCODE_FLOOR
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3);
1485 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1487 /* mul t3, t3, c1.y ; Multiply by 2 */
1488 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_CONSTANT
, 1);
1489 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1490 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1491 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1492 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1493 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1495 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1496 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_TEMPORARY
, 3);
1497 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1499 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1500 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
1501 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
1502 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1504 /* add o0, t0, t1 ; Add ref and differential to form final output */
1505 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1506 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no assignment to inst is visible before this final build call; presumably a program-terminating instruction is set up here -- confirm */
1510 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Create the fragment shader state and keep it in slot 1 of p_fs */
1513 mc
->p_fs
[1] = pipe
->create_fs_state(pipe
, &fs
);
/*
 * vlCreateVertexShaderFrameBMB
 *
 * Assembles a TGSI vertex-shader token stream and passes it to
 * pipe->create_vs_state(); the resulting state is stored in mc->b_vs[0].
 *
 * Token layout: tokens[0..2] receive the version, header and processor
 * (TGSI_PROCESSOR_VERTEX) tokens.  Every later declaration/instruction is
 * written at &tokens[ti], with max_tokens - ti passed as the remaining
 * space, and ti advances by the value the tgsi_build_full_* call returns.
 *
 * Registers declared: inputs i0-i7 and outputs o0-o5 (index 0 POSITION,
 * the rest GENERIC in both sets).
 * Instructions emitted: o0..o3 = i0..i3, o4 = i0 + i4, o5 = i0 + i6
 * (the second add source index is (i + 2) * 2, i.e. 4 then 6, so i5 and
 * i7 are declared but not read).
 *
 * mc: context whose b_vs[0] slot receives the created shader state.
 *
 * NOTE(review): the initial value of ti, the assignment of pipe, the
 * hand-off of tokens to vs and the return statement are not visible in
 * this text -- confirm them against the complete file.
 */
1519 static int vlCreateVertexShaderFrameBMB
1521 struct vlR16SnormBufferedMC
*mc
1524 const unsigned int max_tokens
= 100;
1526 struct pipe_context
*pipe
;
1527 struct pipe_shader_state vs
;
1528 struct tgsi_token
*tokens
;
1529 struct tgsi_header
*header
;
1531 struct tgsi_full_declaration decl
;
1532 struct tgsi_full_instruction inst
;
1540 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* NOTE(review): no NULL check on the malloc() result is visible before tokens[] is written below -- confirm */
1543 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1545 header
= (struct tgsi_header
*)&tokens
[1];
1546 *header
= tgsi_build_header();
1548 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1553 * decl i0 ; Vertex pos
1554 * decl i1 ; Luma texcoords
1555 * decl i2 ; Chroma Cb texcoords
1556 * decl i3 ; Chroma Cr texcoords
1557 * decl i4 ; First ref macroblock top field texcoords
1558 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
1559 * decl i6 ; Second ref macroblock top field texcoords
1560 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
1562 for (i
= 0; i
< 8; i
++)
1564 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1565 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1569 * decl o0 ; Vertex pos
1570 * decl o1 ; Luma texcoords
1571 * decl o2 ; Chroma Cb texcoords
1572 * decl o3 ; Chroma Cr texcoords
1573 * decl o4 ; First ref macroblock texcoords
1574 * decl o5 ; Second ref macroblock texcoords
1576 for (i
= 0; i
< 6; i
++)
1578 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1579 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1583 * mov o0, i0 ; Move input vertex pos to output
1584 * mov o1, i1 ; Move input luma texcoords to output
1585 * mov o2, i2 ; Move input chroma Cb texcoords to output
1586 * mov o3, i3 ; Move input chroma Cr texcoords to output
1588 for (i
= 0; i
< 4; ++i
)
1590 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
1591 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1595 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
1596 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
1598 for (i
= 0; i
< 2; ++i
)
1600 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, (i
+ 2) * 2);
1601 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no assignment to inst is visible before this final build call; presumably a program-terminating instruction is set up here -- confirm */
1606 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Create the vertex shader state and keep it in slot 0 of b_vs */
1609 mc
->b_vs
[0] = pipe
->create_vs_state(pipe
, &vs
);
/*
 * vlCreateVertexShaderFieldBMB
 *
 * Assembles a TGSI vertex-shader token stream and passes it to
 * pipe->create_vs_state(); the resulting state is stored in mc->b_vs[1].
 *
 * Token layout: tokens[0..2] receive the version, header and processor
 * (TGSI_PROCESSOR_VERTEX) tokens.  Every later declaration/instruction is
 * written at &tokens[ti], with max_tokens - ti passed as the remaining
 * space, and ti advances by the value the tgsi_build_full_* call returns.
 *
 * Registers declared: inputs i0-i7 (i0 POSITION, the rest GENERIC),
 * constants via vl_decl_constants(..., 0, 0, 6), outputs o0-o8 (o0 and o8
 * POSITION, the rest GENERIC), temporaries via vl_decl_temps(0, 1).
 * Instructions emitted: o0..o3 = i0..i3, o4..o7 = i0 + i4..i7,
 * o8 = i0 * c0.
 *
 * mc: context whose b_vs[1] slot receives the created shader state.
 *
 * NOTE(review): the initial value of ti, the assignment of pipe, the
 * hand-off of tokens to vs and the return statement are not visible in
 * this text -- confirm them against the complete file.
 */
1615 static int vlCreateVertexShaderFieldBMB
1617 struct vlR16SnormBufferedMC
*mc
1620 const unsigned int max_tokens
= 100;
1622 struct pipe_context
*pipe
;
1623 struct pipe_shader_state vs
;
1624 struct tgsi_token
*tokens
;
1625 struct tgsi_header
*header
;
1627 struct tgsi_full_declaration decl
;
1628 struct tgsi_full_instruction inst
;
1636 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* NOTE(review): no NULL check on the malloc() result is visible before tokens[] is written below -- confirm */
1639 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1641 header
= (struct tgsi_header
*)&tokens
[1];
1642 *header
= tgsi_build_header();
1644 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1649 * decl i0 ; Vertex pos
1650 * decl i1 ; Luma texcoords
1651 * decl i2 ; Chroma Cb texcoords
1652 * decl i3 ; Chroma Cr texcoords
1653 * decl i4 ; First ref macroblock top field texcoords
1654 * decl i5 ; First ref macroblock bottom field texcoords
1655 * decl i6 ; Second ref macroblock top field texcoords
1656 * decl i7 ; Second ref macroblock bottom field texcoords
1658 for (i
= 0; i
< 8; i
++)
1660 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1661 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1664 /* decl c0 ; Denorm coefficients */
/* NOTE(review): the comment above names only c0 and only constant 0 is read below, yet the last argument here is 6, whereas the field P vertex shader in this file passes 0 for its single c0 -- confirm the intended range */
1665 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 6);
1666 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1669 * decl o0 ; Vertex pos
1670 * decl o1 ; Luma texcoords
1671 * decl o2 ; Chroma Cb texcoords
1672 * decl o3 ; Chroma Cr texcoords
1673 * decl o4 ; First ref macroblock top field texcoords
1674 * decl o5 ; First ref macroblock Bottom field texcoords
1675 * decl o6 ; Second ref macroblock top field texcoords
1676 * decl o7 ; Second ref macroblock Bottom field texcoords
1677 * decl o8 ; Denormalized vertex pos
1679 for (i
= 0; i
< 9; i
++)
1681 decl
= vl_decl_output((i
== 0 || i
== 8) ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1682 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): temporaries are declared here, but none of the instructions visible below uses a TGSI_FILE_TEMPORARY operand -- confirm whether this declaration is still needed */
1686 decl
= vl_decl_temps(0, 1);
1687 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1690 * mov o0, i0 ; Move input vertex pos to output
1691 * mov o1, i1 ; Move input luma texcoords to output
1692 * mov o2, i2 ; Move input chroma Cb texcoords to output
1693 * mov o3, i3 ; Move input chroma Cr texcoords to output
1695 for (i
= 0; i
< 4; ++i
)
1697 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
1698 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1702 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
1703 * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
1704 * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
1705 * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
1707 for (i
= 0; i
< 4; ++i
)
1709 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, i
+ 4);
1710 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1713 /* mul o8, i0, c0 ; Denorm vertex pos */
1714 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 8, TGSI_FILE_INPUT
, 0, TGSI_FILE_CONSTANT
, 0);
1715 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no assignment to inst is visible before this final build call; presumably a program-terminating instruction is set up here -- confirm */
1719 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Create the vertex shader state and keep it in slot 1 of b_vs */
1722 mc
->b_vs
[1] = pipe
->create_vs_state(pipe
, &vs
);
/*
 * vlCreateFragmentShaderFrameBMB
 *
 * Assembles a TGSI fragment-shader token stream and passes it to
 * pipe->create_fs_state(); the resulting state is stored in mc->b_fs[0].
 *
 * Token layout: tokens[0..2] receive the version, header and processor
 * (TGSI_PROCESSOR_FRAGMENT) tokens.  Every later declaration/instruction
 * is written at &tokens[ti], with max_tokens - ti passed as the remaining
 * space, and ti advances by the value the tgsi_build_full_* call returns.
 *
 * Registers declared: inputs i0-i4 (GENERIC, semantic index i + 1, linear
 * interpolation), constants c0-c1, colour output o0, temporaries via
 * vl_decl_temps(0, 2), samplers s0-s4.
 * Instructions emitted, in order:
 *   - three texture fetches into t1, each followed by a mov of t1.x into
 *     one component of t0 (write mask TGSI_WRITEMASK_X << i)
 *   - t0 = t0 * c0
 *   - t1 = tex(i3, s3), t2 = tex(i4, s4)
 *   - lerp with operands (c1.xxxx, t1, t2) written to t1
 *   - o0 = t0 + t1
 *
 * mc: context whose b_fs[0] slot receives the created shader state.
 *
 * NOTE(review): the initial value of ti, the assignment of pipe, the
 * hand-off of tokens to fs and the return statement are not visible in
 * this text -- confirm them against the complete file.
 */
1728 static int vlCreateFragmentShaderFrameBMB
1730 struct vlR16SnormBufferedMC
*mc
1733 const unsigned int max_tokens
= 100;
1735 struct pipe_context
*pipe
;
1736 struct pipe_shader_state fs
;
1737 struct tgsi_token
*tokens
;
1738 struct tgsi_header
*header
;
1740 struct tgsi_full_declaration decl
;
1741 struct tgsi_full_instruction inst
;
1749 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* NOTE(review): no NULL check on the malloc() result is visible before tokens[] is written below -- confirm */
1752 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1754 header
= (struct tgsi_header
*)&tokens
[1];
1755 *header
= tgsi_build_header();
1757 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1762 * decl i0 ; Luma texcoords
1763 * decl i1 ; Chroma Cb texcoords
1764 * decl i2 ; Chroma Cr texcoords
1765 * decl i3 ; First ref macroblock texcoords
1766 * decl i4 ; Second ref macroblock texcoords
1768 for (i
= 0; i
< 5; ++i
)
1770 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1771 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1775 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1776 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
1778 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
1779 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1781 /* decl o0 ; Fragment color */
1782 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1783 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1786 decl
= vl_decl_temps(0, 2);
1787 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1790 * decl s0 ; Sampler for luma texture
1791 * decl s1 ; Sampler for chroma Cb texture
1792 * decl s2 ; Sampler for chroma Cr texture
1793 * decl s3 ; Sampler for first ref surface texture
1794 * decl s4 ; Sampler for second ref surface texture
1796 for (i
= 0; i
< 5; ++i
)
1798 decl
= vl_decl_samplers(i
, i
);
1799 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1803 * tex2d t1, i0, s0 ; Read texel from luma texture
1804 * mov t0.x, t1.x ; Move luma sample into .x component
1805 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1806 * mov t0.y, t1.x ; Move Cb sample into .y component
1807 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1808 * mov t0.z, t1.x ; Move Cr sample into .z component
1810 for (i
= 0; i
< 3; ++i
)
1812 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1813 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1815 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1816 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1817 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1818 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1819 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1820 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1824 /* mul t0, t0, c0 ; Rescale texel to correct range */
1825 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1826 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1829 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
1830 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
1832 for (i
= 0; i
< 2; ++i
)
1834 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, i
+ 3);
1835 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1838 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
1839 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_CONSTANT
, 1, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
1840 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1841 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1842 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1843 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1844 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1846 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1847 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1848 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no assignment to inst is visible before this final build call; presumably a program-terminating instruction is set up here -- confirm */
1852 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Create the fragment shader state and keep it in slot 0 of b_fs */
1855 mc
->b_fs
[0] = pipe
->create_fs_state(pipe
, &fs
);
1861 static int vlCreateFragmentShaderFieldBMB
1863 struct vlR16SnormBufferedMC
*mc
1866 const unsigned int max_tokens
= 200;
1868 struct pipe_context
*pipe
;
1869 struct pipe_shader_state fs
;
1870 struct tgsi_token
*tokens
;
1871 struct tgsi_header
*header
;
1873 struct tgsi_full_declaration decl
;
1874 struct tgsi_full_instruction inst
;
1882 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1885 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1887 header
= (struct tgsi_header
*)&tokens
[1];
1888 *header
= tgsi_build_header();
1890 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1895 * decl i0 ; Luma texcoords
1896 * decl i1 ; Chroma Cb texcoords
1897 * decl i2 ; Chroma Cr texcoords
1898 * decl i3 ; First ref macroblock top field texcoords
1899 * decl i4 ; First ref macroblock bottom field texcoords
1900 * decl i5 ; Second ref macroblock top field texcoords
1901 * decl i6 ; Second ref macroblock bottom field texcoords
1902 * decl i7 ; Denormalized vertex pos
1904 for (i
= 0; i
< 8; ++i
)
1906 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1907 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1911 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1912 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
1913 * ; and for Y-mod-2 top/bottom field selection
1915 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
1916 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1918 /* decl o0 ; Fragment color */
1919 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1920 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1923 decl
= vl_decl_temps(0, 5);
1924 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1927 * decl s0 ; Sampler for luma texture
1928 * decl s1 ; Sampler for chroma Cb texture
1929 * decl s2 ; Sampler for chroma Cr texture
1930 * decl s3 ; Sampler for first ref surface texture
1931 * decl s4 ; Sampler for second ref surface texture
1933 for (i
= 0; i
< 5; ++i
)
1935 decl
= vl_decl_samplers(i
, i
);
1936 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1940 * tex2d t1, i0, s0 ; Read texel from luma texture
1941 * mov t0.x, t1.x ; Move luma sample into .x component
1942 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1943 * mov t0.y, t1.x ; Move Cb sample into .y component
1944 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1945 * mov t0.z, t1.x ; Move Cr sample into .z component
1947 for (i
= 0; i
< 3; ++i
)
1949 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1950 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1952 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1953 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1954 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1955 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1956 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1957 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1961 /* mul t0, t0, c0 ; Rescale texel to correct range */
1962 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1963 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1965 /* XXX: Pos values off by 0.5? */
1966 /* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */
1967 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_INPUT
, 7, TGSI_FILE_CONSTANT
, 1);
1968 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1969 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1970 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1971 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1972 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1973 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1974 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1975 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1976 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1978 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1979 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_CONSTANT
, 1);
1980 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1981 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1982 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1983 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1984 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1986 /* floor t3, t3 ; Get rid of fractional part */
1987 inst
= vl_inst2(TGSI_OPCODE_FLOOR
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3);
1988 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1990 /* mul t3, t3, c1.y ; Multiply by 2 */
1991 inst
= vl_inst3( TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_CONSTANT
, 1);
1992 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1993 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1994 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1995 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1996 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1998 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1999 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_TEMPORARY
, 3);
2000 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2003 * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field
2004 * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field
2006 for (i
= 0; i
< 2; ++i
)
2008 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, 3);
2009 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2012 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
2013 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
2014 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
2015 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2018 * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field
2019 * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field
2021 for (i
= 0; i
< 2; ++i
)
2023 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 4, TGSI_FILE_INPUT
, i
+ 5, TGSI_FILE_SAMPLER
, 4);
2024 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2027 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
2028 /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
2029 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 2, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_TEMPORARY
, 5);
2030 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2032 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
2033 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_CONSTANT
, 1, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
2034 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
2035 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
2036 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
2037 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
2038 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2040 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
2041 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
2042 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2046 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2049 mc
->b_fs
[1] = pipe
->create_fs_state(pipe
, &fs
);
/*
 * Sets up the per-frame data storage used by the motion compensation
 * renderer `mc`:
 *  - for each of the NUM_BUF_SETS buffer sets, three vertex buffers requested
 *    through pipe->winsys->buffer_create (buffer 0 has a pitch of four
 *    vlVertex2f, buffers 1 and 2 a pitch of two vlVertex2f), each sized for
 *    24 vertices per macroblock of a frame;
 *  - the eight entries of mc->vertex_elems describing how those buffers are read;
 *  - the vertex and fragment shader constant buffers;
 *  - the mc->macroblocks array, one entry per macroblock of a frame.
 *
 * NOTE(review): this view of the function is incomplete -- the declarations
 * of `h` and `i`, an assignment to `pipe`, the leading arguments of the
 * buffer_create calls and the return statement are not visible here.
 * Confirm against the full file before relying on this description.
 */
2055 static int vlCreateDataBufs
2057 struct vlR16SnormBufferedMC
*mc
/* Picture dimensions in macroblocks (presumably align() rounds up to a whole macroblock -- confirm) */
2060 const unsigned int mbw
= align(mc
->picture_width
, VL_MACROBLOCK_WIDTH
) / VL_MACROBLOCK_WIDTH
;
2061 const unsigned int mbh
= align(mc
->picture_height
, VL_MACROBLOCK_HEIGHT
) / VL_MACROBLOCK_HEIGHT
;
2062 const unsigned int num_mb_per_frame
= mbw
* mbh
;
2064 struct pipe_context
*pipe
;
2071 /* Create our vertex buffers */
2072 for (h
= 0; h
< NUM_BUF_SETS
; ++h
)
/* Buffer 0 of the set: four vlVertex2f per vertex, 24 vertices per macroblock */
2074 mc
->vertex_bufs
[h
][0].pitch
= sizeof(struct vlVertex2f
) * 4;
2075 mc
->vertex_bufs
[h
][0].max_index
= 24 * num_mb_per_frame
- 1;
2076 mc
->vertex_bufs
[h
][0].buffer_offset
= 0;
2077 mc
->vertex_bufs
[h
][0].buffer
= pipe
->winsys
->buffer_create
2081 PIPE_BUFFER_USAGE_VERTEX
,
2082 sizeof(struct vlVertex2f
) * 4 * 24 * num_mb_per_frame
/* Buffers 1 and 2 of the set: two vlVertex2f per vertex, same vertex count */
2085 for (i
= 1; i
< 3; ++i
)
2087 mc
->vertex_bufs
[h
][i
].pitch
= sizeof(struct vlVertex2f
) * 2;
2088 mc
->vertex_bufs
[h
][i
].max_index
= 24 * num_mb_per_frame
- 1;
2089 mc
->vertex_bufs
[h
][i
].buffer_offset
= 0;
2090 mc
->vertex_bufs
[h
][i
].buffer
= pipe
->winsys
->buffer_create
2094 PIPE_BUFFER_USAGE_VERTEX
,
2095 sizeof(struct vlVertex2f
) * 2 * 24 * num_mb_per_frame
/*
 * Vertex element layout: elements 0-3 are interleaved in vertex buffer 0 at
 * consecutive vlVertex2f offsets, elements 4-5 live in vertex buffer 1 and
 * elements 6-7 in vertex buffer 2. Every element is a two-component
 * PIPE_FORMAT_R32G32_FLOAT.
 */
2100 /* Position element */
2101 mc
->vertex_elems
[0].src_offset
= 0;
2102 mc
->vertex_elems
[0].vertex_buffer_index
= 0;
2103 mc
->vertex_elems
[0].nr_components
= 2;
2104 mc
->vertex_elems
[0].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2106 /* Luma texcoord element */
2107 mc
->vertex_elems
[1].src_offset
= sizeof(struct vlVertex2f
);
2108 mc
->vertex_elems
[1].vertex_buffer_index
= 0;
2109 mc
->vertex_elems
[1].nr_components
= 2;
2110 mc
->vertex_elems
[1].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2112 /* Chroma Cr texcoord element */
2113 mc
->vertex_elems
[2].src_offset
= sizeof(struct vlVertex2f
) * 2;
2114 mc
->vertex_elems
[2].vertex_buffer_index
= 0;
2115 mc
->vertex_elems
[2].nr_components
= 2;
2116 mc
->vertex_elems
[2].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2118 /* Chroma Cb texcoord element */
2119 mc
->vertex_elems
[3].src_offset
= sizeof(struct vlVertex2f
) * 3;
2120 mc
->vertex_elems
[3].vertex_buffer_index
= 0;
2121 mc
->vertex_elems
[3].nr_components
= 2;
2122 mc
->vertex_elems
[3].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2124 /* First ref surface top field texcoord element */
2125 mc
->vertex_elems
[4].src_offset
= 0;
2126 mc
->vertex_elems
[4].vertex_buffer_index
= 1;
2127 mc
->vertex_elems
[4].nr_components
= 2;
2128 mc
->vertex_elems
[4].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2130 /* First ref surface bottom field texcoord element */
2131 mc
->vertex_elems
[5].src_offset
= sizeof(struct vlVertex2f
);
2132 mc
->vertex_elems
[5].vertex_buffer_index
= 1;
2133 mc
->vertex_elems
[5].nr_components
= 2;
2134 mc
->vertex_elems
[5].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2136 /* Second ref surface top field texcoord element */
2137 mc
->vertex_elems
[6].src_offset
= 0;
2138 mc
->vertex_elems
[6].vertex_buffer_index
= 2;
2139 mc
->vertex_elems
[6].nr_components
= 2;
2140 mc
->vertex_elems
[6].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2142 /* Second ref surface bottom field texcoord element */
2143 mc
->vertex_elems
[7].src_offset
= sizeof(struct vlVertex2f
);
2144 mc
->vertex_elems
[7].vertex_buffer_index
= 2;
2145 mc
->vertex_elems
[7].nr_components
= 2;
2146 mc
->vertex_elems
[7].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2148 /* Create our constant buffers (vertex shader first, then fragment shader) */
2149 mc
->vs_const_buf
.size
= sizeof(struct vlVertexShaderConsts
);
2150 mc
->vs_const_buf
.buffer
= pipe
->winsys
->buffer_create
2154 PIPE_BUFFER_USAGE_CONSTANT
,
2155 mc
->vs_const_buf
.size
2158 mc
->fs_const_buf
.size
= sizeof(struct vlFragmentShaderConsts
);
2159 mc
->fs_const_buf
.buffer
= pipe
->winsys
->buffer_create
2163 PIPE_BUFFER_USAGE_CONSTANT
,
2164 mc
->fs_const_buf
.size
/*
 * The fragment shader constant buffer is mapped for CPU write and then
 * unmapped again; sizeof(struct vlFragmentShaderConsts) is the byte count.
 * NOTE(review): the call that consumes the mapping and its source operand
 * are not visible in this view (presumably a copy of the constants) -- confirm.
 */
2169 pipe
->winsys
->buffer_map(pipe
->winsys
, mc
->fs_const_buf
.buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
),
2171 sizeof(struct vlFragmentShaderConsts
)
2174 pipe
->winsys
->buffer_unmap(pipe
->winsys
, mc
->fs_const_buf
.buffer
);
/*
 * One vlMpeg2MacroBlock slot for every macroblock of a frame.
 * NOTE(review): no check of the malloc() result is visible here.
 */
2176 mc
->macroblocks
= malloc(sizeof(struct vlMpeg2MacroBlock
) * num_mb_per_frame
);
/*
 * Initialisation of the rendering state held in `mc`: viewport and render
 * target dimensions, five sampler states, the per-buffer-set R16_SNORM
 * textures, all vertex/fragment shaders and finally the data buffers.
 *
 * NOTE(review): the line carrying this function's name and return type is
 * not visible in this view, nor are the declaration of `i`, an assignment
 * to `pipe` or the return statement -- confirm against the full file.
 */
2183 struct vlR16SnormBufferedMC
*mc
2186 struct pipe_context
*pipe
;
2187 struct pipe_sampler_state sampler
;
2188 struct pipe_texture
template;
2189 unsigned int filters
[5];
2196 /* For MC we render to textures, which are rounded up to nearest POT */
2197 mc
->viewport
.scale
[0] = vlRoundUpPOT(mc
->picture_width
);
2198 mc
->viewport
.scale
[1] = vlRoundUpPOT(mc
->picture_height
);
2199 mc
->viewport
.scale
[2] = 1;
2200 mc
->viewport
.scale
[3] = 1;
2201 mc
->viewport
.translate
[0] = 0;
2202 mc
->viewport
.translate
[1] = 0;
2203 mc
->viewport
.translate
[2] = 0;
2204 mc
->viewport
.translate
[3] = 0;
/* The render target uses the same POT-rounded dimensions as the viewport scale */
2206 mc
->render_target
.width
= vlRoundUpPOT(mc
->picture_width
);
2207 mc
->render_target
.height
= vlRoundUpPOT(mc
->picture_height
);
2208 mc
->render_target
.num_cbufs
= 1;
2209 /* FB for MC stage is a vlSurface created by the user, set at render time */
2210 mc
->render_target
.zsbuf
= NULL
;
/* Per-sampler filter choice: samplers 0-2 use nearest filtering, samplers 3-4 linear */
2212 filters
[0] = PIPE_TEX_FILTER_NEAREST
;
2213 /* FIXME: Linear causes discoloration around block edges */
2214 filters
[1] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST
/*: PIPE_TEX_FILTER_LINEAR*/;
2215 filters
[2] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST
/*: PIPE_TEX_FILTER_LINEAR*/;
2216 filters
[3] = PIPE_TEX_FILTER_LINEAR
;
2217 filters
[4] = PIPE_TEX_FILTER_LINEAR
;
/*
 * Build the five sampler states; they differ only in the min/mag image filter.
 * NOTE(review): no zero-initialisation of `sampler` is visible, so the fields
 * left commented out below may hold indeterminate values -- confirm.
 */
2219 for (i
= 0; i
< 5; ++i
)
2221 sampler
.wrap_s
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
2222 sampler
.wrap_t
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
2223 sampler
.wrap_r
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
2224 sampler
.min_img_filter
= filters
[i
];
2225 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
2226 sampler
.mag_img_filter
= filters
[i
];
2227 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
2228 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
2229 sampler
.normalized_coords
= 1;
2230 /*sampler.prefilter = ;*/
2231 /*sampler.shadow_ambient = ;*/
2232 /*sampler.lod_bias = ;*/
2233 sampler
.min_lod
= 0;
2234 /*sampler.max_lod = ;*/
2235 /*sampler.border_color[i] = ;*/
2236 /*sampler.max_anisotropy = ;*/
2237 mc
->samplers
[i
] = pipe
->create_sampler_state(pipe
, &sampler
);
/* Texture template: single-level 2D R16_SNORM texture at the POT-rounded picture size */
2240 memset(&template, 0, sizeof(struct pipe_texture
));
2241 template.target
= PIPE_TEXTURE_2D
;
2242 template.format
= PIPE_FORMAT_R16_SNORM
;
2243 template.last_level
= 0;
2244 template.width
[0] = vlRoundUpPOT(mc
->picture_width
);
2245 template.height
[0] = vlRoundUpPOT(mc
->picture_height
);
2246 template.depth
[0] = 1;
2247 template.compressed
= 0;
2248 pf_get_block(template.format
, &template.block
);
/* Texture 0 of every buffer set is created at the full template size */
2250 for (i
= 0; i
< NUM_BUF_SETS
; ++i
)
2251 mc
->textures
[i
][0] = pipe
->screen
->texture_create(pipe
->screen
, &template);
/*
 * Shrink the template before creating textures 1 and 2: both dimensions are
 * halved for vlFormatYCbCr420, only the height for vlFormatYCbCr422.
 * NOTE(review): 4:2:2 chroma is normally subsampled horizontally, yet the
 * height is what gets halved here -- confirm this is intended.
 */
2253 if (mc
->picture_format
== vlFormatYCbCr420
)
2255 template.width
[0] = vlRoundUpPOT(mc
->picture_width
/ 2);
2256 template.height
[0] = vlRoundUpPOT(mc
->picture_height
/ 2);
2258 else if (mc
->picture_format
== vlFormatYCbCr422
)
2259 template.height
[0] = vlRoundUpPOT(mc
->picture_height
/ 2);
/* Textures 1 and 2 of every buffer set share the (possibly reduced) template size */
2261 for (i
= 0; i
< NUM_BUF_SETS
; ++i
)
2263 mc
->textures
[i
][1] = pipe
->screen
->texture_create(pipe
->screen
, &template);
2264 mc
->textures
[i
][2] = pipe
->screen
->texture_create(pipe
->screen
, &template);
2267 /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
/* Build every shader variant, then the data buffers; the int result of vlCreateDataBufs is discarded here */
2269 vlCreateVertexShaderIMB(mc
);
2270 vlCreateFragmentShaderIMB(mc
);
2271 vlCreateVertexShaderFramePMB(mc
);
2272 vlCreateVertexShaderFieldPMB(mc
);
2273 vlCreateFragmentShaderFramePMB(mc
);
2274 vlCreateFragmentShaderFieldPMB(mc
);
2275 vlCreateVertexShaderFrameBMB(mc
);
2276 vlCreateVertexShaderFieldBMB(mc
);
2277 vlCreateFragmentShaderFrameBMB(mc
);
2278 vlCreateFragmentShaderFieldBMB(mc
);
2279 vlCreateDataBufs(mc
);
/*
 * Creates a buffered R16 SNORM motion compensation renderer and hands it
 * back to the caller through `render` as a pointer to its embedded
 * vlRender base.
 *
 * Parameters:
 *   pipe            pipe context to associate with the renderer
 *   picture_width   picture width, stored in mc->picture_width
 *   picture_height  picture height, stored in mc->picture_height
 *   picture_format  chroma format of the picture
 *   render          out parameter receiving &mc->base
 *
 * NOTE(review): this view of the function is incomplete -- the declaration
 * of `i`, any use of `pipe` and `picture_format`, further initialisation
 * and the return statement are not visible here. Confirm against the full file.
 */
2284 int vlCreateR16SNormBufferedMC
2286 struct pipe_context
*pipe
,
2287 unsigned int picture_width
,
2288 unsigned int picture_height
,
2289 enum vlFormat picture_format
,
2290 struct vlRender
**render
2293 struct vlR16SnormBufferedMC
*mc
;
/*
 * Zero-initialised allocation of the renderer object.
 * NOTE(review): no check of the calloc() result is visible before `mc` is dereferenced.
 */
2299 mc
= calloc(1, sizeof(struct vlR16SnormBufferedMC
));
/* Populate the vlRender function table with this file's implementations */
2301 mc
->base
.vlBegin
= &vlBegin
;
2302 mc
->base
.vlRenderMacroBlocksMpeg2
= &vlRenderMacroBlocksMpeg2R16SnormBuffered
;
2303 mc
->base
.vlEnd
= &vlEnd
;
2304 mc
->base
.vlFlush
= &vlFlush
;
2305 mc
->base
.vlDestroy
= &vlDestroy
;
2307 mc
->picture_width
= picture_width
;
2308 mc
->picture_height
= picture_height
;
/* No surfaces are attached and no macroblocks are queued yet */
2311 mc
->buffered_surface
= NULL
;
2312 mc
->past_surface
= NULL
;
2313 mc
->future_surface
= NULL
;
/* Each zero_block x coordinate starts at -1.0f (presumably a "not yet assigned" marker -- confirm) */
2314 for (i
= 0; i
< 3; ++i
)
2315 mc
->zero_block
[i
].x
= -1.0f
;
2316 mc
->num_macroblocks
= 0;
/* Expose the renderer to the caller through its vlRender base */
2320 *render
= &mc
->base
;