2 #include "vl_r16snorm_mc_buf.h"
5 #include <pipe/p_context.h>
6 #include <pipe/p_winsys.h>
7 #include <pipe/p_screen.h>
8 #include <pipe/p_state.h>
9 #include <pipe/p_inlines.h>
10 #include <tgsi/tgsi_parse.h>
11 #include <tgsi/tgsi_build.h>
12 #include <util/u_math.h>
13 #include "vl_render.h"
14 #include "vl_shader_build.h"
15 #include "vl_surface.h"
21 * TODO: Dynamically determine number of buf sets to use, based on
22 * video size and available mem, since we can easily run out of memory
23 * for high res videos.
24 * Note: Destroying previous frame's buffers and creating new ones
25 * doesn't work, since the buffers are not actually destroyed until their
26 * fence is signalled, and if we render fast enough we will create faster
29 #define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */
31 enum vlMacroBlockTypeEx
33 vlMacroBlockExTypeIntra
,
34 vlMacroBlockExTypeFwdPredictedFrame
,
35 vlMacroBlockExTypeFwdPredictedField
,
36 vlMacroBlockExTypeBkwdPredictedFrame
,
37 vlMacroBlockExTypeBkwdPredictedField
,
38 vlMacroBlockExTypeBiPredictedFrame
,
39 vlMacroBlockExTypeBiPredictedField
,
41 vlNumMacroBlockExTypes
44 struct vlVertexShaderConsts
46 struct vlVertex4f denorm
;
49 struct vlFragmentShaderConsts
51 struct vlVertex4f multiplier
;
52 struct vlVertex4f div
;
55 struct vlR16SnormBufferedMC
59 unsigned int picture_width
, picture_height
;
60 enum vlFormat picture_format
;
63 struct vlSurface
*buffered_surface
;
64 struct vlSurface
*past_surface
, *future_surface
;
65 struct vlVertex2f surface_tex_inv_size
;
66 struct vlVertex2f zero_block
[3];
67 unsigned int num_macroblocks
;
68 struct vlMpeg2MacroBlock
*macroblocks
;
70 struct pipe_context
*pipe
;
71 struct pipe_viewport_state viewport
;
72 struct pipe_framebuffer_state render_target
;
73 struct pipe_sampler_state
*samplers
[5];
74 struct pipe_texture
*textures
[NUM_BUF_SETS
][5];
75 void *i_vs
, *p_vs
[2], *b_vs
[2];
76 void *i_fs
, *p_fs
[2], *b_fs
[2];
77 struct pipe_vertex_buffer vertex_bufs
[NUM_BUF_SETS
][3];
78 struct pipe_vertex_element vertex_elems
[8];
79 struct pipe_constant_buffer vs_const_buf
, fs_const_buf
;
84 struct vlRender
*render
92 static inline int vlGrabFrameCodedBlock(short *src
, short *dst
, unsigned int dst_pitch
)
96 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
100 src
+ y
* VL_BLOCK_WIDTH
,
107 static inline int vlGrabFieldCodedBlock(short *src
, short *dst
, unsigned int dst_pitch
)
111 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
114 dst
+ y
* dst_pitch
* 2,
115 src
+ y
* VL_BLOCK_WIDTH
,
122 static inline int vlGrabNoBlock(short *dst
, unsigned int dst_pitch
)
126 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
137 static inline int vlGrabBlocks
139 struct vlR16SnormBufferedMC
*mc
,
142 enum vlDCTType dct_type
,
143 unsigned int coded_block_pattern
,
147 struct pipe_surface
*tex_surface
;
149 unsigned int tex_pitch
;
150 unsigned int x
, y
, tb
= 0, sb
= 0;
151 unsigned int mbpx
= mbx
* VL_MACROBLOCK_WIDTH
, mbpy
= mby
* VL_MACROBLOCK_HEIGHT
;
156 tex_surface
= mc
->pipe
->screen
->get_tex_surface
159 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][0],
160 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
163 texels
= pipe_surface_map(tex_surface
, PIPE_BUFFER_USAGE_CPU_WRITE
);
164 tex_pitch
= tex_surface
->stride
/ tex_surface
->block
.size
;
166 texels
+= mbpy
* tex_pitch
+ mbpx
;
168 for (y
= 0; y
< 2; ++y
)
170 for (x
= 0; x
< 2; ++x
, ++tb
)
172 if ((coded_block_pattern
>> (5 - tb
)) & 1)
174 short *cur_block
= blocks
+ sb
* VL_BLOCK_WIDTH
* VL_BLOCK_HEIGHT
;
176 if (dct_type
== vlDCTTypeFrameCoded
)
178 vlGrabFrameCodedBlock
181 texels
+ y
* tex_pitch
* VL_BLOCK_HEIGHT
+ x
* VL_BLOCK_WIDTH
,
187 vlGrabFieldCodedBlock
190 texels
+ y
* tex_pitch
+ x
* VL_BLOCK_WIDTH
,
197 else if (mc
->zero_block
[0].x
< 0.0f
)
199 vlGrabNoBlock(texels
+ y
* tex_pitch
* VL_BLOCK_HEIGHT
+ x
* VL_BLOCK_WIDTH
, tex_pitch
);
201 mc
->zero_block
[0].x
= (mbpx
+ x
* 8) * mc
->surface_tex_inv_size
.x
;
202 mc
->zero_block
[0].y
= (mbpy
+ y
* 8) * mc
->surface_tex_inv_size
.y
;
207 pipe_surface_unmap(tex_surface
);
209 /* TODO: Implement 422, 444 */
213 for (tb
= 0; tb
< 2; ++tb
)
215 tex_surface
= mc
->pipe
->screen
->get_tex_surface
218 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][tb
+ 1],
219 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
222 texels
= pipe_surface_map(tex_surface
, PIPE_BUFFER_USAGE_CPU_WRITE
);
223 tex_pitch
= tex_surface
->stride
/ tex_surface
->block
.size
;
225 texels
+= mbpy
* tex_pitch
+ mbpx
;
227 if ((coded_block_pattern
>> (1 - tb
)) & 1)
229 short *cur_block
= blocks
+ sb
* VL_BLOCK_WIDTH
* VL_BLOCK_HEIGHT
;
231 vlGrabFrameCodedBlock
240 else if (mc
->zero_block
[tb
+ 1].x
< 0.0f
)
242 vlGrabNoBlock(texels
, tex_pitch
);
244 mc
->zero_block
[tb
+ 1].x
= (mbpx
<< 1) * mc
->surface_tex_inv_size
.x
;
245 mc
->zero_block
[tb
+ 1].y
= (mbpy
<< 1) * mc
->surface_tex_inv_size
.y
;
248 pipe_surface_unmap(tex_surface
);
254 static inline enum vlMacroBlockTypeEx
vlGetMacroBlockTypeEx(struct vlMpeg2MacroBlock
*mb
)
260 case vlMacroBlockTypeIntra
:
261 return vlMacroBlockExTypeIntra
;
262 case vlMacroBlockTypeFwdPredicted
:
263 return mb
->mo_type
== vlMotionTypeFrame
?
264 vlMacroBlockExTypeFwdPredictedFrame
: vlMacroBlockExTypeFwdPredictedField
;
265 case vlMacroBlockTypeBkwdPredicted
:
266 return mb
->mo_type
== vlMotionTypeFrame
?
267 vlMacroBlockExTypeBkwdPredictedFrame
: vlMacroBlockExTypeBkwdPredictedField
;
268 case vlMacroBlockTypeBiPredicted
:
269 return mb
->mo_type
== vlMotionTypeFrame
?
270 vlMacroBlockExTypeBiPredictedFrame
: vlMacroBlockExTypeBiPredictedField
;
279 static inline int vlGrabMacroBlock
281 struct vlR16SnormBufferedMC
*mc
,
282 struct vlMpeg2MacroBlock
*macroblock
288 mc
->macroblocks
[mc
->num_macroblocks
].mbx
= macroblock
->mbx
;
289 mc
->macroblocks
[mc
->num_macroblocks
].mby
= macroblock
->mby
;
290 mc
->macroblocks
[mc
->num_macroblocks
].mb_type
= macroblock
->mb_type
;
291 mc
->macroblocks
[mc
->num_macroblocks
].mo_type
= macroblock
->mo_type
;
292 mc
->macroblocks
[mc
->num_macroblocks
].dct_type
= macroblock
->dct_type
;
293 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[0][0][0] = macroblock
->PMV
[0][0][0];
294 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[0][0][1] = macroblock
->PMV
[0][0][1];
295 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[0][1][0] = macroblock
->PMV
[0][1][0];
296 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[0][1][1] = macroblock
->PMV
[0][1][1];
297 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[1][0][0] = macroblock
->PMV
[1][0][0];
298 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[1][0][1] = macroblock
->PMV
[1][0][1];
299 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[1][1][0] = macroblock
->PMV
[1][1][0];
300 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[1][1][1] = macroblock
->PMV
[1][1][1];
301 mc
->macroblocks
[mc
->num_macroblocks
].cbp
= macroblock
->cbp
;
302 mc
->macroblocks
[mc
->num_macroblocks
].blocks
= macroblock
->blocks
;
309 macroblock
->dct_type
,
314 mc
->num_macroblocks
++;
319 #define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zb) \
320 (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \
321 (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \
322 (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \
323 (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \
324 (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \
325 (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \
329 (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \
330 (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
331 (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \
332 (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \
333 (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
334 (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
338 (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \
339 (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \
340 (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \
341 (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \
342 (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \
343 (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \
348 (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \
349 (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
350 (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \
351 (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \
352 (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
353 (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
357 (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \
358 (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \
359 (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \
360 (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \
361 (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \
362 (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \
367 (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \
368 (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
369 (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \
370 (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \
371 (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
372 (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
376 (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \
377 (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \
378 (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \
379 (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \
380 (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \
381 (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \
384 static inline int vlGrabMacroBlockVB
386 struct vlR16SnormBufferedMC
*mc
,
387 struct vlMpeg2MacroBlock
*macroblock
,
391 struct vlVertex2f mo_vec
[2];
397 switch (macroblock
->mb_type
)
399 case vlMacroBlockTypeBiPredicted
:
401 struct vlVertex2f
*vb
;
403 vb
= (struct vlVertex2f
*)mc
->pipe
->winsys
->buffer_map
406 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][2].buffer
,
407 PIPE_BUFFER_USAGE_CPU_WRITE
410 mo_vec
[0].x
= macroblock
->PMV
[0][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
411 mo_vec
[0].y
= macroblock
->PMV
[0][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
413 if (macroblock
->mo_type
== vlMotionTypeFrame
)
415 for (i
= 0; i
< 24 * 2; i
+= 2)
417 vb
[i
].x
= mo_vec
[0].x
;
418 vb
[i
].y
= mo_vec
[0].y
;
423 mo_vec
[1].x
= macroblock
->PMV
[1][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
424 mo_vec
[1].y
= macroblock
->PMV
[1][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
426 for (i
= 0; i
< 24 * 2; i
+= 2)
428 vb
[i
].x
= mo_vec
[0].x
;
429 vb
[i
].y
= mo_vec
[0].y
;
430 vb
[i
+ 1].x
= mo_vec
[1].x
;
431 vb
[i
+ 1].y
= mo_vec
[1].y
;
435 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][2].buffer
);
439 case vlMacroBlockTypeFwdPredicted
:
440 case vlMacroBlockTypeBkwdPredicted
:
442 struct vlVertex2f
*vb
;
444 vb
= (struct vlVertex2f
*)mc
->pipe
->winsys
->buffer_map
447 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][1].buffer
,
448 PIPE_BUFFER_USAGE_CPU_WRITE
451 if (macroblock
->mb_type
== vlMacroBlockTypeBkwdPredicted
)
453 mo_vec
[0].x
= macroblock
->PMV
[0][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
454 mo_vec
[0].y
= macroblock
->PMV
[0][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
456 if (macroblock
->mo_type
== vlMotionTypeField
)
458 mo_vec
[1].x
= macroblock
->PMV
[1][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
459 mo_vec
[1].y
= macroblock
->PMV
[1][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
464 mo_vec
[0].x
= macroblock
->PMV
[0][0][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
465 mo_vec
[0].y
= macroblock
->PMV
[0][0][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
467 if (macroblock
->mo_type
== vlMotionTypeField
)
469 mo_vec
[1].x
= macroblock
->PMV
[1][0][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
470 mo_vec
[1].y
= macroblock
->PMV
[1][0][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
474 if (macroblock
->mo_type
== vlMotionTypeFrame
)
476 for (i
= 0; i
< 24 * 2; i
+= 2)
478 vb
[i
].x
= mo_vec
[0].x
;
479 vb
[i
].y
= mo_vec
[0].y
;
484 for (i
= 0; i
< 24 * 2; i
+= 2)
486 vb
[i
].x
= mo_vec
[0].x
;
487 vb
[i
].y
= mo_vec
[0].y
;
488 vb
[i
+ 1].x
= mo_vec
[1].x
;
489 vb
[i
+ 1].y
= mo_vec
[1].y
;
493 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][1].buffer
);
497 case vlMacroBlockTypeIntra
:
499 const struct vlVertex2f unit
=
501 mc
->surface_tex_inv_size
.x
* VL_MACROBLOCK_WIDTH
,
502 mc
->surface_tex_inv_size
.y
* VL_MACROBLOCK_HEIGHT
504 const struct vlVertex2f half
=
506 mc
->surface_tex_inv_size
.x
* (VL_MACROBLOCK_WIDTH
/ 2),
507 mc
->surface_tex_inv_size
.y
* (VL_MACROBLOCK_HEIGHT
/ 2)
510 struct vlMacroBlockVertexStream0
512 struct vlVertex2f pos
;
513 struct vlVertex2f luma_tc
;
514 struct vlVertex2f cb_tc
;
515 struct vlVertex2f cr_tc
;
518 vb
= (struct vlMacroBlockVertexStream0
*)mc
->pipe
->winsys
->buffer_map
521 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][0].buffer
,
522 PIPE_BUFFER_USAGE_CPU_WRITE
528 macroblock
->cbp
, macroblock
->mbx
, macroblock
->mby
,
529 unit
.x
, unit
.y
, 0, 0, half
.x
, half
.y
,
530 32, 2, 1, mc
->zero_block
536 macroblock
->cbp
, macroblock
->mbx
, macroblock
->mby
,
537 unit
.x
, unit
.y
, half
.x
, 0, half
.x
, half
.y
,
538 16, 2, 1, mc
->zero_block
544 macroblock
->cbp
, macroblock
->mbx
, macroblock
->mby
,
545 unit
.x
, unit
.y
, 0, half
.y
, half
.x
, half
.y
,
546 8, 2, 1, mc
->zero_block
552 macroblock
->cbp
, macroblock
->mbx
, macroblock
->mby
,
553 unit
.x
, unit
.y
, half
.x
, half
.y
, half
.x
, half
.y
,
554 4, 2, 1, mc
->zero_block
557 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][0].buffer
);
570 struct vlRender
*render
573 struct vlR16SnormBufferedMC
*mc
;
574 struct pipe_context
*pipe
;
575 struct vlVertexShaderConsts
*vs_consts
;
576 unsigned int num_macroblocks
[vlNumMacroBlockExTypes
] = {0};
577 unsigned int offset
[vlNumMacroBlockExTypes
];
578 unsigned int vb_start
= 0;
583 mc
= (struct vlR16SnormBufferedMC
*)render
;
585 if (!mc
->buffered_surface
)
590 for (i
= 0; i
< mc
->num_macroblocks
; ++i
)
592 enum vlMacroBlockTypeEx mb_type_ex
= vlGetMacroBlockTypeEx(&mc
->macroblocks
[i
]);
594 num_macroblocks
[mb_type_ex
]++;
599 for (i
= 1; i
< vlNumMacroBlockExTypes
; ++i
)
600 offset
[i
] = offset
[i
- 1] + num_macroblocks
[i
- 1];
602 for (i
= 0; i
< mc
->num_macroblocks
; ++i
)
604 enum vlMacroBlockTypeEx mb_type_ex
= vlGetMacroBlockTypeEx(&mc
->macroblocks
[i
]);
606 vlGrabMacroBlockVB(mc
, &mc
->macroblocks
[i
], offset
[mb_type_ex
]);
608 offset
[mb_type_ex
]++;
611 mc
->render_target
.cbufs
[0] = pipe
->screen
->get_tex_surface
614 mc
->buffered_surface
->texture
,
615 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ
| PIPE_BUFFER_USAGE_GPU_WRITE
618 pipe
->set_framebuffer_state(pipe
, &mc
->render_target
);
619 pipe
->set_viewport_state(pipe
, &mc
->viewport
);
620 vs_consts
= pipe
->winsys
->buffer_map
623 mc
->vs_const_buf
.buffer
,
624 PIPE_BUFFER_USAGE_CPU_WRITE
627 vs_consts
->denorm
.x
= mc
->buffered_surface
->texture
->width
[0];
628 vs_consts
->denorm
.y
= mc
->buffered_surface
->texture
->height
[0];
630 pipe
->winsys
->buffer_unmap(pipe
->winsys
, mc
->vs_const_buf
.buffer
);
631 pipe
->set_constant_buffer(pipe
, PIPE_SHADER_VERTEX
, 0, &mc
->vs_const_buf
);
632 pipe
->set_constant_buffer(pipe
, PIPE_SHADER_FRAGMENT
, 0, &mc
->fs_const_buf
);
634 if (num_macroblocks
[vlMacroBlockExTypeIntra
] > 0)
636 pipe
->set_vertex_buffers(pipe
, 1, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
637 pipe
->set_vertex_elements(pipe
, 4, mc
->vertex_elems
);
638 pipe
->set_sampler_textures(pipe
, 3, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
639 pipe
->bind_sampler_states(pipe
, 3, (void**)mc
->samplers
);
640 pipe
->bind_vs_state(pipe
, mc
->i_vs
);
641 pipe
->bind_fs_state(pipe
, mc
->i_fs
);
643 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeIntra
] * 24);
644 vb_start
+= num_macroblocks
[vlMacroBlockExTypeIntra
] * 24;
647 if (num_macroblocks
[vlMacroBlockExTypeFwdPredictedFrame
] > 0)
649 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
650 pipe
->set_vertex_elements(pipe
, 6, mc
->vertex_elems
);
651 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
652 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
653 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
654 pipe
->bind_vs_state(pipe
, mc
->p_vs
[0]);
655 pipe
->bind_fs_state(pipe
, mc
->p_fs
[0]);
657 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeFwdPredictedFrame
] * 24);
658 vb_start
+= num_macroblocks
[vlMacroBlockExTypeFwdPredictedFrame
] * 24;
661 if (num_macroblocks
[vlMacroBlockExTypeFwdPredictedField
] > 0)
663 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
664 pipe
->set_vertex_elements(pipe
, 6, mc
->vertex_elems
);
665 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
666 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
667 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
668 pipe
->bind_vs_state(pipe
, mc
->p_vs
[1]);
669 pipe
->bind_fs_state(pipe
, mc
->p_fs
[1]);
671 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeFwdPredictedField
] * 24);
672 vb_start
+= num_macroblocks
[vlMacroBlockExTypeFwdPredictedField
] * 24;
675 if (num_macroblocks
[vlMacroBlockExTypeBkwdPredictedFrame
] > 0)
677 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
678 pipe
->set_vertex_elements(pipe
, 6, mc
->vertex_elems
);
679 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->future_surface
->texture
;
680 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
681 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
682 pipe
->bind_vs_state(pipe
, mc
->p_vs
[0]);
683 pipe
->bind_fs_state(pipe
, mc
->p_fs
[0]);
685 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeBkwdPredictedFrame
] * 24);
686 vb_start
+= num_macroblocks
[vlMacroBlockExTypeBkwdPredictedFrame
] * 24;
689 if (num_macroblocks
[vlMacroBlockExTypeBkwdPredictedField
] > 0)
691 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
692 pipe
->set_vertex_elements(pipe
, 6, mc
->vertex_elems
);
693 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->future_surface
->texture
;
694 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
695 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
696 pipe
->bind_vs_state(pipe
, mc
->p_vs
[1]);
697 pipe
->bind_fs_state(pipe
, mc
->p_fs
[1]);
699 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeBkwdPredictedField
] * 24);
700 vb_start
+= num_macroblocks
[vlMacroBlockExTypeBkwdPredictedField
] * 24;
703 if (num_macroblocks
[vlMacroBlockExTypeBiPredictedFrame
] > 0)
705 pipe
->set_vertex_buffers(pipe
, 3, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
706 pipe
->set_vertex_elements(pipe
, 8, mc
->vertex_elems
);
707 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
708 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][4] = mc
->future_surface
->texture
;
709 pipe
->set_sampler_textures(pipe
, 5, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
710 pipe
->bind_sampler_states(pipe
, 5, (void**)mc
->samplers
);
711 pipe
->bind_vs_state(pipe
, mc
->b_vs
[0]);
712 pipe
->bind_fs_state(pipe
, mc
->b_fs
[0]);
714 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeBiPredictedFrame
] * 24);
715 vb_start
+= num_macroblocks
[vlMacroBlockExTypeBiPredictedFrame
] * 24;
718 if (num_macroblocks
[vlMacroBlockExTypeBiPredictedField
] > 0)
720 pipe
->set_vertex_buffers(pipe
, 3, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
721 pipe
->set_vertex_elements(pipe
, 8, mc
->vertex_elems
);
722 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
723 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][4] = mc
->future_surface
->texture
;
724 pipe
->set_sampler_textures(pipe
, 5, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
725 pipe
->bind_sampler_states(pipe
, 5, (void**)mc
->samplers
);
726 pipe
->bind_vs_state(pipe
, mc
->b_vs
[1]);
727 pipe
->bind_fs_state(pipe
, mc
->b_fs
[1]);
729 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeBiPredictedField
] * 24);
730 vb_start
+= num_macroblocks
[vlMacroBlockExTypeBiPredictedField
] * 24;
733 pipe
->flush(pipe
, PIPE_FLUSH_RENDER_CACHE
, &mc
->buffered_surface
->render_fence
);
735 for (i
= 0; i
< 3; ++i
)
736 mc
->zero_block
[i
].x
= -1.0f
;
738 mc
->buffered_surface
= NULL
;
739 mc
->num_macroblocks
= 0;
745 static int vlRenderMacroBlocksMpeg2R16SnormBuffered
747 struct vlRender
*render
,
748 struct vlMpeg2MacroBlockBatch
*batch
,
749 struct vlSurface
*surface
752 struct vlR16SnormBufferedMC
*mc
;
757 mc
= (struct vlR16SnormBufferedMC
*)render
;
759 if (mc
->buffered_surface
)
761 if (mc
->buffered_surface
!= surface
)
764 mc
->buffered_surface
= surface
;
765 mc
->past_surface
= batch
->past_surface
;
766 mc
->future_surface
= batch
->future_surface
;
767 mc
->surface_tex_inv_size
.x
= 1.0f
/ surface
->texture
->width
[0];
768 mc
->surface_tex_inv_size
.y
= 1.0f
/ surface
->texture
->height
[0];
773 mc
->buffered_surface
= surface
;
774 mc
->past_surface
= batch
->past_surface
;
775 mc
->future_surface
= batch
->future_surface
;
776 mc
->surface_tex_inv_size
.x
= 1.0f
/ surface
->texture
->width
[0];
777 mc
->surface_tex_inv_size
.y
= 1.0f
/ surface
->texture
->height
[0];
780 for (i
= 0; i
< batch
->num_macroblocks
; ++i
)
781 vlGrabMacroBlock(mc
, &batch
->macroblocks
[i
]);
788 struct vlRender
*render
798 struct vlRender
*render
801 struct vlR16SnormBufferedMC
*mc
;
802 struct pipe_context
*pipe
;
807 mc
= (struct vlR16SnormBufferedMC
*)render
;
810 for (i
= 0; i
< 5; ++i
)
811 pipe
->delete_sampler_state(pipe
, mc
->samplers
[i
]);
813 for (h
= 0; h
< NUM_BUF_SETS
; ++h
)
814 for (i
= 0; i
< 3; ++i
)
815 pipe
->winsys
->buffer_destroy(pipe
->winsys
, mc
->vertex_bufs
[h
][i
].buffer
);
817 /* Textures 3 & 4 are not created directly, no need to release them here */
818 for (i
= 0; i
< NUM_BUF_SETS
; ++i
)
820 pipe_texture_release(&mc
->textures
[i
][0]);
821 pipe_texture_release(&mc
->textures
[i
][1]);
822 pipe_texture_release(&mc
->textures
[i
][2]);
825 pipe
->delete_vs_state(pipe
, mc
->i_vs
);
826 pipe
->delete_fs_state(pipe
, mc
->i_fs
);
828 for (i
= 0; i
< 2; ++i
)
830 pipe
->delete_vs_state(pipe
, mc
->p_vs
[i
]);
831 pipe
->delete_fs_state(pipe
, mc
->p_fs
[i
]);
832 pipe
->delete_vs_state(pipe
, mc
->b_vs
[i
]);
833 pipe
->delete_fs_state(pipe
, mc
->b_fs
[i
]);
836 pipe
->winsys
->buffer_destroy(pipe
->winsys
, mc
->vs_const_buf
.buffer
);
837 pipe
->winsys
->buffer_destroy(pipe
->winsys
, mc
->fs_const_buf
.buffer
);
839 free(mc
->macroblocks
);
846 * Multiplier renormalizes block samples from 16 bits to 12 bits.
847 * Divider is used when calculating Y % 2 for choosing top or bottom
848 * field for P or B macroblocks.
849 * TODO: Use immediates.
851 static const struct vlFragmentShaderConsts fs_consts
=
853 {32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 0.0f
},
854 {0.5f
, 2.0f
, 0.0f
, 0.0f
}
857 static int vlCreateVertexShaderIMB
859 struct vlR16SnormBufferedMC
*mc
862 const unsigned int max_tokens
= 50;
864 struct pipe_context
*pipe
;
865 struct pipe_shader_state vs
;
866 struct tgsi_token
*tokens
;
867 struct tgsi_header
*header
;
869 struct tgsi_full_declaration decl
;
870 struct tgsi_full_instruction inst
;
878 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
881 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
883 header
= (struct tgsi_header
*)&tokens
[1];
884 *header
= tgsi_build_header();
886 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
891 * decl i0 ; Vertex pos
892 * decl i1 ; Luma texcoords
893 * decl i2 ; Chroma Cb texcoords
894 * decl i3 ; Chroma Cr texcoords
896 for (i
= 0; i
< 4; i
++)
898 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
899 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
903 * decl o0 ; Vertex pos
904 * decl o1 ; Luma texcoords
905 * decl o2 ; Chroma Cb texcoords
906 * decl o3 ; Chroma Cr texcoords
908 for (i
= 0; i
< 4; i
++)
910 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
911 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
915 * mov o0, i0 ; Move input vertex pos to output
916 * mov o1, i1 ; Move input luma texcoords to output
917 * mov o2, i2 ; Move input chroma Cb texcoords to output
918 * mov o3, i3 ; Move input chroma Cr texcoords to output
920 for (i
= 0; i
< 4; ++i
)
922 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
923 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
928 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
931 mc
->i_vs
= pipe
->create_vs_state(pipe
, &vs
);
937 static int vlCreateFragmentShaderIMB
939 struct vlR16SnormBufferedMC
*mc
942 const unsigned int max_tokens
= 100;
944 struct pipe_context
*pipe
;
945 struct pipe_shader_state fs
;
946 struct tgsi_token
*tokens
;
947 struct tgsi_header
*header
;
949 struct tgsi_full_declaration decl
;
950 struct tgsi_full_instruction inst
;
958 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
961 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
963 header
= (struct tgsi_header
*)&tokens
[1];
964 *header
= tgsi_build_header();
966 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
971 * decl i0 ; Luma texcoords
972 * decl i1 ; Chroma Cb texcoords
973 * decl i2 ; Chroma Cr texcoords
975 for (i
= 0; i
< 3; ++i
)
977 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
978 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
981 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
982 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
983 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
985 /* decl o0 ; Fragment color */
986 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
987 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
990 decl
= vl_decl_temps(0, 1);
991 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
994 * decl s0 ; Sampler for luma texture
995 * decl s1 ; Sampler for chroma Cb texture
996 * decl s2 ; Sampler for chroma Cr texture
998 for (i
= 0; i
< 3; ++i
)
1000 decl
= vl_decl_samplers(i
, i
);
1001 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
,max_tokens
- ti
);
1005 * tex2d t1, i0, s0 ; Read texel from luma texture
1006 * mov t0.x, t1.x ; Move luma sample into .x component
1007 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1008 * mov t0.y, t1.x ; Move Cb sample into .y component
1009 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1010 * mov t0.z, t1.x ; Move Cr sample into .z component
1012 for (i
= 0; i
< 3; ++i
)
1014 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1015 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1017 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1018 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1019 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1020 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1021 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1022 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1025 /* mul o0, t0, c0 ; Rescale texel to correct range */
1026 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1027 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1031 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1034 mc
->i_fs
= pipe
->create_fs_state(pipe
, &fs
);
1040 static int vlCreateVertexShaderFramePMB
1042 struct vlR16SnormBufferedMC
*mc
1045 const unsigned int max_tokens
= 100;
1047 struct pipe_context
*pipe
;
1048 struct pipe_shader_state vs
;
1049 struct tgsi_token
*tokens
;
1050 struct tgsi_header
*header
;
1052 struct tgsi_full_declaration decl
;
1053 struct tgsi_full_instruction inst
;
1061 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1064 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1066 header
= (struct tgsi_header
*)&tokens
[1];
1067 *header
= tgsi_build_header();
1069 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1074 * decl i0 ; Vertex pos
1075 * decl i1 ; Luma texcoords
1076 * decl i2 ; Chroma Cb texcoords
1077 * decl i3 ; Chroma Cr texcoords
1078 * decl i4 ; Ref surface top field texcoords
1079 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
1081 for (i
= 0; i
< 6; i
++)
1083 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1084 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1088 * decl o0 ; Vertex pos
1089 * decl o1 ; Luma texcoords
1090 * decl o2 ; Chroma Cb texcoords
1091 * decl o3 ; Chroma Cr texcoords
1092 * decl o4 ; Ref macroblock texcoords
1094 for (i
= 0; i
< 5; i
++)
1096 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1097 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1101 * mov o0, i0 ; Move input vertex pos to output
1102 * mov o1, i1 ; Move input luma texcoords to output
1103 * mov o2, i2 ; Move input chroma Cb texcoords to output
1104 * mov o3, i3 ; Move input chroma Cr texcoords to output
1106 for (i
= 0; i
< 4; ++i
)
1108 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
1109 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1112 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
1113 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, 4);
1114 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1118 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1121 mc
->p_vs
[0] = pipe
->create_vs_state(pipe
, &vs
);
1127 static int vlCreateVertexShaderFieldPMB
1129 struct vlR16SnormBufferedMC
*mc
1132 const unsigned int max_tokens
= 100;
1134 struct pipe_context
*pipe
;
1135 struct pipe_shader_state vs
;
1136 struct tgsi_token
*tokens
;
1137 struct tgsi_header
*header
;
1139 struct tgsi_full_declaration decl
;
1140 struct tgsi_full_instruction inst
;
1148 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1151 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1153 header
= (struct tgsi_header
*)&tokens
[1];
1154 *header
= tgsi_build_header();
1156 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1161 * decl i0 ; Vertex pos
1162 * decl i1 ; Luma texcoords
1163 * decl i2 ; Chroma Cb texcoords
1164 * decl i3 ; Chroma Cr texcoords
1165 * decl i4 ; Ref macroblock top field texcoords
1166 * decl i5 ; Ref macroblock bottom field texcoords
1168 for (i
= 0; i
< 6; i
++)
1170 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1171 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1174 /* decl c0 ; Render target dimensions */
1175 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
1176 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1179 * decl o0 ; Vertex pos
1180 * decl o1 ; Luma texcoords
1181 * decl o2 ; Chroma Cb texcoords
1182 * decl o3 ; Chroma Cr texcoords
1183 * decl o4 ; Ref macroblock top field texcoords
1184 * decl o5 ; Ref macroblock bottom field texcoords
1185 * decl o6 ; Denormalized vertex pos
1187 for (i
= 0; i
< 7; i
++)
1189 decl
= vl_decl_output((i
== 0 || i
== 6) ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1190 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1194 * mov o0, i0 ; Move input vertex pos to output
1195 * mov o1, i1 ; Move input luma texcoords to output
1196 * mov o2, i2 ; Move input chroma Cb texcoords to output
1197 * mov o3, i3 ; Move input chroma Cr texcoords to output
1199 for (i
= 0; i
< 4; ++i
)
1201 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
1202 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1206 * add o4, i0, i4 ; Translate vertex pos by motion vec to form top field macroblock texcoords
1207 * add o5, i0, i5 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords
1209 for (i
= 0; i
< 2; ++i
)
1211 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, i
+ 4);
1212 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1215 /* mul o6, i0, c0 ; Denorm vertex pos */
1216 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 6, TGSI_FILE_INPUT
, 0, TGSI_FILE_CONSTANT
, 0);
1217 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1221 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1224 mc
->p_vs
[1] = pipe
->create_vs_state(pipe
, &vs
);
1230 static int vlCreateFragmentShaderFramePMB
1232 struct vlR16SnormBufferedMC
*mc
1235 const unsigned int max_tokens
= 100;
1237 struct pipe_context
*pipe
;
1238 struct pipe_shader_state fs
;
1239 struct tgsi_token
*tokens
;
1240 struct tgsi_header
*header
;
1242 struct tgsi_full_declaration decl
;
1243 struct tgsi_full_instruction inst
;
1251 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1254 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1256 header
= (struct tgsi_header
*)&tokens
[1];
1257 *header
= tgsi_build_header();
1259 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1264 * decl i0 ; Luma texcoords
1265 * decl i1 ; Chroma Cb texcoords
1266 * decl i2 ; Chroma Cr texcoords
1267 * decl i3 ; Ref macroblock texcoords
1269 for (i
= 0; i
< 4; ++i
)
1271 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1272 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1275 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
1276 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
1277 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1279 /* decl o0 ; Fragment color */
1280 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1281 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1284 decl
= vl_decl_temps(0, 1);
1285 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1288 * decl s0 ; Sampler for luma texture
1289 * decl s1 ; Sampler for chroma Cb texture
1290 * decl s2 ; Sampler for chroma Cr texture
1291 * decl s3 ; Sampler for ref surface texture
1293 for (i
= 0; i
< 4; ++i
)
1295 decl
= vl_decl_samplers(i
, i
);
1296 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1300 * tex2d t1, i0, s0 ; Read texel from luma texture
1301 * mov t0.x, t1.x ; Move luma sample into .x component
1302 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1303 * mov t0.y, t1.x ; Move Cb sample into .y component
1304 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1305 * mov t0.z, t1.x ; Move Cr sample into .z component
1307 for (i
= 0; i
< 3; ++i
)
1309 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1310 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1312 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1313 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1314 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1315 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1316 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1317 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1320 /* mul t0, t0, c0 ; Rescale texel to correct range */
1321 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1322 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1324 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
1325 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, 3, TGSI_FILE_SAMPLER
, 3);
1326 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1328 /* add o0, t0, t1 ; Add ref and differential to form final output */
1329 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1330 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1334 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1337 mc
->p_fs
[0] = pipe
->create_fs_state(pipe
, &fs
);
1343 static int vlCreateFragmentShaderFieldPMB
1345 struct vlR16SnormBufferedMC
*mc
1348 const unsigned int max_tokens
= 200;
1350 struct pipe_context
*pipe
;
1351 struct pipe_shader_state fs
;
1352 struct tgsi_token
*tokens
;
1353 struct tgsi_header
*header
;
1355 struct tgsi_full_declaration decl
;
1356 struct tgsi_full_instruction inst
;
1364 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1367 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1369 header
= (struct tgsi_header
*)&tokens
[1];
1370 *header
= tgsi_build_header();
1372 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1377 * decl i0 ; Luma texcoords
1378 * decl i1 ; Chroma Cb texcoords
1379 * decl i2 ; Chroma Cr texcoords
1380 * decl i3 ; Ref macroblock top field texcoords
1381 * decl i4 ; Ref macroblock bottom field texcoords
1382 * decl i5 ; Denormalized vertex pos
1384 for (i
= 0; i
< 6; ++i
)
1386 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1387 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1391 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1392 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
1394 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
1395 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1397 /* decl o0 ; Fragment color */
1398 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1399 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1402 decl
= vl_decl_temps(0, 4);
1403 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1406 * decl s0 ; Sampler for luma texture
1407 * decl s1 ; Sampler for chroma Cb texture
1408 * decl s2 ; Sampler for chroma Cr texture
1409 * decl s3 ; Sampler for ref surface texture
1411 for (i
= 0; i
< 4; ++i
)
1413 decl
= vl_decl_samplers(i
, i
);
1414 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1418 * tex2d t1, i0, s0 ; Read texel from luma texture
1419 * mov t0.x, t1.x ; Move luma sample into .x component
1420 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1421 * mov t0.y, t1.x ; Move Cb sample into .y component
1422 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1423 * mov t0.z, t1.x ; Move Cr sample into .z component
1425 for (i
= 0; i
< 3; ++i
)
1427 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1428 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1430 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1431 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1432 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1433 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1434 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1435 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1438 /* mul t0, t0, c0 ; Rescale texel to correct range */
1439 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1440 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1443 * tex2d t1, i3, s3 ; Read texel from ref macroblock top field
1444 * tex2d t2, i4, s3 ; Read texel from ref macroblock bottom field
1446 for (i
= 0; i
< 2; ++i
)
1448 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, 3);
1449 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1452 /* XXX: Pos values off by 0.5? */
1453 /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */
1454 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_INPUT
, 5, TGSI_FILE_CONSTANT
, 1);
1455 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1456 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1457 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1458 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1459 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1460 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1461 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1462 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1463 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1465 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1466 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_CONSTANT
, 1);
1467 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1468 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1469 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1470 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1471 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1473 /* floor t3, t3 ; Get rid of fractional part */
1474 inst
= vl_inst2(TGSI_OPCODE_FLOOR
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3);
1475 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1477 /* mul t3, t3, c1.y ; Multiply by 2 */
1478 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_CONSTANT
, 1);
1479 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1480 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1481 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1482 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1483 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1485 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1486 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_TEMPORARY
, 3);
1487 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1489 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1490 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
1491 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
1492 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1494 /* add o0, t0, t1 ; Add ref and differential to form final output */
1495 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1496 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1500 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1503 mc
->p_fs
[1] = pipe
->create_fs_state(pipe
, &fs
);
1509 static int vlCreateVertexShaderFrameBMB
1511 struct vlR16SnormBufferedMC
*mc
1514 const unsigned int max_tokens
= 100;
1516 struct pipe_context
*pipe
;
1517 struct pipe_shader_state vs
;
1518 struct tgsi_token
*tokens
;
1519 struct tgsi_header
*header
;
1521 struct tgsi_full_declaration decl
;
1522 struct tgsi_full_instruction inst
;
1530 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1533 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1535 header
= (struct tgsi_header
*)&tokens
[1];
1536 *header
= tgsi_build_header();
1538 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1543 * decl i0 ; Vertex pos
1544 * decl i1 ; Luma texcoords
1545 * decl i2 ; Chroma Cb texcoords
1546 * decl i3 ; Chroma Cr texcoords
1547 * decl i4 ; First ref macroblock top field texcoords
1548 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
1549 * decl i6 ; Second ref macroblock top field texcoords
1550 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
1552 for (i
= 0; i
< 8; i
++)
1554 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1555 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1559 * decl o0 ; Vertex pos
1560 * decl o1 ; Luma texcoords
1561 * decl o2 ; Chroma Cb texcoords
1562 * decl o3 ; Chroma Cr texcoords
1563 * decl o4 ; First ref macroblock texcoords
1564 * decl o5 ; Second ref macroblock texcoords
1566 for (i
= 0; i
< 6; i
++)
1568 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1569 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1573 * mov o0, i0 ; Move input vertex pos to output
1574 * mov o1, i1 ; Move input luma texcoords to output
1575 * mov o2, i2 ; Move input chroma Cb texcoords to output
1576 * mov o3, i3 ; Move input chroma Cr texcoords to output
1578 for (i
= 0; i
< 4; ++i
)
1580 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
1581 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1585 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
1586 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
1588 for (i
= 0; i
< 2; ++i
)
1590 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, (i
+ 2) * 2);
1591 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1596 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1599 mc
->b_vs
[0] = pipe
->create_vs_state(pipe
, &vs
);
1605 static int vlCreateVertexShaderFieldBMB
1607 struct vlR16SnormBufferedMC
*mc
1610 const unsigned int max_tokens
= 100;
1612 struct pipe_context
*pipe
;
1613 struct pipe_shader_state vs
;
1614 struct tgsi_token
*tokens
;
1615 struct tgsi_header
*header
;
1617 struct tgsi_full_declaration decl
;
1618 struct tgsi_full_instruction inst
;
1626 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1629 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1631 header
= (struct tgsi_header
*)&tokens
[1];
1632 *header
= tgsi_build_header();
1634 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1639 * decl i0 ; Vertex pos
1640 * decl i1 ; Luma texcoords
1641 * decl i2 ; Chroma Cb texcoords
1642 * decl i3 ; Chroma Cr texcoords
1643 * decl i4 ; First ref macroblock top field texcoords
1644 * decl i5 ; First ref macroblock bottom field texcoords
1645 * decl i6 ; Second ref macroblock top field texcoords
1646 * decl i7 ; Second ref macroblock bottom field texcoords
1648 for (i
= 0; i
< 8; i
++)
1650 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1651 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1654 /* decl c0 ; Render target dimensions */
1655 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
1656 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1659 * decl o0 ; Vertex pos
1660 * decl o1 ; Luma texcoords
1661 * decl o2 ; Chroma Cb texcoords
1662 * decl o3 ; Chroma Cr texcoords
1663 * decl o4 ; First ref macroblock top field texcoords
1664 * decl o5 ; First ref macroblock Bottom field texcoords
1665 * decl o6 ; Second ref macroblock top field texcoords
1666 * decl o7 ; Second ref macroblock Bottom field texcoords
1667 * decl o8 ; Denormalized vertex pos
1669 for (i
= 0; i
< 9; i
++)
1671 decl
= vl_decl_output((i
== 0 || i
== 8) ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1672 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1676 decl
= vl_decl_temps(0, 1);
1677 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1680 * mov o0, i0 ; Move input vertex pos to output
1681 * mov o1, i1 ; Move input luma texcoords to output
1682 * mov o2, i2 ; Move input chroma Cb texcoords to output
1683 * mov o3, i3 ; Move input chroma Cr texcoords to output
1685 for (i
= 0; i
< 4; ++i
)
1687 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
1688 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1692 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
1693 * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
1694 * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
1695 * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
1697 for (i
= 0; i
< 4; ++i
)
1699 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, i
+ 4);
1700 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1703 /* mul o8, i0, c0 ; Denorm vertex pos */
1704 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 8, TGSI_FILE_INPUT
, 0, TGSI_FILE_CONSTANT
, 0);
1705 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1709 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1712 mc
->b_vs
[1] = pipe
->create_vs_state(pipe
, &vs
);
1718 static int vlCreateFragmentShaderFrameBMB
1720 struct vlR16SnormBufferedMC
*mc
1723 const unsigned int max_tokens
= 100;
1725 struct pipe_context
*pipe
;
1726 struct pipe_shader_state fs
;
1727 struct tgsi_token
*tokens
;
1728 struct tgsi_header
*header
;
1730 struct tgsi_full_declaration decl
;
1731 struct tgsi_full_instruction inst
;
1739 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1742 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1744 header
= (struct tgsi_header
*)&tokens
[1];
1745 *header
= tgsi_build_header();
1747 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1752 * decl i0 ; Luma texcoords
1753 * decl i1 ; Chroma Cb texcoords
1754 * decl i2 ; Chroma Cr texcoords
1755 * decl i3 ; First ref macroblock texcoords
1756 * decl i4 ; Second ref macroblock texcoords
1758 for (i
= 0; i
< 5; ++i
)
1760 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1761 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1765 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1766 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
1768 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
1769 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1771 /* decl o0 ; Fragment color */
1772 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1773 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1776 decl
= vl_decl_temps(0, 2);
1777 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1780 * decl s0 ; Sampler for luma texture
1781 * decl s1 ; Sampler for chroma Cb texture
1782 * decl s2 ; Sampler for chroma Cr texture
1783 * decl s3 ; Sampler for first ref surface texture
1784 * decl s4 ; Sampler for second ref surface texture
1786 for (i
= 0; i
< 5; ++i
)
1788 decl
= vl_decl_samplers(i
, i
);
1789 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1793 * tex2d t1, i0, s0 ; Read texel from luma texture
1794 * mov t0.x, t1.x ; Move luma sample into .x component
1795 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1796 * mov t0.y, t1.x ; Move Cb sample into .y component
1797 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1798 * mov t0.z, t1.x ; Move Cr sample into .z component
1800 for (i
= 0; i
< 3; ++i
)
1802 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1803 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1805 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1806 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1807 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1808 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1809 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1810 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1813 /* mul t0, t0, c0 ; Rescale texel to correct range */
1814 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1815 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1818 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
1819 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
1821 for (i
= 0; i
< 2; ++i
)
1823 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, i
+ 3);
1824 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1827 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
1828 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_CONSTANT
, 1, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
1829 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1830 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1831 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1832 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1833 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1835 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1836 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1837 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1841 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1844 mc
->b_fs
[0] = pipe
->create_fs_state(pipe
, &fs
);
1850 static int vlCreateFragmentShaderFieldBMB
1852 struct vlR16SnormBufferedMC
*mc
1855 const unsigned int max_tokens
= 200;
1857 struct pipe_context
*pipe
;
1858 struct pipe_shader_state fs
;
1859 struct tgsi_token
*tokens
;
1860 struct tgsi_header
*header
;
1862 struct tgsi_full_declaration decl
;
1863 struct tgsi_full_instruction inst
;
1871 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1874 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1876 header
= (struct tgsi_header
*)&tokens
[1];
1877 *header
= tgsi_build_header();
1879 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1884 * decl i0 ; Luma texcoords
1885 * decl i1 ; Chroma Cb texcoords
1886 * decl i2 ; Chroma Cr texcoords
1887 * decl i3 ; First ref macroblock top field texcoords
1888 * decl i4 ; First ref macroblock bottom field texcoords
1889 * decl i5 ; Second ref macroblock top field texcoords
1890 * decl i6 ; Second ref macroblock bottom field texcoords
1891 * decl i7 ; Denormalized vertex pos
1893 for (i
= 0; i
< 8; ++i
)
1895 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1896 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1900 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1901 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
1902 * ; and for Y-mod-2 top/bottom field selection
1904 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
1905 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1907 /* decl o0 ; Fragment color */
1908 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1909 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1912 decl
= vl_decl_temps(0, 5);
1913 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1916 * decl s0 ; Sampler for luma texture
1917 * decl s1 ; Sampler for chroma Cb texture
1918 * decl s2 ; Sampler for chroma Cr texture
1919 * decl s3 ; Sampler for first ref surface texture
1920 * decl s4 ; Sampler for second ref surface texture
1922 for (i
= 0; i
< 5; ++i
)
1924 decl
= vl_decl_samplers(i
, i
);
1925 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1929 * tex2d t1, i0, s0 ; Read texel from luma texture
1930 * mov t0.x, t1.x ; Move luma sample into .x component
1931 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1932 * mov t0.y, t1.x ; Move Cb sample into .y component
1933 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1934 * mov t0.z, t1.x ; Move Cr sample into .z component
1936 for (i
= 0; i
< 3; ++i
)
1938 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1939 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1941 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1942 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1943 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1944 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1945 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1946 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1949 /* mul t0, t0, c0 ; Rescale texel to correct range */
1950 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1951 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1953 /* XXX: Pos values off by 0.5? */
1954 /* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */
1955 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_INPUT
, 7, TGSI_FILE_CONSTANT
, 1);
1956 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1957 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1958 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1959 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1960 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1961 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1962 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1963 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1964 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1966 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1967 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_CONSTANT
, 1);
1968 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1969 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1970 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1971 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1972 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1974 /* floor t3, t3 ; Get rid of fractional part */
1975 inst
= vl_inst2(TGSI_OPCODE_FLOOR
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3);
1976 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1978 /* mul t3, t3, c1.y ; Multiply by 2 */
1979 inst
= vl_inst3( TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_CONSTANT
, 1);
1980 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1981 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1982 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1983 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1984 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1986 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1987 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_TEMPORARY
, 3);
1988 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1991 * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field
1992 * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field
1994 for (i
= 0; i
< 2; ++i
)
1996 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, 3);
1997 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2000 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
2001 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
2002 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
2003 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2006 * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field
2007 * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field
2009 for (i
= 0; i
< 2; ++i
)
2011 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 4, TGSI_FILE_INPUT
, i
+ 5, TGSI_FILE_SAMPLER
, 4);
2012 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2015 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
2016 /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
2017 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 2, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_TEMPORARY
, 5);
2018 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2020 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
2021 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_CONSTANT
, 1, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
2022 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
2023 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
2024 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
2025 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
2026 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2028 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
2029 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
2030 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2034 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2037 mc
->b_fs
[1] = pipe
->create_fs_state(pipe
, &fs
);
/*
 * vlCreateDataBufs: fills in the data-side state of an MC renderer: the
 * vertex buffers of every buffer set, the vertex element layout, the vertex
 * and fragment shader constant buffers, and the macroblock array.
 *
 * mc: renderer to populate; picture_width and picture_height are read to
 *     size the buffers.
 */
2043 static int vlCreateDataBufs
2045 struct vlR16SnormBufferedMC
*mc
/* Picture size in macroblocks: each dimension is passed through align() with the macroblock size before dividing. */
2048 const unsigned int mbw
= align(mc
->picture_width
, VL_MACROBLOCK_WIDTH
) / VL_MACROBLOCK_WIDTH
;
2049 const unsigned int mbh
= align(mc
->picture_height
, VL_MACROBLOCK_HEIGHT
) / VL_MACROBLOCK_HEIGHT
;
2050 const unsigned int num_mb_per_frame
= mbw
* mbh
;
2052 struct pipe_context
*pipe
;
2059 /* Create our vertex buffers */
2060 for (h
= 0; h
< NUM_BUF_SETS
; ++h
)
/* Stream 0: four vlVertex2f per vertex, with room for 24 vertices per macroblock. */
2062 mc
->vertex_bufs
[h
][0].pitch
= sizeof(struct vlVertex2f
) * 4;
2063 mc
->vertex_bufs
[h
][0].max_index
= 24 * num_mb_per_frame
- 1;
2064 mc
->vertex_bufs
[h
][0].buffer_offset
= 0;
/* NOTE(review): the buffer_create result is stored as-is; no failure check is visible here -- confirm. */
2065 mc
->vertex_bufs
[h
][0].buffer
= pipe
->winsys
->buffer_create
2069 PIPE_BUFFER_USAGE_VERTEX
,
2070 sizeof(struct vlVertex2f
) * 4 * 24 * num_mb_per_frame
/* Streams 1 and 2: two vlVertex2f per vertex, same vertex capacity as stream 0. */
2073 for (i
= 1; i
< 3; ++i
)
2075 mc
->vertex_bufs
[h
][i
].pitch
= sizeof(struct vlVertex2f
) * 2;
2076 mc
->vertex_bufs
[h
][i
].max_index
= 24 * num_mb_per_frame
- 1;
2077 mc
->vertex_bufs
[h
][i
].buffer_offset
= 0;
2078 mc
->vertex_bufs
[h
][i
].buffer
= pipe
->winsys
->buffer_create
2082 PIPE_BUFFER_USAGE_VERTEX
,
2083 sizeof(struct vlVertex2f
) * 2 * 24 * num_mb_per_frame
/*
 * Vertex element layout: elements 0-3 read consecutive vlVertex2f slots of
 * stream 0, elements 4-5 read stream 1 and elements 6-7 read stream 2.
 * Every element is a two-component 32-bit float pair.
 */
2088 /* Position element */
2089 mc
->vertex_elems
[0].src_offset
= 0;
2090 mc
->vertex_elems
[0].vertex_buffer_index
= 0;
2091 mc
->vertex_elems
[0].nr_components
= 2;
2092 mc
->vertex_elems
[0].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2094 /* Luma, texcoord element */
2095 mc
->vertex_elems
[1].src_offset
= sizeof(struct vlVertex2f
);
2096 mc
->vertex_elems
[1].vertex_buffer_index
= 0;
2097 mc
->vertex_elems
[1].nr_components
= 2;
2098 mc
->vertex_elems
[1].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2100 /* Chroma Cr texcoord element */
2101 mc
->vertex_elems
[2].src_offset
= sizeof(struct vlVertex2f
) * 2;
2102 mc
->vertex_elems
[2].vertex_buffer_index
= 0;
2103 mc
->vertex_elems
[2].nr_components
= 2;
2104 mc
->vertex_elems
[2].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2106 /* Chroma Cb texcoord element */
2107 mc
->vertex_elems
[3].src_offset
= sizeof(struct vlVertex2f
) * 3;
2108 mc
->vertex_elems
[3].vertex_buffer_index
= 0;
2109 mc
->vertex_elems
[3].nr_components
= 2;
2110 mc
->vertex_elems
[3].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2112 /* First ref surface top field texcoord element */
2113 mc
->vertex_elems
[4].src_offset
= 0;
2114 mc
->vertex_elems
[4].vertex_buffer_index
= 1;
2115 mc
->vertex_elems
[4].nr_components
= 2;
2116 mc
->vertex_elems
[4].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2118 /* First ref surface bottom field texcoord element */
2119 mc
->vertex_elems
[5].src_offset
= sizeof(struct vlVertex2f
);
2120 mc
->vertex_elems
[5].vertex_buffer_index
= 1;
2121 mc
->vertex_elems
[5].nr_components
= 2;
2122 mc
->vertex_elems
[5].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2124 /* Second ref surface top field texcoord element */
2125 mc
->vertex_elems
[6].src_offset
= 0;
2126 mc
->vertex_elems
[6].vertex_buffer_index
= 2;
2127 mc
->vertex_elems
[6].nr_components
= 2;
2128 mc
->vertex_elems
[6].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2130 /* Second ref surface bottom field texcoord element */
2131 mc
->vertex_elems
[7].src_offset
= sizeof(struct vlVertex2f
);
2132 mc
->vertex_elems
[7].vertex_buffer_index
= 2;
2133 mc
->vertex_elems
[7].nr_components
= 2;
2134 mc
->vertex_elems
[7].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
/* Vertex shader constant buffer, sized to struct vlVertexShaderConsts. */
2136 /* Create our constant buffer */
2137 mc
->vs_const_buf
.size
= sizeof(struct vlVertexShaderConsts
);
2138 mc
->vs_const_buf
.buffer
= pipe
->winsys
->buffer_create
2142 PIPE_BUFFER_USAGE_CONSTANT
,
2143 mc
->vs_const_buf
.size
/* Fragment shader constant buffer, sized to struct vlFragmentShaderConsts. */
2146 mc
->fs_const_buf
.size
= sizeof(struct vlFragmentShaderConsts
);
2147 mc
->fs_const_buf
.buffer
= pipe
->winsys
->buffer_create
2151 PIPE_BUFFER_USAGE_CONSTANT
,
2152 mc
->fs_const_buf
.size
/*
 * The fragment constant buffer is mapped for CPU writes and unmapped right
 * after; presumably the mapping is the destination of a copy of
 * sizeof(struct vlFragmentShaderConsts) bytes -- TODO confirm.
 */
2157 pipe
->winsys
->buffer_map(pipe
->winsys
, mc
->fs_const_buf
.buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
),
2159 sizeof(struct vlFragmentShaderConsts
)
2162 pipe
->winsys
->buffer_unmap(pipe
->winsys
, mc
->fs_const_buf
.buffer
);
/*
 * One vlMpeg2MacroBlock slot per macroblock of a frame.
 * NOTE(review): no NULL check on the malloc result is visible here -- confirm
 * that allocation failure is handled.
 */
2164 mc
->macroblocks
= malloc(sizeof(struct vlMpeg2MacroBlock
) * num_mb_per_frame
);
/*
 * mc: motion-compensation renderer whose viewport, render target, sampler
 *     states, textures, shaders and data buffers are set up below.
 */
2171 struct vlR16SnormBufferedMC
*mc
2174 struct pipe_context
*pipe
;
2175 struct pipe_sampler_state sampler
;
2176 struct pipe_texture
template;
2177 unsigned int filters
[5];
2184 /* For MC we render to textures, which are rounded up to nearest POT */
2185 mc
->viewport
.scale
[0] = vlRoundUpPOT(mc
->picture_width
);
2186 mc
->viewport
.scale
[1] = vlRoundUpPOT(mc
->picture_height
);
2187 mc
->viewport
.scale
[2] = 1;
2188 mc
->viewport
.scale
[3] = 1;
2189 mc
->viewport
.translate
[0] = 0;
2190 mc
->viewport
.translate
[1] = 0;
2191 mc
->viewport
.translate
[2] = 0;
2192 mc
->viewport
.translate
[3] = 0;
/* Render target uses the same POT-rounded dimensions as the viewport scale, with one colour buffer and no depth/stencil buffer. */
2194 mc
->render_target
.width
= vlRoundUpPOT(mc
->picture_width
);
2195 mc
->render_target
.height
= vlRoundUpPOT(mc
->picture_height
);
2196 mc
->render_target
.num_cbufs
= 1;
2197 /* FB for MC stage is a vlSurface created by the user, set at render time */
2198 mc
->render_target
.zsbuf
= NULL
;
/* Filter per sampler, indexed like mc->samplers[]: 0-2 nearest, 3-4 linear. */
2200 filters
[0] = PIPE_TEX_FILTER_NEAREST
;
2201 /* FIXME: Linear causes discoloration around block edges */
2202 filters
[1] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST
/*: PIPE_TEX_FILTER_LINEAR*/;
2203 filters
[2] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST
/*: PIPE_TEX_FILTER_LINEAR*/;
2204 filters
[3] = PIPE_TEX_FILTER_LINEAR
;
2205 filters
[4] = PIPE_TEX_FILTER_LINEAR
;
/*
 * Create the five sampler states; they differ only in the min/mag filter.
 * NOTE(review): no zero-initialisation of `sampler` is visible here and
 * several of its fields are left commented out below -- confirm the struct
 * is cleared before it is handed to create_sampler_state.
 */
2207 for (i
= 0; i
< 5; ++i
)
2209 sampler
.wrap_s
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
2210 sampler
.wrap_t
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
2211 sampler
.wrap_r
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
2212 sampler
.min_img_filter
= filters
[i
];
2213 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
2214 sampler
.mag_img_filter
= filters
[i
];
2215 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
2216 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
2217 sampler
.normalized_coords
= 1;
2218 /*sampler.prefilter = ;*/
2219 /*sampler.shadow_ambient = ;*/
2220 /*sampler.lod_bias = ;*/
2221 sampler
.min_lod
= 0;
2222 /*sampler.max_lod = ;*/
2223 /*sampler.border_color[i] = ;*/
2224 /*sampler.max_anisotropy = ;*/
2225 mc
->samplers
[i
] = pipe
->create_sampler_state(pipe
, &sampler
);
/* Texture template: zeroed, then set to a single-level, uncompressed 2D R16_SNORM texture at the POT-rounded picture size. */
2228 memset(&template, 0, sizeof(struct pipe_texture
));
2229 template.target
= PIPE_TEXTURE_2D
;
2230 template.format
= PIPE_FORMAT_R16_SNORM
;
2231 template.last_level
= 0;
2232 template.width
[0] = vlRoundUpPOT(mc
->picture_width
);
2233 template.height
[0] = vlRoundUpPOT(mc
->picture_height
);
2234 template.depth
[0] = 1;
2235 template.compressed
= 0;
2236 pf_get_block(template.format
, &template.block
);
/* Texture 0 of every buffer set is created from the full-size template. */
2238 for (i
= 0; i
< NUM_BUF_SETS
; ++i
)
2239 mc
->textures
[i
][0] = pipe
->screen
->texture_create(pipe
->screen
, &template);
/*
 * Shrink the template for textures 1 and 2, which are created next.
 * NOTE(review): the 4:2:2 branch halves the height and leaves the width
 * untouched; 4:2:2 chroma is normally half width at full height -- confirm
 * this matches how the chroma texcoords are generated.
 */
2241 if (mc
->picture_format
== vlFormatYCbCr420
)
2243 template.width
[0] = vlRoundUpPOT(mc
->picture_width
/ 2);
2244 template.height
[0] = vlRoundUpPOT(mc
->picture_height
/ 2);
2246 else if (mc
->picture_format
== vlFormatYCbCr422
)
2247 template.height
[0] = vlRoundUpPOT(mc
->picture_height
/ 2);
/* NOTE(review): texture_create results are stored without a visible failure check -- confirm. */
2249 for (i
= 0; i
< NUM_BUF_SETS
; ++i
)
2251 mc
->textures
[i
][1] = pipe
->screen
->texture_create(pipe
->screen
, &template);
2252 mc
->textures
[i
][2] = pipe
->screen
->texture_create(pipe
->screen
, &template);
2255 /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
/* Build every shader variant, then the data buffers; the int returned by vlCreateDataBufs is discarded here. */
2257 vlCreateVertexShaderIMB(mc
);
2258 vlCreateFragmentShaderIMB(mc
);
2259 vlCreateVertexShaderFramePMB(mc
);
2260 vlCreateVertexShaderFieldPMB(mc
);
2261 vlCreateFragmentShaderFramePMB(mc
);
2262 vlCreateFragmentShaderFieldPMB(mc
);
2263 vlCreateVertexShaderFrameBMB(mc
);
2264 vlCreateVertexShaderFieldBMB(mc
);
2265 vlCreateFragmentShaderFrameBMB(mc
);
2266 vlCreateFragmentShaderFieldBMB(mc
);
2267 vlCreateDataBufs(mc
);
/*
 * vlCreateR16SNormBufferedMC: allocates a zeroed vlR16SnormBufferedMC,
 * installs its vlRender callbacks, records the picture dimensions, resets
 * the surface/macroblock tracking state and returns the embedded base
 * through *render.
 *
 * pipe:            pipe context supplied by the caller.
 * picture_width:   stored in mc->picture_width.
 * picture_height:  stored in mc->picture_height.
 * picture_format:  picture format supplied by the caller.
 * render:          out parameter; receives &mc->base.
 */
2272 int vlCreateR16SNormBufferedMC
2274 struct pipe_context
*pipe
,
2275 unsigned int picture_width
,
2276 unsigned int picture_height
,
2277 enum vlFormat picture_format
,
2278 struct vlRender
**render
2281 struct vlR16SnormBufferedMC
*mc
;
/* NOTE(review): mc is dereferenced after calloc with no visible NULL check -- confirm allocation failure is handled. */
2287 mc
= calloc(1, sizeof(struct vlR16SnormBufferedMC
));
/* Hook up the vlRender entry points implemented in this file. */
2289 mc
->base
.vlBegin
= &vlBegin
;
2290 mc
->base
.vlRenderMacroBlocksMpeg2
= &vlRenderMacroBlocksMpeg2R16SnormBuffered
;
2291 mc
->base
.vlEnd
= &vlEnd
;
2292 mc
->base
.vlFlush
= &vlFlush
;
2293 mc
->base
.vlDestroy
= &vlDestroy
;
2295 mc
->picture_width
= picture_width
;
2296 mc
->picture_height
= picture_height
;
/* No surface is buffered or referenced yet. */
2299 mc
->buffered_surface
= NULL
;
2300 mc
->past_surface
= NULL
;
2301 mc
->future_surface
= NULL
;
/* x = -1.0f on all three zero_block entries; presumably marks them as not yet assigned -- TODO confirm. */
2302 for (i
= 0; i
< 3; ++i
)
2303 mc
->zero_block
[i
].x
= -1.0f
;
2304 mc
->num_macroblocks
= 0;
/* Hand the generic renderer interface back to the caller. */
2308 *render
= &mc
->base
;