2 #include "vl_r16snorm_mc_buf.h"
5 #include <pipe/p_context.h>
6 #include <pipe/p_winsys.h>
7 #include <pipe/p_screen.h>
8 #include <pipe/p_state.h>
9 #include <pipe/p_util.h>
10 #include <pipe/p_inlines.h>
11 #include <tgsi/tgsi_parse.h>
12 #include <tgsi/tgsi_build.h>
13 #include "vl_render.h"
14 #include "vl_shader_build.h"
15 #include "vl_surface.h"
21 * TODO: Dynamically determine number of buf sets to use, based on
22 * video size and available mem, since we can easily run out of memory
23 * for high res videos.
24 * Note: Destroying previous frame's buffers and creating new ones
25 * doesn't work, since the buffers are not actually destroyed until their
26 * fence is signalled, and if we render fast enough we will create faster
29 #define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */
/*
 * Extended macroblock classification: the macroblock prediction type
 * combined with its motion type (frame or field).
 *
 * The enumerators are used as array indices when macroblocks are counted
 * and given vertex-buffer offsets per type, and the per-type draw calls are
 * issued in this same order, so the order of the enumerators is significant.
 * vlNumMacroBlockExTypes must stay last; it is the number of real types and
 * is used to size those per-type arrays.
 */
enum vlMacroBlockTypeEx
{
    vlMacroBlockExTypeIntra,
    vlMacroBlockExTypeFwdPredictedFrame,
    vlMacroBlockExTypeFwdPredictedField,
    vlMacroBlockExTypeBkwdPredictedFrame,
    vlMacroBlockExTypeBkwdPredictedField,
    vlMacroBlockExTypeBiPredictedFrame,
    vlMacroBlockExTypeBiPredictedField,

    vlNumMacroBlockExTypes
};
44 struct vlVertexShaderConsts
46 struct vlVertex4f denorm
;
49 struct vlFragmentShaderConsts
51 struct vlVertex4f multiplier
;
52 struct vlVertex4f div
;
55 struct vlR16SnormBufferedMC
59 unsigned int picture_width
, picture_height
;
60 enum vlFormat picture_format
;
63 struct vlSurface
*buffered_surface
;
64 struct vlSurface
*past_surface
, *future_surface
;
65 struct vlVertex2f surface_tex_inv_size
;
66 unsigned int num_macroblocks
;
67 struct vlMpeg2MacroBlock
*macroblocks
;
69 struct pipe_context
*pipe
;
70 struct pipe_viewport_state viewport
;
71 struct pipe_framebuffer_state render_target
;
72 struct pipe_sampler_state
*samplers
[5];
73 struct pipe_texture
*textures
[NUM_BUF_SETS
][5];
74 void *i_vs
, *p_vs
[2], *b_vs
[2];
75 void *i_fs
, *p_fs
[2], *b_fs
[2];
76 struct pipe_vertex_buffer vertex_bufs
[NUM_BUF_SETS
][3];
77 struct pipe_vertex_element vertex_elems
[8];
78 struct pipe_constant_buffer vs_const_buf
, fs_const_buf
;
83 struct vlRender
*render
91 static inline int vlGrabFrameCodedBlock(short *src
, short *dst
, unsigned int dst_pitch
)
95 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
99 src
+ y
* VL_BLOCK_WIDTH
,
106 static inline int vlGrabFieldCodedBlock(short *src
, short *dst
, unsigned int dst_pitch
)
110 for (y
= 0; y
< VL_BLOCK_HEIGHT
/ 2; ++y
)
113 dst
+ y
* dst_pitch
* 2,
114 src
+ y
* VL_BLOCK_WIDTH
,
118 dst
+= VL_BLOCK_HEIGHT
* dst_pitch
;
120 for (; y
< VL_BLOCK_HEIGHT
; ++y
)
123 dst
+ y
* dst_pitch
* 2,
124 src
+ y
* VL_BLOCK_WIDTH
,
131 static inline int vlGrabNoBlock(short *dst
, unsigned int dst_pitch
)
135 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
146 static inline int vlGrabBlocks
148 struct vlR16SnormBufferedMC
*mc
,
151 enum vlDCTType dct_type
,
152 unsigned int coded_block_pattern
,
156 struct pipe_surface
*tex_surface
;
158 unsigned int tex_pitch
;
159 unsigned int x
, y
, tb
= 0, sb
= 0;
160 unsigned int mbpx
= mbx
* VL_MACROBLOCK_WIDTH
, mbpy
= mby
* VL_MACROBLOCK_HEIGHT
;
165 tex_surface
= mc
->pipe
->screen
->get_tex_surface
168 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][0],
169 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
172 texels
= pipe_surface_map(tex_surface
, PIPE_BUFFER_USAGE_CPU_WRITE
);
173 tex_pitch
= tex_surface
->stride
/ tex_surface
->block
.size
;
175 texels
+= mbpy
* tex_pitch
+ mbpx
;
177 for (y
= 0; y
< 2; ++y
)
179 for (x
= 0; x
< 2; ++x
, ++tb
)
181 if ((coded_block_pattern
>> (5 - tb
)) & 1)
183 short *cur_block
= blocks
+ sb
* VL_BLOCK_WIDTH
* VL_BLOCK_HEIGHT
;
185 if (dct_type
== vlDCTTypeFrameCoded
)
187 vlGrabFrameCodedBlock
190 texels
+ y
* tex_pitch
* VL_BLOCK_HEIGHT
+ x
* VL_BLOCK_WIDTH
,
196 vlGrabFieldCodedBlock
199 texels
+ y
* tex_pitch
+ x
* VL_BLOCK_WIDTH
,
207 vlGrabNoBlock(texels
+ y
* tex_pitch
* VL_BLOCK_HEIGHT
+ x
* VL_BLOCK_WIDTH
, tex_pitch
);
211 pipe_surface_unmap(tex_surface
);
213 /* TODO: Implement 422, 444 */
217 for (tb
= 0; tb
< 2; ++tb
)
219 tex_surface
= mc
->pipe
->screen
->get_tex_surface
222 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][tb
+ 1],
223 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
226 texels
= pipe_surface_map(tex_surface
, PIPE_BUFFER_USAGE_CPU_WRITE
);
227 tex_pitch
= tex_surface
->stride
/ tex_surface
->block
.size
;
229 texels
+= mbpy
* tex_pitch
+ mbpx
;
231 if ((coded_block_pattern
>> (1 - tb
)) & 1)
233 short *cur_block
= blocks
+ sb
* VL_BLOCK_WIDTH
* VL_BLOCK_HEIGHT
;
235 vlGrabFrameCodedBlock
245 vlGrabNoBlock(texels
, tex_pitch
);
247 pipe_surface_unmap(tex_surface
);
253 static inline enum vlMacroBlockTypeEx
vlGetMacroBlockTypeEx(struct vlMpeg2MacroBlock
*mb
)
259 case vlMacroBlockTypeIntra
:
260 return vlMacroBlockExTypeIntra
;
261 case vlMacroBlockTypeFwdPredicted
:
262 return mb
->mo_type
== vlMotionTypeFrame
?
263 vlMacroBlockExTypeFwdPredictedFrame
: vlMacroBlockExTypeFwdPredictedField
;
264 case vlMacroBlockTypeBkwdPredicted
:
265 return mb
->mo_type
== vlMotionTypeFrame
?
266 vlMacroBlockExTypeBkwdPredictedFrame
: vlMacroBlockExTypeBkwdPredictedField
;
267 case vlMacroBlockTypeBiPredicted
:
268 return mb
->mo_type
== vlMotionTypeFrame
?
269 vlMacroBlockExTypeBiPredictedFrame
: vlMacroBlockExTypeBiPredictedField
;
278 static inline int vlGrabMacroBlock
280 struct vlR16SnormBufferedMC
*mc
,
281 struct vlMpeg2MacroBlock
*macroblock
287 mc
->macroblocks
[mc
->num_macroblocks
].mbx
= macroblock
->mbx
;
288 mc
->macroblocks
[mc
->num_macroblocks
].mby
= macroblock
->mby
;
289 mc
->macroblocks
[mc
->num_macroblocks
].mb_type
= macroblock
->mb_type
;
290 mc
->macroblocks
[mc
->num_macroblocks
].mo_type
= macroblock
->mo_type
;
291 mc
->macroblocks
[mc
->num_macroblocks
].dct_type
= macroblock
->dct_type
;
292 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[0][0][0] = macroblock
->PMV
[0][0][0];
293 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[0][0][1] = macroblock
->PMV
[0][0][1];
294 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[0][1][0] = macroblock
->PMV
[0][1][0];
295 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[0][1][1] = macroblock
->PMV
[0][1][1];
296 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[1][0][0] = macroblock
->PMV
[1][0][0];
297 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[1][0][1] = macroblock
->PMV
[1][0][1];
298 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[1][1][0] = macroblock
->PMV
[1][1][0];
299 mc
->macroblocks
[mc
->num_macroblocks
].PMV
[1][1][1] = macroblock
->PMV
[1][1][1];
300 mc
->macroblocks
[mc
->num_macroblocks
].cbp
= macroblock
->cbp
;
301 mc
->macroblocks
[mc
->num_macroblocks
].blocks
= macroblock
->blocks
;
308 macroblock
->dct_type
,
313 mc
->num_macroblocks
++;
318 #define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zx, zy) \
319 (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \
320 (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \
321 (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \
322 (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \
323 (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \
324 (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \
326 /*if ((cbp) & (lm)) \
328 (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \
329 (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
330 (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \
331 (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \
332 (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
333 (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
337 (vb)[0].luma_tc.x = (zx); (vb)[0].luma_tc.y = (zy); \
338 (vb)[1].luma_tc.x = (zx); (vb)[1].luma_tc.y = (zy) + (hy); \
339 (vb)[2].luma_tc.x = (zx) + (hx); (vb)[2].luma_tc.y = (zy); \
340 (vb)[3].luma_tc.x = (zx) + (hx); (vb)[3].luma_tc.y = (zy); \
341 (vb)[4].luma_tc.x = (zx); (vb)[4].luma_tc.y = (zy) + (hy); \
342 (vb)[5].luma_tc.x = ((zx) + (hx); (vb)[5].luma_tc.y = (zy) + (hy); \
345 /*if ((cbp) & (cbm)) \
347 (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \
348 (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
349 (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \
350 (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \
351 (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
352 (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
356 (vb)[0].cb_tc.x = (zx); (vb)[0].cb_tc.y = (zy); \
357 (vb)[1].cb_tc.x = (zx); (vb)[1].cb_tc.y = (zy) + (hy); \
358 (vb)[2].cb_tc.x = (zx) + (hx); (vb)[2].cb_tc.y = (zy); \
359 (vb)[3].cb_tc.x = (zx) + (hx); (vb)[3].cb_tc.y = (zy); \
360 (vb)[4].cb_tc.x = (zx); (vb)[4].cb_tc.y = (zy) + (hy); \
361 (vb)[5].cb_tc.x = ((zx) + (hx); (vb)[5].cb_tc.y = (zy) + (hy); \
364 /*if ((cbp) & (crm)) \
366 (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \
367 (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
368 (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \
369 (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \
370 (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
371 (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
375 (vb)[0].cr_tc.x = (zx); (vb)[0].cb_tc.y = (zy); \
376 (vb)[1].cr_tc.x = (zx); (vb)[1].cb_tc.y = (zy) + (hy); \
377 (vb)[2].cr_tc.x = (zx) + (hx); (vb)[2].cb_tc.y = (zy); \
378 (vb)[3].cr_tc.x = (zx) + (hx); (vb)[3].cb_tc.y = (zy); \
379 (vb)[4].cr_tc.x = (zx); (vb)[4].cb_tc.y = (zy) + (hy); \
380 (vb)[5].cr_tc.x = ((zx) + (hx); (vb)[5].cb_tc.y = (zy) + (hy); \
383 static inline int vlGrabMacroBlockVB
385 struct vlR16SnormBufferedMC
*mc
,
386 struct vlMpeg2MacroBlock
*macroblock
,
390 struct vlVertex2f mo_vec
[2];
396 switch (macroblock
->mb_type
)
398 case vlMacroBlockTypeBiPredicted
:
400 struct vlVertex2f
*vb
;
402 vb
= (struct vlVertex2f
*)mc
->pipe
->winsys
->buffer_map
405 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][2].buffer
,
406 PIPE_BUFFER_USAGE_CPU_WRITE
409 mo_vec
[0].x
= macroblock
->PMV
[0][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
410 mo_vec
[0].y
= macroblock
->PMV
[0][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
412 if (macroblock
->mo_type
== vlMotionTypeFrame
)
414 for (i
= 0; i
< 24 * 2; i
+= 2)
416 vb
[i
].x
= mo_vec
[0].x
;
417 vb
[i
].y
= mo_vec
[0].y
;
422 mo_vec
[1].x
= macroblock
->PMV
[1][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
423 mo_vec
[1].y
= macroblock
->PMV
[1][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
425 for (i
= 0; i
< 24 * 2; i
+= 2)
427 vb
[i
].x
= mo_vec
[0].x
;
428 vb
[i
].y
= mo_vec
[0].y
;
429 vb
[i
+ 1].x
= mo_vec
[1].x
;
430 vb
[i
+ 1].y
= mo_vec
[1].y
;
434 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][2].buffer
);
438 case vlMacroBlockTypeFwdPredicted
:
439 case vlMacroBlockTypeBkwdPredicted
:
441 struct vlVertex2f
*vb
;
443 vb
= (struct vlVertex2f
*)mc
->pipe
->winsys
->buffer_map
446 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][1].buffer
,
447 PIPE_BUFFER_USAGE_CPU_WRITE
450 if (macroblock
->mb_type
== vlMacroBlockTypeBkwdPredicted
)
452 mo_vec
[0].x
= macroblock
->PMV
[0][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
453 mo_vec
[0].y
= macroblock
->PMV
[0][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
455 if (macroblock
->mo_type
== vlMotionTypeField
)
457 mo_vec
[1].x
= macroblock
->PMV
[1][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
458 mo_vec
[1].y
= macroblock
->PMV
[1][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
463 mo_vec
[0].x
= macroblock
->PMV
[0][0][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
464 mo_vec
[0].y
= macroblock
->PMV
[0][0][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
466 if (macroblock
->mo_type
== vlMotionTypeField
)
468 mo_vec
[1].x
= macroblock
->PMV
[1][0][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
469 mo_vec
[1].y
= macroblock
->PMV
[1][0][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
473 if (macroblock
->mo_type
== vlMotionTypeFrame
)
475 for (i
= 0; i
< 24 * 2; i
+= 2)
477 vb
[i
].x
= mo_vec
[0].x
;
478 vb
[i
].y
= mo_vec
[0].y
;
483 for (i
= 0; i
< 24 * 2; i
+= 2)
485 vb
[i
].x
= mo_vec
[0].x
;
486 vb
[i
].y
= mo_vec
[0].y
;
487 vb
[i
+ 1].x
= mo_vec
[1].x
;
488 vb
[i
+ 1].y
= mo_vec
[1].y
;
492 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][1].buffer
);
496 case vlMacroBlockTypeIntra
:
498 const struct vlVertex2f unit
=
500 mc
->surface_tex_inv_size
.x
* VL_MACROBLOCK_WIDTH
,
501 mc
->surface_tex_inv_size
.y
* VL_MACROBLOCK_HEIGHT
503 const struct vlVertex2f half
=
505 mc
->surface_tex_inv_size
.x
* (VL_MACROBLOCK_WIDTH
/ 2),
506 mc
->surface_tex_inv_size
.y
* (VL_MACROBLOCK_HEIGHT
/ 2)
509 struct vlMacroBlockVertexStream0
511 struct vlVertex2f pos
;
512 struct vlVertex2f luma_tc
;
513 struct vlVertex2f cb_tc
;
514 struct vlVertex2f cr_tc
;
517 vb
= (struct vlMacroBlockVertexStream0
*)mc
->pipe
->winsys
->buffer_map
520 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][0].buffer
,
521 PIPE_BUFFER_USAGE_CPU_WRITE
527 macroblock
->cbp
, macroblock
->mbx
, macroblock
->mby
,
528 unit
.x
, unit
.y
, 0, 0, half
.x
, half
.y
,
529 32, 2, 1, mc
->zero_block
.x
, mc
->zero_block
.y
535 macroblock
->cbp
, macroblock
->mbx
, macroblock
->mby
,
536 unit
.x
, unit
.y
, half
.x
, 0, half
.x
, half
.y
,
537 16, 2, 1, mc
->zero_block
.x
, mc
->zero_block
.y
543 macroblock
->cbp
, macroblock
->mbx
, macroblock
->mby
,
544 unit
.x
, unit
.y
, 0, half
.y
, half
.x
, half
.y
,
545 8, 2, 1, mc
->zero_block
.x
, mc
->zero_block
.y
551 macroblock
->cbp
, macroblock
->mbx
, macroblock
->mby
,
552 unit
.x
, unit
.y
, half
.x
, half
.y
, half
.x
, half
.y
,
553 4, 2, 1, mc
->zero_block
.x
, mc
->zero_block
.y
556 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][0].buffer
);
569 struct vlRender
*render
572 struct vlR16SnormBufferedMC
*mc
;
573 struct pipe_context
*pipe
;
574 struct vlVertexShaderConsts
*vs_consts
;
575 unsigned int num_macroblocks
[vlNumMacroBlockExTypes
] = {0};
576 unsigned int offset
[vlNumMacroBlockExTypes
];
577 unsigned int vb_start
= 0;
582 mc
= (struct vlR16SnormBufferedMC
*)render
;
585 for (i
= 0; i
< mc
->num_macroblocks
; ++i
)
587 enum vlMacroBlockTypeEx mb_type_ex
= vlGetMacroBlockTypeEx(&mc
->macroblocks
[i
]);
589 num_macroblocks
[mb_type_ex
]++;
594 for (i
= 1; i
< vlNumMacroBlockExTypes
; ++i
)
595 offset
[i
] = offset
[i
- 1] + num_macroblocks
[i
- 1];
597 for (i
= 0; i
< mc
->num_macroblocks
; ++i
)
599 enum vlMacroBlockTypeEx mb_type_ex
= vlGetMacroBlockTypeEx(&mc
->macroblocks
[i
]);
601 vlGrabMacroBlockVB(mc
, &mc
->macroblocks
[i
], offset
[mb_type_ex
]);
603 offset
[mb_type_ex
]++;
606 mc
->render_target
.cbufs
[0] = pipe
->screen
->get_tex_surface
609 mc
->buffered_surface
->texture
,
610 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ
| PIPE_BUFFER_USAGE_GPU_WRITE
613 pipe
->set_framebuffer_state(pipe
, &mc
->render_target
);
614 pipe
->set_viewport_state(pipe
, &mc
->viewport
);
615 vs_consts
= pipe
->winsys
->buffer_map
618 mc
->vs_const_buf
.buffer
,
619 PIPE_BUFFER_USAGE_CPU_WRITE
622 vs_consts
->denorm
.x
= mc
->buffered_surface
->texture
->width
[0];
623 vs_consts
->denorm
.y
= mc
->buffered_surface
->texture
->height
[0];
625 pipe
->winsys
->buffer_unmap(pipe
->winsys
, mc
->vs_const_buf
.buffer
);
626 pipe
->set_constant_buffer(pipe
, PIPE_SHADER_VERTEX
, 0, &mc
->vs_const_buf
);
627 pipe
->set_constant_buffer(pipe
, PIPE_SHADER_FRAGMENT
, 0, &mc
->fs_const_buf
);
629 if (num_macroblocks
[vlMacroBlockExTypeIntra
] > 0)
631 pipe
->set_vertex_buffers(pipe
, 1, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
632 pipe
->set_vertex_elements(pipe
, 4, mc
->vertex_elems
);
633 pipe
->set_sampler_textures(pipe
, 3, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
634 pipe
->bind_sampler_states(pipe
, 3, (void**)mc
->samplers
);
635 pipe
->bind_vs_state(pipe
, mc
->i_vs
);
636 pipe
->bind_fs_state(pipe
, mc
->i_fs
);
638 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeIntra
] * 24);
639 vb_start
+= num_macroblocks
[vlMacroBlockExTypeIntra
] * 24;
642 if (num_macroblocks
[vlMacroBlockExTypeFwdPredictedFrame
] > 0)
644 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
645 pipe
->set_vertex_elements(pipe
, 6, mc
->vertex_elems
);
646 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
647 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
648 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
649 pipe
->bind_vs_state(pipe
, mc
->p_vs
[0]);
650 pipe
->bind_fs_state(pipe
, mc
->p_fs
[0]);
652 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeFwdPredictedFrame
] * 24);
653 vb_start
+= num_macroblocks
[vlMacroBlockExTypeFwdPredictedFrame
] * 24;
656 if (num_macroblocks
[vlMacroBlockExTypeFwdPredictedField
] > 0)
658 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
659 pipe
->set_vertex_elements(pipe
, 6, mc
->vertex_elems
);
660 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
661 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
662 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
663 pipe
->bind_vs_state(pipe
, mc
->p_vs
[1]);
664 pipe
->bind_fs_state(pipe
, mc
->p_fs
[1]);
666 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeFwdPredictedField
] * 24);
667 vb_start
+= num_macroblocks
[vlMacroBlockExTypeFwdPredictedField
] * 24;
670 if (num_macroblocks
[vlMacroBlockExTypeBkwdPredictedFrame
] > 0)
672 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
673 pipe
->set_vertex_elements(pipe
, 6, mc
->vertex_elems
);
674 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->future_surface
->texture
;
675 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
676 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
677 pipe
->bind_vs_state(pipe
, mc
->p_vs
[0]);
678 pipe
->bind_fs_state(pipe
, mc
->p_fs
[0]);
680 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeBkwdPredictedFrame
] * 24);
681 vb_start
+= num_macroblocks
[vlMacroBlockExTypeBkwdPredictedFrame
] * 24;
684 if (num_macroblocks
[vlMacroBlockExTypeBkwdPredictedField
] > 0)
686 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
687 pipe
->set_vertex_elements(pipe
, 6, mc
->vertex_elems
);
688 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->future_surface
->texture
;
689 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
690 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
691 pipe
->bind_vs_state(pipe
, mc
->p_vs
[1]);
692 pipe
->bind_fs_state(pipe
, mc
->p_fs
[1]);
694 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeBkwdPredictedField
] * 24);
695 vb_start
+= num_macroblocks
[vlMacroBlockExTypeBkwdPredictedField
] * 24;
698 if (num_macroblocks
[vlMacroBlockExTypeBiPredictedFrame
] > 0)
700 pipe
->set_vertex_buffers(pipe
, 3, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
701 pipe
->set_vertex_elements(pipe
, 8, mc
->vertex_elems
);
702 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
703 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][4] = mc
->future_surface
->texture
;
704 pipe
->set_sampler_textures(pipe
, 5, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
705 pipe
->bind_sampler_states(pipe
, 5, (void**)mc
->samplers
);
706 pipe
->bind_vs_state(pipe
, mc
->b_vs
[0]);
707 pipe
->bind_fs_state(pipe
, mc
->b_fs
[0]);
709 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeBiPredictedFrame
] * 24);
710 vb_start
+= num_macroblocks
[vlMacroBlockExTypeBiPredictedFrame
] * 24;
713 if (num_macroblocks
[vlMacroBlockExTypeBiPredictedField
] > 0)
715 pipe
->set_vertex_buffers(pipe
, 3, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
]);
716 pipe
->set_vertex_elements(pipe
, 8, mc
->vertex_elems
);
717 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
718 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][4] = mc
->future_surface
->texture
;
719 pipe
->set_sampler_textures(pipe
, 5, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
720 pipe
->bind_sampler_states(pipe
, 5, (void**)mc
->samplers
);
721 pipe
->bind_vs_state(pipe
, mc
->b_vs
[1]);
722 pipe
->bind_fs_state(pipe
, mc
->b_fs
[1]);
724 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, vb_start
, num_macroblocks
[vlMacroBlockExTypeBiPredictedField
] * 24);
725 vb_start
+= num_macroblocks
[vlMacroBlockExTypeBiPredictedField
] * 24;
728 mc
->num_macroblocks
= 0;
734 static int vlRenderMacroBlocksMpeg2R16SnormBuffered
736 struct vlRender
*render
,
737 struct vlMpeg2MacroBlockBatch
*batch
,
738 struct vlSurface
*surface
741 struct vlR16SnormBufferedMC
*mc
;
746 mc
= (struct vlR16SnormBufferedMC
*)render
;
748 if (mc
->buffered_surface
)
752 mc
->buffered_surface
!= surface
/*||
753 mc->past_surface != batch->past_surface ||
754 mc->future_surface != batch->future_surface*/
758 mc
->buffered_surface
= surface
;
759 mc
->past_surface
= batch
->past_surface
;
760 mc
->future_surface
= batch
->future_surface
;
761 mc
->surface_tex_inv_size
.x
= 1.0f
/ surface
->texture
->width
[0];
762 mc
->surface_tex_inv_size
.y
= 1.0f
/ surface
->texture
->height
[0];
767 mc
->buffered_surface
= surface
;
768 mc
->past_surface
= batch
->past_surface
;
769 mc
->future_surface
= batch
->future_surface
;
770 mc
->surface_tex_inv_size
.x
= 1.0f
/ surface
->texture
->width
[0];
771 mc
->surface_tex_inv_size
.y
= 1.0f
/ surface
->texture
->height
[0];
774 for (i
= 0; i
< batch
->num_macroblocks
; ++i
)
775 vlGrabMacroBlock(mc
, &batch
->macroblocks
[i
]);
782 struct vlRender
*render
792 struct vlRender
*render
795 struct vlR16SnormBufferedMC
*mc
;
796 struct pipe_context
*pipe
;
801 mc
= (struct vlR16SnormBufferedMC
*)render
;
804 for (i
= 0; i
< 5; ++i
)
805 pipe
->delete_sampler_state(pipe
, mc
->samplers
[i
]);
807 for (h
= 0; h
< NUM_BUF_SETS
; ++h
)
808 for (i
= 0; i
< 3; ++i
)
809 pipe
->winsys
->buffer_destroy(pipe
->winsys
, mc
->vertex_bufs
[h
][i
].buffer
);
811 /* Textures 3 & 4 are not created directly, no need to release them here */
812 for (i
= 0; i
< NUM_BUF_SETS
; ++i
)
814 pipe_texture_release(&mc
->textures
[i
][0]);
815 pipe_texture_release(&mc
->textures
[i
][1]);
816 pipe_texture_release(&mc
->textures
[i
][2]);
819 pipe
->delete_vs_state(pipe
, mc
->i_vs
);
820 pipe
->delete_fs_state(pipe
, mc
->i_fs
);
822 for (i
= 0; i
< 2; ++i
)
824 pipe
->delete_vs_state(pipe
, mc
->p_vs
[i
]);
825 pipe
->delete_fs_state(pipe
, mc
->p_fs
[i
]);
826 pipe
->delete_vs_state(pipe
, mc
->b_vs
[i
]);
827 pipe
->delete_fs_state(pipe
, mc
->b_fs
[i
]);
830 pipe
->winsys
->buffer_destroy(pipe
->winsys
, mc
->vs_const_buf
.buffer
);
831 pipe
->winsys
->buffer_destroy(pipe
->winsys
, mc
->fs_const_buf
.buffer
);
833 free(mc
->macroblocks
);
840 * Muliplier renormalizes block samples from 16 bits to 12 bits.
841 * Divider is used when calculating Y % 2 for choosing top or bottom
842 * field for P or B macroblocks.
843 * TODO: Use immediates.
845 static const struct vlFragmentShaderConsts fs_consts
=
847 {32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 0.0f
},
848 {0.5f
, 2.0f
, 0.0f
, 0.0f
}
851 static int vlCreateVertexShaderIMB
853 struct vlR16SnormBufferedMC
*mc
856 const unsigned int max_tokens
= 50;
858 struct pipe_context
*pipe
;
859 struct pipe_shader_state vs
;
860 struct tgsi_token
*tokens
;
861 struct tgsi_header
*header
;
863 struct tgsi_full_declaration decl
;
864 struct tgsi_full_instruction inst
;
872 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
875 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
877 header
= (struct tgsi_header
*)&tokens
[1];
878 *header
= tgsi_build_header();
880 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
885 * decl i0 ; Vertex pos
886 * decl i1 ; Luma texcoords
887 * decl i2 ; Chroma Cb texcoords
888 * decl i3 ; Chroma Cr texcoords
890 for (i
= 0; i
< 4; i
++)
892 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
893 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
897 * decl o0 ; Vertex pos
898 * decl o1 ; Luma texcoords
899 * decl o2 ; Chroma Cb texcoords
900 * decl o3 ; Chroma Cr texcoords
902 for (i
= 0; i
< 4; i
++)
904 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
905 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
909 * mov o0, i0 ; Move input vertex pos to output
910 * mov o1, i1 ; Move input luma texcoords to output
911 * mov o2, i2 ; Move input chroma Cb texcoords to output
912 * mov o3, i3 ; Move input chroma Cr texcoords to output
914 for (i
= 0; i
< 4; ++i
)
916 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
917 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
922 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
925 mc
->i_vs
= pipe
->create_vs_state(pipe
, &vs
);
931 static int vlCreateFragmentShaderIMB
933 struct vlR16SnormBufferedMC
*mc
936 const unsigned int max_tokens
= 100;
938 struct pipe_context
*pipe
;
939 struct pipe_shader_state fs
;
940 struct tgsi_token
*tokens
;
941 struct tgsi_header
*header
;
943 struct tgsi_full_declaration decl
;
944 struct tgsi_full_instruction inst
;
952 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
955 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
957 header
= (struct tgsi_header
*)&tokens
[1];
958 *header
= tgsi_build_header();
960 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
965 * decl i0 ; Luma texcoords
966 * decl i1 ; Chroma Cb texcoords
967 * decl i2 ; Chroma Cr texcoords
969 for (i
= 0; i
< 3; ++i
)
971 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
972 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
975 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
976 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
977 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
979 /* decl o0 ; Fragment color */
980 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
981 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
984 decl
= vl_decl_temps(0, 1);
985 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
988 * decl s0 ; Sampler for luma texture
989 * decl s1 ; Sampler for chroma Cb texture
990 * decl s2 ; Sampler for chroma Cr texture
992 for (i
= 0; i
< 3; ++i
)
994 decl
= vl_decl_samplers(i
, i
);
995 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
,max_tokens
- ti
);
999 * tex2d t1, i0, s0 ; Read texel from luma texture
1000 * mov t0.x, t1.x ; Move luma sample into .x component
1001 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1002 * mov t0.y, t1.x ; Move Cb sample into .y component
1003 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1004 * mov t0.z, t1.x ; Move Cr sample into .z component
1006 for (i
= 0; i
< 3; ++i
)
1008 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1009 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1011 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1012 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1013 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1014 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1015 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1016 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1020 /* mul o0, t0, c0 ; Rescale texel to correct range */
1021 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1022 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1026 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1029 mc
->i_fs
= pipe
->create_fs_state(pipe
, &fs
);
1035 static int vlCreateVertexShaderFramePMB
1037 struct vlR16SnormBufferedMC
*mc
1040 const unsigned int max_tokens
= 100;
1042 struct pipe_context
*pipe
;
1043 struct pipe_shader_state vs
;
1044 struct tgsi_token
*tokens
;
1045 struct tgsi_header
*header
;
1047 struct tgsi_full_declaration decl
;
1048 struct tgsi_full_instruction inst
;
1056 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1059 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1061 header
= (struct tgsi_header
*)&tokens
[1];
1062 *header
= tgsi_build_header();
1064 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1069 * decl i0 ; Vertex pos
1070 * decl i1 ; Luma texcoords
1071 * decl i2 ; Chroma Cb texcoords
1072 * decl i3 ; Chroma Cr texcoords
1073 * decl i4 ; Ref surface top field texcoords
1074 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
1076 for (i
= 0; i
< 6; i
++)
1078 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1079 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1083 * decl o0 ; Vertex pos
1084 * decl o1 ; Luma texcoords
1085 * decl o2 ; Chroma Cb texcoords
1086 * decl o3 ; Chroma Cr texcoords
1087 * decl o4 ; Ref macroblock texcoords
1089 for (i
= 0; i
< 5; i
++)
1091 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1092 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1096 * mov o0, i0 ; Move input vertex pos to output
1097 * mov o1, i1 ; Move input luma texcoords to output
1098 * mov o2, i2 ; Move input chroma Cb texcoords to output
1099 * mov o3, i3 ; Move input chroma Cr texcoords to output
1101 for (i
= 0; i
< 4; ++i
)
1103 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
1104 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1107 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
1108 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, 4);
1109 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1113 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1116 mc
->p_vs
[0] = pipe
->create_vs_state(pipe
, &vs
);
/*
 * vlCreateVertexShaderFieldPMB
 *
 * Assembles a TGSI vertex-processor token stream in a heap buffer of
 * max_tokens (100) tokens and passes it to pipe->create_vs_state(); the
 * handle returned by the driver is stored in mc->p_vs[1].
 *
 * Per the register listings below, the program declares inputs i0..i5,
 * constant c0 and outputs o0..o6, then emits:
 *   mov o0..o3, i0..i3
 *   add o4, i0, i4
 *   add o5, i0, i5
 *   mul o6, i0, c0
 *
 * mc: motion-compensation context; the visible code writes mc->p_vs[1].
 *
 * NOTE(review): this excerpt does not show the declarations of ti and i,
 * the assignments to pipe and to the members of vs, the release of tokens,
 * or the return statement - confirm them against the full file.
 */
1122 static int vlCreateVertexShaderFieldPMB
1124 struct vlR16SnormBufferedMC
*mc
1127 const unsigned int max_tokens
= 100;
1129 struct pipe_context
*pipe
;
1130 struct pipe_shader_state vs
;
1131 struct tgsi_token
*tokens
;
1132 struct tgsi_header
*header
;
1134 struct tgsi_full_declaration decl
;
1135 struct tgsi_full_instruction inst
;
/* Scratch buffer for the token stream. NOTE(review): the malloc result is not checked in the visible lines. */
1143 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* Slots 0, 1 and 2 receive the version, header and processor tokens respectively. */
1146 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1148 header
= (struct tgsi_header
*)&tokens
[1];
1149 *header
= tgsi_build_header();
1151 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1156 * decl i0 ; Vertex pos
1157 * decl i1 ; Luma texcoords
1158 * decl i2 ; Chroma Cb texcoords
1159 * decl i3 ; Chroma Cr texcoords
1160 * decl i4 ; Ref macroblock top field texcoords
1161 * decl i5 ; Ref macroblock bottom field texcoords
/* Input 0 is declared with the POSITION semantic, inputs 1..5 with GENERIC. */
1163 for (i
= 0; i
< 6; i
++)
1165 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
/* Each build call returns the number of tokens written; ti advances by that amount. */
1166 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1169 /* decl c0 ; Texcoord denorm coefficients */
1170 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
1171 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1174 * decl o0 ; Vertex pos
1175 * decl o1 ; Luma texcoords
1176 * decl o2 ; Chroma Cb texcoords
1177 * decl o3 ; Chroma Cr texcoords
1178 * decl o4 ; Ref macroblock top field texcoords
1179 * decl o5 ; Ref macroblock bottom field texcoords
1180 * decl o6 ; Denormalized vertex pos
/* Outputs 0 and 6 are declared with the POSITION semantic, outputs 1..5 with GENERIC. */
1182 for (i
= 0; i
< 7; i
++)
1184 decl
= vl_decl_output((i
== 0 || i
== 6) ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1185 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1189 * mov o0, i0 ; Move input vertex pos to output
1190 * mov o1, i1 ; Move input luma texcoords to output
1191 * mov o2, i2 ; Move input chroma Cb texcoords to output
1192 * mov o3, i3 ; Move input chroma Cr texcoords to output
1194 for (i
= 0; i
< 4; ++i
)
1196 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
1197 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1201 * add o4, i0, i4 ; Translate vertex pos by motion vec to form top field macroblock texcoords
1202 * add o5, i0, i5 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords
/* Output i + 4 = input 0 + input i + 4, for i = 0 and 1. */
1204 for (i
= 0; i
< 2; ++i
)
1206 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, i
+ 4);
1207 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1210 /* mul o6, i0, c0 ; Denorm vertex pos */
1211 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 6, TGSI_FILE_INPUT
, 0, TGSI_FILE_CONSTANT
, 0);
1212 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no assignment to inst is visible between the previous emit and this one; presumably a terminating instruction is built first - confirm against the full file. */
1216 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Hand the shader description to the driver and keep the returned handle in slot 1 of mc->p_vs. */
1219 mc
->p_vs
[1] = pipe
->create_vs_state(pipe
, &vs
);
/*
 * vlCreateFragmentShaderFramePMB
 *
 * Assembles a TGSI fragment-processor token stream in a heap buffer of
 * max_tokens (100) tokens and passes it to pipe->create_fs_state(); the
 * handle returned by the driver is stored in mc->p_fs[0].
 *
 * Per the register listings below, the program declares inputs i0..i3,
 * constant c0, color output o0, temporaries and samplers s0..s3, then emits:
 *   tex2d t1, i0..i2, s0..s2 with mov t0.x / t0.y / t0.z, t1.x after each
 *   mul t0, t0, c0
 *   tex2d t1, i3, s3
 *   add o0, t0, t1
 *
 * mc: motion-compensation context; the visible code writes mc->p_fs[0].
 *
 * NOTE(review): this excerpt does not show the declarations of ti and i,
 * the assignments to pipe and to the members of fs, the release of tokens,
 * or the return statement - confirm them against the full file.
 */
1225 static int vlCreateFragmentShaderFramePMB
1227 struct vlR16SnormBufferedMC
*mc
1230 const unsigned int max_tokens
= 100;
1232 struct pipe_context
*pipe
;
1233 struct pipe_shader_state fs
;
1234 struct tgsi_token
*tokens
;
1235 struct tgsi_header
*header
;
1237 struct tgsi_full_declaration decl
;
1238 struct tgsi_full_instruction inst
;
/* Scratch buffer for the token stream. NOTE(review): the malloc result is not checked in the visible lines. */
1246 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* Slots 0, 1 and 2 receive the version, header and processor tokens respectively. */
1249 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1251 header
= (struct tgsi_header
*)&tokens
[1];
1252 *header
= tgsi_build_header();
1254 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1259 * decl i0 ; Luma texcoords
1260 * decl i1 ; Chroma Cb texcoords
1261 * decl i2 ; Chroma Cr texcoords
1262 * decl i3 ; Ref macroblock texcoords
/* The second argument is i + 1 while the third and fourth are i; presumably the semantic index is offset by one so fragment input i pairs with vertex output i + 1 - confirm against vl_decl_interpolated_input. */
1264 for (i
= 0; i
< 4; ++i
)
1266 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
/* Each build call returns the number of tokens written; ti advances by that amount. */
1267 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1270 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
1271 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
1272 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1274 /* decl o0 ; Fragment color */
1275 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1276 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Temporaries; the instructions below reference temporary registers 0 and 1. */
1279 decl
= vl_decl_temps(0, 1);
1280 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1283 * decl s0 ; Sampler for luma texture
1284 * decl s1 ; Sampler for chroma Cb texture
1285 * decl s2 ; Sampler for chroma Cr texture
1286 * decl s3 ; Sampler for ref surface texture
1288 for (i
= 0; i
< 4; ++i
)
1290 decl
= vl_decl_samplers(i
, i
);
1291 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1295 * tex2d t1, i0, s0 ; Read texel from luma texture
1296 * mov t0.x, t1.x ; Move luma sample into .x component
1297 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1298 * mov t0.y, t1.x ; Move Cb sample into .y component
1299 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1300 * mov t0.z, t1.x ; Move Cr sample into .z component
1302 for (i
= 0; i
< 3; ++i
)
1304 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1305 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* The source swizzle replicates t1.x into x, y and z; the write mask TGSI_WRITEMASK_X << i then picks the destination channel for this iteration (x, y, z in turn per the listing above). */
1307 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1308 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1309 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1310 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1311 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1312 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1316 /* mul t0, t0, c0 ; Rescale texel to correct range */
1317 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1318 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1320 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
1321 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, 3, TGSI_FILE_SAMPLER
, 3);
1322 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1324 /* add o0, t0, t1 ; Add ref and differential to form final output */
1325 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1326 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no assignment to inst is visible between the previous emit and this one; presumably a terminating instruction is built first - confirm against the full file. */
1330 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Hand the shader description to the driver and keep the returned handle in slot 0 of mc->p_fs. */
1333 mc
->p_fs
[0] = pipe
->create_fs_state(pipe
, &fs
);
/*
 * vlCreateFragmentShaderFieldPMB
 *
 * Assembles a TGSI fragment-processor token stream in a heap buffer of
 * max_tokens (200) tokens and passes it to pipe->create_fs_state(); the
 * handle returned by the driver is stored in mc->p_fs[1].
 *
 * Per the register listings below, the program declares inputs i0..i5,
 * constants c0..c1, color output o0, temporaries and samplers s0..s3,
 * then emits:
 *   tex2d t1, i0..i2, s0..s2 with mov t0.x / t0.y / t0.z, t1.x after each
 *   mul t0, t0, c0
 *   tex2d t1, i3, s3 and tex2d t2, i4, s3
 *   sub t4, i5.y, c1.x; mul t3, t4, c1.x; floor t3, t3;
 *   mul t3, t3, c1.y; sub t3, t4, t3
 *   lerp t1, t3, t1, t2
 *   add o0, t0, t1
 *
 * mc: motion-compensation context; the visible code writes mc->p_fs[1].
 *
 * NOTE(review): this excerpt does not show the declarations of ti and i,
 * the assignments to pipe and to the members of fs, the release of tokens,
 * or the return statement - confirm them against the full file.
 */
1339 static int vlCreateFragmentShaderFieldPMB
1341 struct vlR16SnormBufferedMC
*mc
1344 const unsigned int max_tokens
= 200;
1346 struct pipe_context
*pipe
;
1347 struct pipe_shader_state fs
;
1348 struct tgsi_token
*tokens
;
1349 struct tgsi_header
*header
;
1351 struct tgsi_full_declaration decl
;
1352 struct tgsi_full_instruction inst
;
/* Scratch buffer for the token stream. NOTE(review): the malloc result is not checked in the visible lines. */
1360 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* Slots 0, 1 and 2 receive the version, header and processor tokens respectively. */
1363 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1365 header
= (struct tgsi_header
*)&tokens
[1];
1366 *header
= tgsi_build_header();
1368 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1373 * decl i0 ; Luma texcoords
1374 * decl i1 ; Chroma Cb texcoords
1375 * decl i2 ; Chroma Cr texcoords
1376 * decl i3 ; Ref macroblock top field texcoords
1377 * decl i4 ; Ref macroblock bottom field texcoords
1378 * decl i5 ; Denormalized vertex pos
/* The second argument is i + 1 while the third and fourth are i; presumably the semantic index is offset by one so fragment input i pairs with vertex output i + 1 - confirm against vl_decl_interpolated_input. */
1380 for (i
= 0; i
< 6; ++i
)
1382 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
/* Each build call returns the number of tokens written; ti advances by that amount. */
1383 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1387 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1388 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
1390 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
1391 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1393 /* decl o0 ; Fragment color */
1394 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1395 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Temporaries; the instructions below reference temporary registers 0 through 4. */
1398 decl
= vl_decl_temps(0, 4);
1399 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1402 * decl s0 ; Sampler for luma texture
1403 * decl s1 ; Sampler for chroma Cb texture
1404 * decl s2 ; Sampler for chroma Cr texture
1405 * decl s3 ; Sampler for ref surface texture
1407 for (i
= 0; i
< 4; ++i
)
1409 decl
= vl_decl_samplers(i
, i
);
1410 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1414 * tex2d t1, i0, s0 ; Read texel from luma texture
1415 * mov t0.x, t1.x ; Move luma sample into .x component
1416 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1417 * mov t0.y, t1.x ; Move Cb sample into .y component
1418 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1419 * mov t0.z, t1.x ; Move Cr sample into .z component
1421 for (i
= 0; i
< 3; ++i
)
1423 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1424 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* The source swizzle replicates t1.x into x, y and z; the write mask TGSI_WRITEMASK_X << i then picks the destination channel for this iteration (x, y, z in turn per the listing above). */
1426 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1427 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1428 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1429 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1430 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1431 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1435 /* mul t0, t0, c0 ; Rescale texel to correct range */
1436 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1437 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1440 * tex2d t1, i3, s3 ; Read texel from ref macroblock top field
1441 * tex2d t2, i4, s3 ; Read texel from ref macroblock bottom field
/* Both fetches use sampler 3; only the destination temporary (i + 1) and the texcoord input (i + 3) change. */
1443 for (i
= 0; i
< 2; ++i
)
1445 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, 3);
1446 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1449 /* XXX: Pos values off by 0.5? */
1450 /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */
/* Source 0 is swizzled to .yyyy and source 1 to .xxxx, so every channel of t4 holds i5.y - c1.x. */
1451 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_INPUT
, 5, TGSI_FILE_CONSTANT
, 1);
1452 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1453 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1454 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1455 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1456 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1457 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1458 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1459 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1460 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1462 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1463 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_CONSTANT
, 1);
1464 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1465 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1466 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1467 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1468 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1470 /* floor t3, t3 ; Get rid of fractional part */
1471 inst
= vl_inst2(TGSI_OPCODE_FLOOR
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3);
1472 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1474 /* mul t3, t3, c1.y ; Multiply by 2 */
1475 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_CONSTANT
, 1);
1476 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1477 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1478 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1479 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1480 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1482 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1483 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_TEMPORARY
, 3);
1484 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1486 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1487 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
1488 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
1489 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1491 /* add o0, t0, t1 ; Add ref and differential to form final output */
1492 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1493 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no assignment to inst is visible between the previous emit and this one; presumably a terminating instruction is built first - confirm against the full file. */
1497 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Hand the shader description to the driver and keep the returned handle in slot 1 of mc->p_fs. */
1500 mc
->p_fs
[1] = pipe
->create_fs_state(pipe
, &fs
);
/*
 * vlCreateVertexShaderFrameBMB
 *
 * Assembles a TGSI vertex-processor token stream in a heap buffer of
 * max_tokens (100) tokens and passes it to pipe->create_vs_state(); the
 * handle returned by the driver is stored in mc->b_vs[0].
 *
 * Per the register listings below, the program declares inputs i0..i7 and
 * outputs o0..o5, then emits:
 *   mov o0..o3, i0..i3
 *   add o4, i0, i4
 *   add o5, i0, i6
 *
 * mc: motion-compensation context; the visible code writes mc->b_vs[0].
 *
 * NOTE(review): this excerpt does not show the declarations of ti and i,
 * the assignments to pipe and to the members of vs, the release of tokens,
 * or the return statement - confirm them against the full file.
 */
1506 static int vlCreateVertexShaderFrameBMB
1508 struct vlR16SnormBufferedMC
*mc
1511 const unsigned int max_tokens
= 100;
1513 struct pipe_context
*pipe
;
1514 struct pipe_shader_state vs
;
1515 struct tgsi_token
*tokens
;
1516 struct tgsi_header
*header
;
1518 struct tgsi_full_declaration decl
;
1519 struct tgsi_full_instruction inst
;
/* Scratch buffer for the token stream. NOTE(review): the malloc result is not checked in the visible lines. */
1527 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* Slots 0, 1 and 2 receive the version, header and processor tokens respectively. */
1530 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1532 header
= (struct tgsi_header
*)&tokens
[1];
1533 *header
= tgsi_build_header();
1535 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1540 * decl i0 ; Vertex pos
1541 * decl i1 ; Luma texcoords
1542 * decl i2 ; Chroma Cb texcoords
1543 * decl i3 ; Chroma Cr texcoords
1544 * decl i4 ; First ref macroblock top field texcoords
1545 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
1546 * decl i6 ; Second ref macroblock top field texcoords
1547 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
/* Input 0 is declared with the POSITION semantic, inputs 1..7 with GENERIC. */
1549 for (i
= 0; i
< 8; i
++)
1551 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
/* Each build call returns the number of tokens written; ti advances by that amount. */
1552 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1556 * decl o0 ; Vertex pos
1557 * decl o1 ; Luma texcoords
1558 * decl o2 ; Chroma Cb texcoords
1559 * decl o3 ; Chroma Cr texcoords
1560 * decl o4 ; First ref macroblock texcoords
1561 * decl o5 ; Second ref macroblock texcoords
/* Output 0 is declared with the POSITION semantic, outputs 1..5 with GENERIC. */
1563 for (i
= 0; i
< 6; i
++)
1565 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1566 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1570 * mov o0, i0 ; Move input vertex pos to output
1571 * mov o1, i1 ; Move input luma texcoords to output
1572 * mov o2, i2 ; Move input chroma Cb texcoords to output
1573 * mov o3, i3 ; Move input chroma Cr texcoords to output
1575 for (i
= 0; i
< 4; ++i
)
1577 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
1578 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1582 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
1583 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
/* (i + 2) * 2 evaluates to 4 and 6, so inputs 5 and 7 are never read by this shader. */
1585 for (i
= 0; i
< 2; ++i
)
1587 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, (i
+ 2) * 2);
1588 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no assignment to inst is visible between the previous emit and this one; presumably a terminating instruction is built first - confirm against the full file. */
1593 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Hand the shader description to the driver and keep the returned handle in slot 0 of mc->b_vs. */
1596 mc
->b_vs
[0] = pipe
->create_vs_state(pipe
, &vs
);
/*
 * vlCreateVertexShaderFieldBMB
 *
 * Assembles a TGSI vertex-processor token stream in a heap buffer of
 * max_tokens (100) tokens and passes it to pipe->create_vs_state(); the
 * handle returned by the driver is stored in mc->b_vs[1].
 *
 * Per the register listings below, the program declares inputs i0..i7,
 * constants, outputs o0..o8 and temporaries, then emits:
 *   mov o0..o3, i0..i3
 *   add o4..o7, i0, i4..i7
 *   mul o8, i0, c0
 *
 * mc: motion-compensation context; the visible code writes mc->b_vs[1].
 *
 * NOTE(review): this excerpt does not show the declarations of ti and i,
 * the assignments to pipe and to the members of vs, the release of tokens,
 * or the return statement - confirm them against the full file.
 */
1602 static int vlCreateVertexShaderFieldBMB
1604 struct vlR16SnormBufferedMC
*mc
1607 const unsigned int max_tokens
= 100;
1609 struct pipe_context
*pipe
;
1610 struct pipe_shader_state vs
;
1611 struct tgsi_token
*tokens
;
1612 struct tgsi_header
*header
;
1614 struct tgsi_full_declaration decl
;
1615 struct tgsi_full_instruction inst
;
/* Scratch buffer for the token stream. NOTE(review): the malloc result is not checked in the visible lines. */
1623 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* Slots 0, 1 and 2 receive the version, header and processor tokens respectively. */
1626 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1628 header
= (struct tgsi_header
*)&tokens
[1];
1629 *header
= tgsi_build_header();
1631 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1636 * decl i0 ; Vertex pos
1637 * decl i1 ; Luma texcoords
1638 * decl i2 ; Chroma Cb texcoords
1639 * decl i3 ; Chroma Cr texcoords
1640 * decl i4 ; First ref macroblock top field texcoords
1641 * decl i5 ; First ref macroblock bottom field texcoords
1642 * decl i6 ; Second ref macroblock top field texcoords
1643 * decl i7 ; Second ref macroblock bottom field texcoords
/* Input 0 is declared with the POSITION semantic, inputs 1..7 with GENERIC. */
1645 for (i
= 0; i
< 8; i
++)
1647 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
/* Each build call returns the number of tokens written; ti advances by that amount. */
1648 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1651 /* decl c0 ; Denorm coefficients */
/* NOTE(review): the last argument is 6 although only constant 0 is read by the MUL below; this looks like a wider constant range than needed - confirm against vl_decl_constants. */
1652 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 6);
1653 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1656 * decl o0 ; Vertex pos
1657 * decl o1 ; Luma texcoords
1658 * decl o2 ; Chroma Cb texcoords
1659 * decl o3 ; Chroma Cr texcoords
1660 * decl o4 ; First ref macroblock top field texcoords
1661 * decl o5 ; First ref macroblock Bottom field texcoords
1662 * decl o6 ; Second ref macroblock top field texcoords
1663 * decl o7 ; Second ref macroblock Bottom field texcoords
1664 * decl o8 ; Denormalized vertex pos
/* Outputs 0 and 8 are declared with the POSITION semantic, outputs 1..7 with GENERIC. */
1666 for (i
= 0; i
< 9; i
++)
1668 decl
= vl_decl_output((i
== 0 || i
== 8) ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1669 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no instruction visible in this function references a TGSI_FILE_TEMPORARY register, so this declaration may be unnecessary - confirm. */
1673 decl
= vl_decl_temps(0, 1);
1674 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1677 * mov o0, i0 ; Move input vertex pos to output
1678 * mov o1, i1 ; Move input luma texcoords to output
1679 * mov o2, i2 ; Move input chroma Cb texcoords to output
1680 * mov o3, i3 ; Move input chroma Cr texcoords to output
1682 for (i
= 0; i
< 4; ++i
)
1684 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
);
1685 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1689 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
1690 * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
1691 * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
1692 * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
/* Output i + 4 = input 0 + input i + 4, for i = 0..3. */
1694 for (i
= 0; i
< 4; ++i
)
1696 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 4, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, i
+ 4);
1697 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1700 /* mul o8, i0, c0 ; Denorm vertex pos */
1701 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 8, TGSI_FILE_INPUT
, 0, TGSI_FILE_CONSTANT
, 0);
1702 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no assignment to inst is visible between the previous emit and this one; presumably a terminating instruction is built first - confirm against the full file. */
1706 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Hand the shader description to the driver and keep the returned handle in slot 1 of mc->b_vs. */
1709 mc
->b_vs
[1] = pipe
->create_vs_state(pipe
, &vs
);
/*
 * vlCreateFragmentShaderFrameBMB
 *
 * Assembles a TGSI fragment-processor token stream in a heap buffer of
 * max_tokens (100) tokens and passes it to pipe->create_fs_state(); the
 * handle returned by the driver is stored in mc->b_fs[0].
 *
 * Per the register listings below, the program declares inputs i0..i4,
 * constants c0..c1, color output o0, temporaries and samplers s0..s4,
 * then emits:
 *   tex2d t1, i0..i2, s0..s2 with mov t0.x / t0.y / t0.z, t1.x after each
 *   mul t0, t0, c0
 *   tex2d t1, i3, s3 and tex2d t2, i4, s4
 *   lerp t1, c1.x, t1, t2
 *   add o0, t0, t1
 *
 * mc: motion-compensation context; the visible code writes mc->b_fs[0].
 *
 * NOTE(review): this excerpt does not show the declarations of ti and i,
 * the assignments to pipe and to the members of fs, the release of tokens,
 * or the return statement - confirm them against the full file.
 */
1715 static int vlCreateFragmentShaderFrameBMB
1717 struct vlR16SnormBufferedMC
*mc
1720 const unsigned int max_tokens
= 100;
1722 struct pipe_context
*pipe
;
1723 struct pipe_shader_state fs
;
1724 struct tgsi_token
*tokens
;
1725 struct tgsi_header
*header
;
1727 struct tgsi_full_declaration decl
;
1728 struct tgsi_full_instruction inst
;
/* Scratch buffer for the token stream. NOTE(review): the malloc result is not checked in the visible lines. */
1736 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
/* Slots 0, 1 and 2 receive the version, header and processor tokens respectively. */
1739 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1741 header
= (struct tgsi_header
*)&tokens
[1];
1742 *header
= tgsi_build_header();
1744 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1749 * decl i0 ; Luma texcoords
1750 * decl i1 ; Chroma Cb texcoords
1751 * decl i2 ; Chroma Cr texcoords
1752 * decl i3 ; First ref macroblock texcoords
1753 * decl i4 ; Second ref macroblock texcoords
/* The second argument is i + 1 while the third and fourth are i; presumably the semantic index is offset by one so fragment input i pairs with vertex output i + 1 - confirm against vl_decl_interpolated_input. */
1755 for (i
= 0; i
< 5; ++i
)
1757 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
/* Each build call returns the number of tokens written; ti advances by that amount. */
1758 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1762 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1763 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
1765 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
1766 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1768 /* decl o0 ; Fragment color */
1769 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1770 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Temporaries; the instructions below reference temporary registers 0, 1 and 2. */
1773 decl
= vl_decl_temps(0, 2);
1774 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1777 * decl s0 ; Sampler for luma texture
1778 * decl s1 ; Sampler for chroma Cb texture
1779 * decl s2 ; Sampler for chroma Cr texture
1780 * decl s3 ; Sampler for first ref surface texture
1781 * decl s4 ; Sampler for second ref surface texture
1783 for (i
= 0; i
< 5; ++i
)
1785 decl
= vl_decl_samplers(i
, i
);
1786 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1790 * tex2d t1, i0, s0 ; Read texel from luma texture
1791 * mov t0.x, t1.x ; Move luma sample into .x component
1792 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1793 * mov t0.y, t1.x ; Move Cb sample into .y component
1794 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1795 * mov t0.z, t1.x ; Move Cr sample into .z component
1797 for (i
= 0; i
< 3; ++i
)
1799 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1800 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* The source swizzle replicates t1.x into x, y and z; the write mask TGSI_WRITEMASK_X << i then picks the destination channel for this iteration (x, y, z in turn per the listing above). */
1802 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1803 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1804 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1805 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1806 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1807 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1811 /* mul t0, t0, c0 ; Rescale texel to correct range */
1812 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1813 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1816 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
1817 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
/* Destination temporary is i + 1; texcoord input and sampler are both i + 3. */
1819 for (i
= 0; i
< 2; ++i
)
1821 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, i
+ 3);
1822 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1825 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
/* Source 0 (constant 1) is swizzled to .xxxx so the same weight applies to every channel. */
1826 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_CONSTANT
, 1, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
1827 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1828 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1829 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1830 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1831 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1833 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1834 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1835 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* NOTE(review): no assignment to inst is visible between the previous emit and this one; presumably a terminating instruction is built first - confirm against the full file. */
1839 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
/* Hand the shader description to the driver and keep the returned handle in slot 0 of mc->b_fs. */
1842 mc
->b_fs
[0] = pipe
->create_fs_state(pipe
, &fs
);
1848 static int vlCreateFragmentShaderFieldBMB
1850 struct vlR16SnormBufferedMC
*mc
1853 const unsigned int max_tokens
= 200;
1855 struct pipe_context
*pipe
;
1856 struct pipe_shader_state fs
;
1857 struct tgsi_token
*tokens
;
1858 struct tgsi_header
*header
;
1860 struct tgsi_full_declaration decl
;
1861 struct tgsi_full_instruction inst
;
1869 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1872 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1874 header
= (struct tgsi_header
*)&tokens
[1];
1875 *header
= tgsi_build_header();
1877 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1882 * decl i0 ; Luma texcoords
1883 * decl i1 ; Chroma Cb texcoords
1884 * decl i2 ; Chroma Cr texcoords
1885 * decl i3 ; First ref macroblock top field texcoords
1886 * decl i4 ; First ref macroblock bottom field texcoords
1887 * decl i5 ; Second ref macroblock top field texcoords
1888 * decl i6 ; Second ref macroblock bottom field texcoords
1889 * decl i7 ; Denormalized vertex pos
1891 for (i
= 0; i
< 8; ++i
)
1893 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1894 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1898 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1899 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
1900 * ; and for Y-mod-2 top/bottom field selection
1902 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
1903 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1905 /* decl o0 ; Fragment color */
1906 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1907 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1910 decl
= vl_decl_temps(0, 5);
1911 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1914 * decl s0 ; Sampler for luma texture
1915 * decl s1 ; Sampler for chroma Cb texture
1916 * decl s2 ; Sampler for chroma Cr texture
1917 * decl s3 ; Sampler for first ref surface texture
1918 * decl s4 ; Sampler for second ref surface texture
1920 for (i
= 0; i
< 5; ++i
)
1922 decl
= vl_decl_samplers(i
, i
);
1923 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1927 * tex2d t1, i0, s0 ; Read texel from luma texture
1928 * mov t0.x, t1.x ; Move luma sample into .x component
1929 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1930 * mov t0.y, t1.x ; Move Cb sample into .y component
1931 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1932 * mov t0.z, t1.x ; Move Cr sample into .z component
1934 for (i
= 0; i
< 3; ++i
)
1936 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, i
, TGSI_FILE_SAMPLER
, i
);
1937 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1939 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1940 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1941 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1942 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1943 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1944 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1948 /* mul t0, t0, c0 ; Rescale texel to correct range */
1949 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1950 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1952 /* XXX: Pos values off by 0.5? */
1953 /* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */
1954 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_INPUT
, 7, TGSI_FILE_CONSTANT
, 1);
1955 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1956 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1957 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1958 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1959 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1960 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1961 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1962 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1963 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1965 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1966 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_CONSTANT
, 1);
1967 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1968 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1969 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1970 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1971 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1973 /* floor t3, t3 ; Get rid of fractional part */
1974 inst
= vl_inst2(TGSI_OPCODE_FLOOR
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3);
1975 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1977 /* mul t3, t3, c1.y ; Multiply by 2 */
1978 inst
= vl_inst3( TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_CONSTANT
, 1);
1979 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1980 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1981 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1982 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1983 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1985 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1986 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_TEMPORARY
, 3);
1987 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1990 * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field
1991 * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field
1993 for (i
= 0; i
< 2; ++i
)
1995 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, 3);
1996 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1999 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
2000 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
2001 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
2002 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2005 * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field
2006 * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field
2008 for (i
= 0; i
< 2; ++i
)
2010 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 4, TGSI_FILE_INPUT
, i
+ 5, TGSI_FILE_SAMPLER
, 4);
2011 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2014 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
2015 /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
2016 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 2, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_TEMPORARY
, 5);
2017 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2019 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
2020 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_CONSTANT
, 1, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
2021 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
2022 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
2023 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
2024 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
2025 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2027 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
2028 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
2029 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2033 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2036 mc
->b_fs
[1] = pipe
->create_fs_state(pipe
, &fs
);
/*
 * vlCreateDataBufs: set up the per-frame data buffers of a buffered MC renderer.
 *
 * For every one of the NUM_BUF_SETS rotating buffer sets this fills in three
 * pipe_vertex_buffer descriptors (pitch, max_index, offset, backing buffer),
 * then describes the eight vertex elements that read from those buffers,
 * creates the vertex- and fragment-shader constant buffers, maps/unmaps the
 * fragment-shader constant buffer, and finally allocates the macroblock array
 * (one struct vlMpeg2MacroBlock per macroblock in a frame).
 *
 * mc: renderer state; picture_width/picture_height are read, and vertex_bufs,
 *     vertex_elems, vs_const_buf, fs_const_buf and macroblocks are written.
 *
 * NOTE(review): this listing is incomplete (braces, the declarations of h and
 * i, the assignment of `pipe`, and some call arguments are not visible), so
 * the comments below describe only what can be seen here.
 * NOTE(review): none of the buffer_create()/malloc() results are visibly
 * checked in this listing - confirm error handling against the full file.
 */
2042 static int vlCreateDataBufs
2044 struct vlR16SnormBufferedMC
*mc
/* Picture size in macroblocks; align() presumably rounds up to a whole macroblock - TODO confirm */
2047 const unsigned int mbw
= align(mc
->picture_width
, VL_MACROBLOCK_WIDTH
) / VL_MACROBLOCK_WIDTH
;
2048 const unsigned int mbh
= align(mc
->picture_height
, VL_MACROBLOCK_HEIGHT
) / VL_MACROBLOCK_HEIGHT
;
/* Total macroblocks in one frame; every buffer below is sized from this */
2049 const unsigned int num_mb_per_frame
= mbw
* mbh
;
2051 struct pipe_context
*pipe
;
2058 /* Create our vertex buffers */
2059 for (h
= 0; h
< NUM_BUF_SETS
; ++h
)
/* Buffer 0: four vlVertex2f per vertex, 24 vertices reserved per macroblock */
2061 mc
->vertex_bufs
[h
][0].pitch
= sizeof(struct vlVertex2f
) * 4;
2062 mc
->vertex_bufs
[h
][0].max_index
= 24 * num_mb_per_frame
- 1;
2063 mc
->vertex_bufs
[h
][0].buffer_offset
= 0;
/* NOTE(review): the leading arguments of buffer_create() are not visible in this listing */
2064 mc
->vertex_bufs
[h
][0].buffer
= pipe
->winsys
->buffer_create
2068 PIPE_BUFFER_USAGE_VERTEX
,
2069 sizeof(struct vlVertex2f
) * 4 * 24 * num_mb_per_frame
/* Buffers 1 and 2: two vlVertex2f per vertex, same vertex count as buffer 0 */
2072 for (i
= 1; i
< 3; ++i
)
2074 mc
->vertex_bufs
[h
][i
].pitch
= sizeof(struct vlVertex2f
) * 2;
2075 mc
->vertex_bufs
[h
][i
].max_index
= 24 * num_mb_per_frame
- 1;
2076 mc
->vertex_bufs
[h
][i
].buffer_offset
= 0;
2077 mc
->vertex_bufs
[h
][i
].buffer
= pipe
->winsys
->buffer_create
2081 PIPE_BUFFER_USAGE_VERTEX
,
2082 sizeof(struct vlVertex2f
) * 2 * 24 * num_mb_per_frame
/*
 * Vertex element layout: elements 0-3 are interleaved in vertex buffer 0 at
 * consecutive vlVertex2f offsets, elements 4-5 live in buffer 1 and elements
 * 6-7 in buffer 2. Every element is a two-component R32G32 float.
 */
2087 /* Position element */
2088 mc
->vertex_elems
[0].src_offset
= 0;
2089 mc
->vertex_elems
[0].vertex_buffer_index
= 0;
2090 mc
->vertex_elems
[0].nr_components
= 2;
2091 mc
->vertex_elems
[0].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2093 /* Luma, texcoord element */
2094 mc
->vertex_elems
[1].src_offset
= sizeof(struct vlVertex2f
);
2095 mc
->vertex_elems
[1].vertex_buffer_index
= 0;
2096 mc
->vertex_elems
[1].nr_components
= 2;
2097 mc
->vertex_elems
[1].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2099 /* Chroma Cr texcoord element */
2100 mc
->vertex_elems
[2].src_offset
= sizeof(struct vlVertex2f
) * 2;
2101 mc
->vertex_elems
[2].vertex_buffer_index
= 0;
2102 mc
->vertex_elems
[2].nr_components
= 2;
2103 mc
->vertex_elems
[2].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2105 /* Chroma Cb texcoord element */
2106 mc
->vertex_elems
[3].src_offset
= sizeof(struct vlVertex2f
) * 3;
2107 mc
->vertex_elems
[3].vertex_buffer_index
= 0;
2108 mc
->vertex_elems
[3].nr_components
= 2;
2109 mc
->vertex_elems
[3].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2111 /* First ref surface top field texcoord element */
2112 mc
->vertex_elems
[4].src_offset
= 0;
2113 mc
->vertex_elems
[4].vertex_buffer_index
= 1;
2114 mc
->vertex_elems
[4].nr_components
= 2;
2115 mc
->vertex_elems
[4].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2117 /* First ref surface bottom field texcoord element */
2118 mc
->vertex_elems
[5].src_offset
= sizeof(struct vlVertex2f
);
2119 mc
->vertex_elems
[5].vertex_buffer_index
= 1;
2120 mc
->vertex_elems
[5].nr_components
= 2;
2121 mc
->vertex_elems
[5].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2123 /* Second ref surface top field texcoord element */
2124 mc
->vertex_elems
[6].src_offset
= 0;
2125 mc
->vertex_elems
[6].vertex_buffer_index
= 2;
2126 mc
->vertex_elems
[6].nr_components
= 2;
2127 mc
->vertex_elems
[6].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2129 /* Second ref surface bottom field texcoord element */
2130 mc
->vertex_elems
[7].src_offset
= sizeof(struct vlVertex2f
);
2131 mc
->vertex_elems
[7].vertex_buffer_index
= 2;
2132 mc
->vertex_elems
[7].nr_components
= 2;
2133 mc
->vertex_elems
[7].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2135 /* Create our constant buffer */
/* Vertex-shader constants: buffer sized to exactly one struct vlVertexShaderConsts */
2136 mc
->vs_const_buf
.size
= sizeof(struct vlVertexShaderConsts
);
2137 mc
->vs_const_buf
.buffer
= pipe
->winsys
->buffer_create
2141 PIPE_BUFFER_USAGE_CONSTANT
,
2142 mc
->vs_const_buf
.size
/* Fragment-shader constants: buffer sized to exactly one struct vlFragmentShaderConsts */
2145 mc
->fs_const_buf
.size
= sizeof(struct vlFragmentShaderConsts
);
2146 mc
->fs_const_buf
.buffer
= pipe
->winsys
->buffer_create
2150 PIPE_BUFFER_USAGE_CONSTANT
,
2151 mc
->fs_const_buf
.size
/*
 * The fragment-shader constant buffer is mapped for CPU write and unmapped
 * right after. NOTE(review): the enclosing call and its source operand are not
 * visible in this listing; presumably a one-struct copy of the initial
 * constants - confirm against the full file.
 */
2156 pipe
->winsys
->buffer_map(pipe
->winsys
, mc
->fs_const_buf
.buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
),
2158 sizeof(struct vlFragmentShaderConsts
)
2161 pipe
->winsys
->buffer_unmap(pipe
->winsys
, mc
->fs_const_buf
.buffer
);
/* CPU-side macroblock storage, one entry per macroblock of a frame */
2163 mc
->macroblocks
= malloc(sizeof(struct vlMpeg2MacroBlock
) * num_mb_per_frame
);
/*
 * Initialisation routine for a buffered MC renderer (the line carrying the
 * function name is not visible in this listing).
 *
 * In order, it: sets up the viewport and framebuffer state from the picture
 * size rounded up with vlRoundUpPOT(), creates five sampler states, creates
 * three R16_SNORM textures per buffer set, builds all vertex/fragment shaders
 * through the vlCreate*Shader* helpers, and calls vlCreateDataBufs().
 *
 * mc: renderer state; picture_width, picture_height and picture_format are
 *     read, and viewport, render_target, samplers and textures are written.
 *
 * NOTE(review): the declarations of `i`, the assignment of `pipe` and the
 * return statement are not visible here; the results of the create calls are
 * not visibly checked either - confirm against the full file.
 */
2170 struct vlR16SnormBufferedMC
*mc
2173 struct pipe_context
*pipe
;
2174 struct pipe_sampler_state sampler
;
2175 struct pipe_texture
template;
/* Per-sampler filter choice, consumed by the sampler creation loop below */
2176 unsigned int filters
[5];
2183 /* For MC we render to textures, which are rounded up to nearest POT */
2184 mc
->viewport
.scale
[0] = vlRoundUpPOT(mc
->picture_width
);
2185 mc
->viewport
.scale
[1] = vlRoundUpPOT(mc
->picture_height
);
2186 mc
->viewport
.scale
[2] = 1;
2187 mc
->viewport
.scale
[3] = 1;
2188 mc
->viewport
.translate
[0] = 0;
2189 mc
->viewport
.translate
[1] = 0;
2190 mc
->viewport
.translate
[2] = 0;
2191 mc
->viewport
.translate
[3] = 0;
/* Framebuffer uses the same POT-rounded size as the viewport, one colour buffer, no depth/stencil */
2193 mc
->render_target
.width
= vlRoundUpPOT(mc
->picture_width
);
2194 mc
->render_target
.height
= vlRoundUpPOT(mc
->picture_height
);
2195 mc
->render_target
.num_cbufs
= 1;
2196 /* FB for MC stage is a vlSurface created by the user, set at render time */
2197 mc
->render_target
.zsbuf
= NULL
;
/* Samplers 0-2 use nearest filtering; samplers 3-4 use linear filtering */
2199 filters
[0] = PIPE_TEX_FILTER_NEAREST
;
2200 /* FIXME: Linear causes discoloration around block edges */
2201 filters
[1] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST
/*: PIPE_TEX_FILTER_LINEAR*/;
2202 filters
[2] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST
/*: PIPE_TEX_FILTER_LINEAR*/;
2203 filters
[3] = PIPE_TEX_FILTER_LINEAR
;
2204 filters
[4] = PIPE_TEX_FILTER_LINEAR
;
/*
 * Create the five sampler states; they differ only in min/mag filter.
 * NOTE(review): unlike `template` below, `sampler` is not visibly zeroed
 * before use, and several of its fields are only present as commented-out
 * assignments - confirm those fields are not passed uninitialised to
 * create_sampler_state().
 */
2206 for (i
= 0; i
< 5; ++i
)
2208 sampler
.wrap_s
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
2209 sampler
.wrap_t
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
2210 sampler
.wrap_r
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
2211 sampler
.min_img_filter
= filters
[i
];
2212 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
2213 sampler
.mag_img_filter
= filters
[i
];
2214 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
2215 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
2216 sampler
.normalized_coords
= 1;
2217 /*sampler.prefilter = ;*/
2218 /*sampler.shadow_ambient = ;*/
2219 /*sampler.lod_bias = ;*/
2220 sampler
.min_lod
= 0;
2221 /*sampler.max_lod = ;*/
2222 /*sampler.border_color[i] = ;*/
2223 /*sampler.max_anisotropy = ;*/
2224 mc
->samplers
[i
] = pipe
->create_sampler_state(pipe
, &sampler
);
/* Texture template: single-level 2D R16_SNORM texture at the POT-rounded picture size */
2227 memset(&template, 0, sizeof(struct pipe_texture
));
2228 template.target
= PIPE_TEXTURE_2D
;
2229 template.format
= PIPE_FORMAT_R16_SNORM
;
2230 template.last_level
= 0;
2231 template.width
[0] = vlRoundUpPOT(mc
->picture_width
);
2232 template.height
[0] = vlRoundUpPOT(mc
->picture_height
);
2233 template.depth
[0] = 1;
2234 template.compressed
= 0;
2235 pf_get_block(template.format
, &template.block
);
/* Texture 0 of every buffer set is created at full size (presumably the luma plane - TODO confirm) */
2237 for (i
= 0; i
< NUM_BUF_SETS
; ++i
)
2238 mc
->textures
[i
][0] = pipe
->screen
->texture_create(pipe
->screen
, &template);
/*
 * Shrink the template for textures 1 and 2 according to the picture format:
 * 4:2:0 halves both dimensions, 4:2:2 halves one, anything else keeps full size.
 * NOTE(review): the 4:2:2 branch halves the height; 4:2:2 chroma is normally
 * subsampled horizontally - confirm this is intended.
 */
2240 if (mc
->picture_format
== vlFormatYCbCr420
)
2242 template.width
[0] = vlRoundUpPOT(mc
->picture_width
/ 2);
2243 template.height
[0] = vlRoundUpPOT(mc
->picture_height
/ 2);
2245 else if (mc
->picture_format
== vlFormatYCbCr422
)
2246 template.height
[0] = vlRoundUpPOT(mc
->picture_height
/ 2);
/* Textures 1 and 2 of every buffer set share the (possibly reduced) template */
2248 for (i
= 0; i
< NUM_BUF_SETS
; ++i
)
2250 mc
->textures
[i
][1] = pipe
->screen
->texture_create(pipe
->screen
, &template);
2251 mc
->textures
[i
][2] = pipe
->screen
->texture_create(pipe
->screen
, &template);
2254 /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
/* Build every vertex/fragment shader variant, then the vertex and constant buffers */
2256 vlCreateVertexShaderIMB(mc
);
2257 vlCreateFragmentShaderIMB(mc
);
2258 vlCreateVertexShaderFramePMB(mc
);
2259 vlCreateVertexShaderFieldPMB(mc
);
2260 vlCreateFragmentShaderFramePMB(mc
);
2261 vlCreateFragmentShaderFieldPMB(mc
);
2262 vlCreateVertexShaderFrameBMB(mc
);
2263 vlCreateVertexShaderFieldBMB(mc
);
2264 vlCreateFragmentShaderFrameBMB(mc
);
2265 vlCreateFragmentShaderFieldBMB(mc
);
2266 vlCreateDataBufs(mc
);
/*
 * vlCreateR16SNormBufferedMC: allocate a buffered R16_SNORM MC renderer and
 * hand it back through its generic struct vlRender interface.
 *
 * pipe:           pipe context for the renderer.
 * picture_width:  picture width, stored in the new object.
 * picture_height: picture height, stored in the new object.
 * picture_format: picture chroma format.
 * render:         out parameter; receives a pointer to the embedded base
 *                 struct of the new object.
 *
 * NOTE(review): parts of this function are not visible in this listing (for
 * example where `pipe` and `picture_format` are stored, any further
 * initialisation call, and the return statement) - confirm against the full
 * file before relying on this description.
 */
2271 int vlCreateR16SNormBufferedMC
2273 struct pipe_context
*pipe
,
2274 unsigned int picture_width
,
2275 unsigned int picture_height
,
2276 enum vlFormat picture_format
,
2277 struct vlRender
**render
2280 struct vlR16SnormBufferedMC
*mc
;
/*
 * Zero-initialised allocation of the renderer object.
 * NOTE(review): no NULL check on the calloc() result is visible before `mc`
 * is dereferenced below.
 */
2285 mc
= calloc(1, sizeof(struct vlR16SnormBufferedMC
));
/* Hook this implementation's entry points into the generic renderer interface */
2287 mc
->base
.vlBegin
= &vlBegin
;
2288 mc
->base
.vlRenderMacroBlocksMpeg2
= &vlRenderMacroBlocksMpeg2R16SnormBuffered
;
2289 mc
->base
.vlEnd
= &vlEnd
;
2290 mc
->base
.vlFlush
= &vlFlush
;
2291 mc
->base
.vlDestroy
= &vlDestroy
;
/* Record the picture dimensions supplied by the caller */
2293 mc
->picture_width
= picture_width
;
2294 mc
->picture_height
= picture_height
;
/* No surfaces are attached and no macroblocks are buffered yet */
2297 mc
->buffered_surface
= NULL
;
2298 mc
->past_surface
= NULL
;
2299 mc
->future_surface
= NULL
;
2300 mc
->num_macroblocks
= 0;
/* Return the object to the caller via its embedded base struct */
2304 *render
= &mc
->base
;