2 #include "vl_r16snorm_mc_buf.h"
5 #include <pipe/p_context.h>
6 #include <pipe/p_winsys.h>
7 #include <pipe/p_screen.h>
8 #include <pipe/p_state.h>
9 #include <pipe/p_util.h>
10 #include <pipe/p_inlines.h>
11 #include <tgsi/tgsi_parse.h>
12 #include <tgsi/tgsi_build.h>
13 #include "vl_render.h"
14 #include "vl_shader_build.h"
15 #include "vl_surface.h"
20 #define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */
22 enum vlMacroBlockTypeEx
24 vlMacroBlockExTypeIntra
,
25 vlMacroBlockExTypeFwdPredictedFrame
,
26 vlMacroBlockExTypeFwdPredictedField
,
27 vlMacroBlockExTypeBkwdPredictedFrame
,
28 vlMacroBlockExTypeBkwdPredictedField
,
29 vlMacroBlockExTypeBiPredictedFrame
,
30 vlMacroBlockExTypeBiPredictedField
,
32 vlNumMacroBlockExTypes
35 struct vlVertexShaderConsts
37 struct vlVertex4f denorm
;
40 struct vlFragmentShaderConsts
42 struct vlVertex4f multiplier
;
43 struct vlVertex4f div
;
46 struct vlR16SnormBufferedMC
50 unsigned int video_width
, video_height
;
51 enum vlFormat video_format
;
54 struct vlSurface
*buffered_surface
;
55 struct vlSurface
*past_surface
, *future_surface
;
56 struct vlVertex2f surface_tex_inv_size
;
57 unsigned int num_macroblocks
[vlNumMacroBlockExTypes
];
58 unsigned int total_num_macroblocks
;
60 struct pipe_context
*pipe
;
61 struct pipe_viewport_state viewport
;
62 struct pipe_framebuffer_state render_target
;
63 struct pipe_sampler_state
*samplers
[5];
64 struct pipe_texture
*textures
[NUM_BUF_SETS
][5];
65 void *i_vs
, *p_vs
[2], *b_vs
[2];
66 void *i_fs
, *p_fs
[2], *b_fs
[2];
67 struct pipe_vertex_buffer vertex_bufs
[NUM_BUF_SETS
][vlNumMacroBlockExTypes
][3];
68 struct pipe_vertex_element vertex_elems
[5];
69 struct pipe_constant_buffer vs_const_buf
, fs_const_buf
;
74 struct vlRender
*render
82 static int vlGrabFrameCodedBlock(short *src
, short *dst
, unsigned int dst_pitch
)
86 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
90 src
+ y
* VL_BLOCK_WIDTH
,
97 static int vlGrabFieldCodedBlock(short *src
, short *dst
, unsigned int dst_pitch
)
101 for (y
= 0; y
< VL_BLOCK_HEIGHT
/ 2; ++y
)
104 dst
+ y
* dst_pitch
* 2,
105 src
+ y
* VL_BLOCK_WIDTH
,
109 dst
+= VL_BLOCK_HEIGHT
* dst_pitch
;
111 for (; y
< VL_BLOCK_HEIGHT
; ++y
)
114 dst
+ y
* dst_pitch
* 2,
115 src
+ y
* VL_BLOCK_WIDTH
,
122 static int vlGrabNoBlock(short *dst
, unsigned int dst_pitch
)
126 for (y
= 0; y
< VL_BLOCK_HEIGHT
; ++y
)
137 static int vlGrabBlocks
139 struct vlR16SnormBufferedMC
*mc
,
142 enum vlDCTType dct_type
,
143 unsigned int coded_block_pattern
,
147 struct pipe_surface
*tex_surface
;
149 unsigned int tex_pitch
;
150 unsigned int x
, y
, tb
= 0, sb
= 0;
151 unsigned int mbpx
= mbx
* VL_MACROBLOCK_WIDTH
, mbpy
= mby
* VL_MACROBLOCK_HEIGHT
;
156 tex_surface
= mc
->pipe
->screen
->get_tex_surface
159 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][0],
160 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
163 texels
= pipe_surface_map(tex_surface
, PIPE_BUFFER_USAGE_CPU_WRITE
);
164 tex_pitch
= tex_surface
->stride
/ tex_surface
->block
.size
;
166 texels
+= mbpy
* tex_pitch
+ mbpx
;
168 for (y
= 0; y
< 2; ++y
)
170 for (x
= 0; x
< 2; ++x
, ++tb
)
172 if ((coded_block_pattern
>> (5 - tb
)) & 1)
174 short *cur_block
= blocks
+ sb
* VL_BLOCK_WIDTH
* VL_BLOCK_HEIGHT
;
176 if (dct_type
== vlDCTTypeFrameCoded
)
178 vlGrabFrameCodedBlock
181 texels
+ y
* tex_pitch
* VL_BLOCK_HEIGHT
+ x
* VL_BLOCK_WIDTH
,
187 vlGrabFieldCodedBlock
190 texels
+ y
* tex_pitch
+ x
* VL_BLOCK_WIDTH
,
198 vlGrabNoBlock(texels
+ y
* tex_pitch
* VL_BLOCK_HEIGHT
+ x
* VL_BLOCK_WIDTH
, tex_pitch
);
202 pipe_surface_unmap(tex_surface
);
204 /* TODO: Implement 422, 444 */
208 for (tb
= 0; tb
< 2; ++tb
)
210 tex_surface
= mc
->pipe
->screen
->get_tex_surface
213 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][tb
+ 1],
214 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
217 texels
= pipe_surface_map(tex_surface
, PIPE_BUFFER_USAGE_CPU_WRITE
);
218 tex_pitch
= tex_surface
->stride
/ tex_surface
->block
.size
;
220 texels
+= mbpy
* tex_pitch
+ mbpx
;
222 if ((coded_block_pattern
>> (1 - tb
)) & 1)
224 short *cur_block
= blocks
+ sb
* VL_BLOCK_WIDTH
* VL_BLOCK_HEIGHT
;
226 vlGrabFrameCodedBlock
236 vlGrabNoBlock(texels
, tex_pitch
);
238 pipe_surface_unmap(tex_surface
);
245 static int vlGrabMacroBlock
247 struct vlR16SnormBufferedMC
*mc
,
248 struct vlMpeg2MacroBlock
*macroblock
251 const struct vlVertex2f unit
=
253 mc
->surface_tex_inv_size
.x
* VL_MACROBLOCK_WIDTH
,
254 mc
->surface_tex_inv_size
.y
* VL_MACROBLOCK_HEIGHT
256 const struct vlVertex2f half
=
258 mc
->surface_tex_inv_size
.x
* (VL_MACROBLOCK_WIDTH
/ 2),
259 mc
->surface_tex_inv_size
.y
* (VL_MACROBLOCK_HEIGHT
/ 2)
262 struct vlVertex2f
*vb
;
263 enum vlMacroBlockTypeEx mb_type_ex
;
264 struct vlVertex2f mo_vec
[2];
270 switch (macroblock
->mb_type
)
272 case vlMacroBlockTypeIntra
:
274 mb_type_ex
= vlMacroBlockExTypeIntra
;
277 case vlMacroBlockTypeFwdPredicted
:
279 mb_type_ex
= macroblock
->mo_type
== vlMotionTypeFrame
?
280 vlMacroBlockExTypeFwdPredictedFrame
: vlMacroBlockExTypeFwdPredictedField
;
283 case vlMacroBlockTypeBkwdPredicted
:
285 mb_type_ex
= macroblock
->mo_type
== vlMotionTypeFrame
?
286 vlMacroBlockExTypeBkwdPredictedFrame
: vlMacroBlockExTypeBkwdPredictedField
;
289 case vlMacroBlockTypeBiPredicted
:
291 mb_type_ex
= macroblock
->mo_type
== vlMotionTypeFrame
?
292 vlMacroBlockExTypeBiPredictedFrame
: vlMacroBlockExTypeBiPredictedField
;
299 switch (macroblock
->mb_type
)
301 case vlMacroBlockTypeBiPredicted
:
303 vb
= (struct vlVertex2f
*)mc
->pipe
->winsys
->buffer_map
306 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][mb_type_ex
][2].buffer
,
307 PIPE_BUFFER_USAGE_CPU_WRITE
308 ) + mc
->num_macroblocks
[mb_type_ex
] * 2 * 24;
310 mo_vec
[0].x
= macroblock
->PMV
[0][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
311 mo_vec
[0].y
= macroblock
->PMV
[0][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
313 if (macroblock
->mo_type
== vlMotionTypeFrame
)
315 for (i
= 0; i
< 24 * 2; i
+= 2)
317 vb
[i
].x
= mo_vec
[0].x
;
318 vb
[i
].y
= mo_vec
[0].y
;
323 mo_vec
[1].x
= macroblock
->PMV
[1][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
324 mo_vec
[1].y
= macroblock
->PMV
[1][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
326 for (i
= 0; i
< 24 * 2; i
+= 2)
328 vb
[i
].x
= mo_vec
[0].x
;
329 vb
[i
].y
= mo_vec
[0].y
;
330 vb
[i
+ 1].x
= mo_vec
[1].x
;
331 vb
[i
+ 1].y
= mo_vec
[1].y
;
335 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][mb_type_ex
][2].buffer
);
339 case vlMacroBlockTypeFwdPredicted
:
340 case vlMacroBlockTypeBkwdPredicted
:
342 vb
= (struct vlVertex2f
*)mc
->pipe
->winsys
->buffer_map
345 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][mb_type_ex
][1].buffer
,
346 PIPE_BUFFER_USAGE_CPU_WRITE
347 ) + mc
->num_macroblocks
[mb_type_ex
] * 2 * 24;
349 if (macroblock
->mb_type
== vlMacroBlockTypeBkwdPredicted
)
351 mo_vec
[0].x
= macroblock
->PMV
[0][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
352 mo_vec
[0].y
= macroblock
->PMV
[0][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
354 if (macroblock
->mo_type
== vlMotionTypeField
)
356 mo_vec
[1].x
= macroblock
->PMV
[1][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
357 mo_vec
[1].y
= macroblock
->PMV
[1][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
362 mo_vec
[0].x
= macroblock
->PMV
[0][0][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
363 mo_vec
[0].y
= macroblock
->PMV
[0][0][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
365 if (macroblock
->mo_type
== vlMotionTypeField
)
367 mo_vec
[1].x
= macroblock
->PMV
[1][0][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
368 mo_vec
[1].y
= macroblock
->PMV
[1][0][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
372 if (macroblock
->mo_type
== vlMotionTypeFrame
)
374 for (i
= 0; i
< 24 * 2; i
+= 2)
376 vb
[i
].x
= mo_vec
[0].x
;
377 vb
[i
].y
= mo_vec
[0].y
;
382 for (i
= 0; i
< 24 * 2; i
+= 2)
384 vb
[i
].x
= mo_vec
[0].x
;
385 vb
[i
].y
= mo_vec
[0].y
;
386 vb
[i
+ 1].x
= mo_vec
[1].x
;
387 vb
[i
+ 1].y
= mo_vec
[1].y
;
391 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][mb_type_ex
][1].buffer
);
395 case vlMacroBlockTypeIntra
:
397 vb
= (struct vlVertex2f
*)mc
->pipe
->winsys
->buffer_map
400 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][mb_type_ex
][0].buffer
,
401 PIPE_BUFFER_USAGE_CPU_WRITE
402 ) + mc
->num_macroblocks
[mb_type_ex
] * 24;
404 vb
[0].x
= macroblock
->mbx
* unit
.x
; vb
[0].y
= macroblock
->mby
* unit
.y
;
405 vb
[1].x
= macroblock
->mbx
* unit
.x
; vb
[1].y
= macroblock
->mby
* unit
.y
+ half
.y
;
406 vb
[2].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[2].y
= macroblock
->mby
* unit
.y
;
408 vb
[3].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[3].y
= macroblock
->mby
* unit
.y
;
409 vb
[4].x
= macroblock
->mbx
* unit
.x
; vb
[4].y
= macroblock
->mby
* unit
.y
+ half
.y
;
410 vb
[5].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[5].y
= macroblock
->mby
* unit
.y
+ half
.y
;
412 vb
[6].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[6].y
= macroblock
->mby
* unit
.y
;
413 vb
[7].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[7].y
= macroblock
->mby
* unit
.y
+ half
.y
;
414 vb
[8].x
= macroblock
->mbx
* unit
.x
+ unit
.x
; vb
[8].y
= macroblock
->mby
* unit
.y
;
416 vb
[9].x
= macroblock
->mbx
* unit
.x
+ unit
.x
; vb
[9].y
= macroblock
->mby
* unit
.y
;
417 vb
[10].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[10].y
= macroblock
->mby
* unit
.y
+ half
.y
;
418 vb
[11].x
= macroblock
->mbx
* unit
.x
+ unit
.x
; vb
[11].y
= macroblock
->mby
* unit
.y
+ half
.y
;
420 vb
[12].x
= macroblock
->mbx
* unit
.x
; vb
[12].y
= macroblock
->mby
* unit
.y
+ half
.y
;
421 vb
[13].x
= macroblock
->mbx
* unit
.x
; vb
[13].y
= macroblock
->mby
* unit
.y
+ unit
.y
;
422 vb
[14].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[14].y
= macroblock
->mby
* unit
.y
+ half
.y
;
424 vb
[15].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[15].y
= macroblock
->mby
* unit
.y
+ half
.y
;
425 vb
[16].x
= macroblock
->mbx
* unit
.x
; vb
[16].y
= macroblock
->mby
* unit
.y
+ unit
.y
;
426 vb
[17].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[17].y
= macroblock
->mby
* unit
.y
+ unit
.y
;
428 vb
[18].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[18].y
= macroblock
->mby
* unit
.y
+ half
.y
;
429 vb
[19].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[19].y
= macroblock
->mby
* unit
.y
+ unit
.y
;
430 vb
[20].x
= macroblock
->mbx
* unit
.x
+ unit
.x
; vb
[20].y
= macroblock
->mby
* unit
.y
+ half
.y
;
432 vb
[21].x
= macroblock
->mbx
* unit
.x
+ unit
.x
; vb
[21].y
= macroblock
->mby
* unit
.y
+ half
.y
;
433 vb
[22].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[22].y
= macroblock
->mby
* unit
.y
+ unit
.y
;
434 vb
[23].x
= macroblock
->mbx
* unit
.x
+ unit
.x
; vb
[23].y
= macroblock
->mby
* unit
.y
+ unit
.y
;
436 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][mb_type_ex
][0].buffer
);
449 macroblock
->dct_type
,
454 mc
->num_macroblocks
[mb_type_ex
]++;
455 mc
->total_num_macroblocks
++;
460 static int vlGrabMacroBlock
462 struct vlR16SnormBufferedMC
*mc
,
463 struct vlMpeg2MacroBlock
*macroblock
466 const struct vlVertex2f unit
=
468 mc
->surface_tex_inv_size
.x
* VL_MACROBLOCK_WIDTH
,
469 mc
->surface_tex_inv_size
.y
* VL_MACROBLOCK_HEIGHT
471 const struct vlVertex2f half
=
473 mc
->surface_tex_inv_size
.x
* (VL_MACROBLOCK_WIDTH
/ 2),
474 mc
->surface_tex_inv_size
.y
* (VL_MACROBLOCK_HEIGHT
/ 2)
477 struct vlVertex2f
*vb
;
478 unsigned int mb_buf_id
;
479 struct vlVertex2f mo_vec
[2];
485 switch (macroblock
->mb_type
)
487 case vlMacroBlockTypeIntra
:
489 mb_buf_id
= vlMacroBlockExTypeIntra
;
492 case vlMacroBlockTypeFwdPredicted
:
494 mb_buf_id
= macroblock
->mo_type
== vlMotionTypeFrame
?
495 vlMacroBlockExTypeFwdPredictedFrame
: vlMacroBlockExTypeFwdPredictedField
;
498 case vlMacroBlockTypeBkwdPredicted
:
500 mb_buf_id
= macroblock
->mo_type
== vlMotionTypeFrame
?
501 vlMacroBlockExTypeBkwdPredictedFrame
: vlMacroBlockExTypeBkwdPredictedField
;
504 case vlMacroBlockTypeBiPredicted
:
506 mb_buf_id
= macroblock
->mo_type
== vlMotionTypeFrame
?
507 vlMacroBlockExTypeBiPredictedFrame
: vlMacroBlockExTypeBiPredictedField
;
514 vb
= (struct vlVertex2f
*)mc
->pipe
->winsys
->buffer_map
517 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][mb_buf_id
][0].buffer
,
518 PIPE_BUFFER_USAGE_CPU_WRITE
519 ) + mc
->num_macroblocks
[mb_buf_id
] * 24;
521 vb
[0].x
= macroblock
->mbx
* unit
.x
; vb
[0].y
= macroblock
->mby
* unit
.y
;
522 vb
[1].x
= macroblock
->mbx
* unit
.x
; vb
[1].y
= macroblock
->mby
* unit
.y
+ half
.y
;
523 vb
[2].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[2].y
= macroblock
->mby
* unit
.y
;
525 vb
[3].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[3].y
= macroblock
->mby
* unit
.y
;
526 vb
[4].x
= macroblock
->mbx
* unit
.x
; vb
[4].y
= macroblock
->mby
* unit
.y
+ half
.y
;
527 vb
[5].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[5].y
= macroblock
->mby
* unit
.y
+ half
.y
;
529 vb
[6].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[6].y
= macroblock
->mby
* unit
.y
;
530 vb
[7].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[7].y
= macroblock
->mby
* unit
.y
+ half
.y
;
531 vb
[8].x
= macroblock
->mbx
* unit
.x
+ unit
.x
; vb
[8].y
= macroblock
->mby
* unit
.y
;
533 vb
[9].x
= macroblock
->mbx
* unit
.x
+ unit
.x
; vb
[9].y
= macroblock
->mby
* unit
.y
;
534 vb
[10].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[10].y
= macroblock
->mby
* unit
.y
+ half
.y
;
535 vb
[11].x
= macroblock
->mbx
* unit
.x
+ unit
.x
; vb
[11].y
= macroblock
->mby
* unit
.y
+ half
.y
;
537 vb
[12].x
= macroblock
->mbx
* unit
.x
; vb
[12].y
= macroblock
->mby
* unit
.y
+ half
.y
;
538 vb
[13].x
= macroblock
->mbx
* unit
.x
; vb
[13].y
= macroblock
->mby
* unit
.y
+ unit
.y
;
539 vb
[14].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[14].y
= macroblock
->mby
* unit
.y
+ half
.y
;
541 vb
[15].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[15].y
= macroblock
->mby
* unit
.y
+ half
.y
;
542 vb
[16].x
= macroblock
->mbx
* unit
.x
; vb
[16].y
= macroblock
->mby
* unit
.y
+ unit
.y
;
543 vb
[17].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[17].y
= macroblock
->mby
* unit
.y
+ unit
.y
;
545 vb
[18].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[18].y
= macroblock
->mby
* unit
.y
+ half
.y
;
546 vb
[19].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[19].y
= macroblock
->mby
* unit
.y
+ unit
.y
;
547 vb
[20].x
= macroblock
->mbx
* unit
.x
+ unit
.x
; vb
[20].y
= macroblock
->mby
* unit
.y
+ half
.y
;
549 vb
[21].x
= macroblock
->mbx
* unit
.x
+ unit
.x
; vb
[21].y
= macroblock
->mby
* unit
.y
+ half
.y
;
550 vb
[22].x
= macroblock
->mbx
* unit
.x
+ half
.x
; vb
[22].y
= macroblock
->mby
* unit
.y
+ unit
.y
;
551 vb
[23].x
= macroblock
->mbx
* unit
.x
+ unit
.x
; vb
[23].y
= macroblock
->mby
* unit
.y
+ unit
.y
;
553 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][mb_buf_id
][0].buffer
);
555 if (macroblock
->mb_type
== vlMacroBlockTypeIntra
)
562 macroblock
->dct_type
,
567 mc
->num_macroblocks
[mb_buf_id
]++;
568 mc
->total_num_macroblocks
++;
572 vb
= (struct vlVertex2f
*)mc
->pipe
->winsys
->buffer_map
575 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][mb_buf_id
][1].buffer
,
576 PIPE_BUFFER_USAGE_CPU_WRITE
577 ) + mc
->num_macroblocks
[mb_buf_id
] * 2 * 24;
579 if (macroblock
->mb_type
== vlMacroBlockTypeFwdPredicted
|| macroblock
->mb_type
== vlMacroBlockTypeBiPredicted
)
581 mo_vec
[0].x
= macroblock
->PMV
[0][0][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
582 mo_vec
[0].y
= macroblock
->PMV
[0][0][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
584 if (macroblock
->mo_type
== vlMotionTypeField
)
586 mo_vec
[1].x
= macroblock
->PMV
[1][0][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
587 mo_vec
[1].y
= macroblock
->PMV
[1][0][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
592 mo_vec
[0].x
= macroblock
->PMV
[0][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
593 mo_vec
[0].y
= macroblock
->PMV
[0][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
595 if (macroblock
->mo_type
== vlMotionTypeField
)
597 mo_vec
[1].x
= macroblock
->PMV
[1][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
598 mo_vec
[1].y
= macroblock
->PMV
[1][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
602 if (macroblock
->mo_type
== vlMotionTypeFrame
)
604 for (i
= 0; i
< 24 * 2; i
+= 2)
606 vb
[i
].x
= mo_vec
[0].x
;
607 vb
[i
].y
= mo_vec
[0].y
;
612 for (i
= 0; i
< 24 * 2; i
+= 2)
614 vb
[i
].x
= mo_vec
[0].x
;
615 vb
[i
].y
= mo_vec
[0].y
;
616 vb
[i
+ 1].x
= mo_vec
[1].x
;
617 vb
[i
+ 1].y
= mo_vec
[1].y
;
621 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][mb_buf_id
][1].buffer
);
623 if (macroblock
->mb_type
!= vlMacroBlockTypeBiPredicted
)
630 macroblock
->dct_type
,
635 mc
->num_macroblocks
[mb_buf_id
]++;
636 mc
->total_num_macroblocks
++;
640 vb
= (struct vlVertex2f
*)mc
->pipe
->winsys
->buffer_map
643 mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][mb_buf_id
][2].buffer
,
644 PIPE_BUFFER_USAGE_CPU_WRITE
645 ) + mc
->num_macroblocks
[mb_buf_id
] * 2 * 24;
647 mo_vec
[0].x
= macroblock
->PMV
[0][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
648 mo_vec
[0].y
= macroblock
->PMV
[0][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
650 if (macroblock
->mo_type
== vlMotionTypeFrame
)
652 for (i
= 0; i
< 24 * 2; i
+= 2)
654 vb
[i
].x
= mo_vec
[0].x
;
655 vb
[i
].y
= mo_vec
[0].y
;
660 mo_vec
[1].x
= macroblock
->PMV
[1][1][0] * 0.5f
* mc
->surface_tex_inv_size
.x
;
661 mo_vec
[1].y
= macroblock
->PMV
[1][1][1] * 0.5f
* mc
->surface_tex_inv_size
.y
;
663 for (i
= 0; i
< 24 * 2; i
+= 2)
665 vb
[i
].x
= mo_vec
[0].x
;
666 vb
[i
].y
= mo_vec
[0].y
;
667 vb
[i
+ 1].x
= mo_vec
[1].x
;
668 vb
[i
+ 1].y
= mo_vec
[1].y
;
672 mc
->pipe
->winsys
->buffer_unmap(mc
->pipe
->winsys
, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][mb_buf_id
][2].buffer
);
679 macroblock
->dct_type
,
684 mc
->num_macroblocks
[mb_buf_id
]++;
685 mc
->total_num_macroblocks
++;
693 struct vlRender
*render
696 struct vlR16SnormBufferedMC
*mc
;
697 struct pipe_context
*pipe
;
698 struct vlVertexShaderConsts
*vs_consts
;
702 mc
= (struct vlR16SnormBufferedMC
*)render
;
705 mc
->render_target
.cbufs
[0] = pipe
->screen
->get_tex_surface
708 mc
->buffered_surface
->texture
,
709 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ
| PIPE_BUFFER_USAGE_GPU_WRITE
712 pipe
->set_framebuffer_state(pipe
, &mc
->render_target
);
713 pipe
->set_viewport_state(pipe
, &mc
->viewport
);
714 vs_consts
= pipe
->winsys
->buffer_map
717 mc
->vs_const_buf
.buffer
,
718 PIPE_BUFFER_USAGE_CPU_WRITE
721 vs_consts
->denorm
.x
= mc
->buffered_surface
->texture
->width
[0];
722 vs_consts
->denorm
.y
= mc
->buffered_surface
->texture
->height
[0];
724 pipe
->winsys
->buffer_unmap(pipe
->winsys
, mc
->vs_const_buf
.buffer
);
725 pipe
->set_constant_buffer(pipe
, PIPE_SHADER_FRAGMENT
, 0, &mc
->fs_const_buf
);
727 if (mc
->num_macroblocks
[vlMacroBlockExTypeIntra
] > 0)
729 pipe
->set_vertex_buffers(pipe
, 1, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][vlMacroBlockExTypeIntra
]);
730 pipe
->set_vertex_elements(pipe
, 1, mc
->vertex_elems
);
731 pipe
->set_sampler_textures(pipe
, 3, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
732 pipe
->bind_sampler_states(pipe
, 3, (void**)mc
->samplers
);
733 pipe
->bind_vs_state(pipe
, mc
->i_vs
);
734 pipe
->bind_fs_state(pipe
, mc
->i_fs
);
736 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, 0, mc
->num_macroblocks
[vlMacroBlockExTypeIntra
] * 24);
739 if (mc
->num_macroblocks
[vlMacroBlockExTypeFwdPredictedFrame
] > 0)
741 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][vlMacroBlockExTypeFwdPredictedFrame
]);
742 pipe
->set_vertex_elements(pipe
, 3, mc
->vertex_elems
);
743 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
744 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
745 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
746 pipe
->bind_vs_state(pipe
, mc
->p_vs
[0]);
747 pipe
->bind_fs_state(pipe
, mc
->p_fs
[0]);
749 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, 0, mc
->num_macroblocks
[vlMacroBlockExTypeFwdPredictedFrame
] * 24);
752 if (mc
->num_macroblocks
[vlMacroBlockExTypeFwdPredictedField
] > 0)
754 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][vlMacroBlockExTypeFwdPredictedField
]);
755 pipe
->set_vertex_elements(pipe
, 3, mc
->vertex_elems
);
756 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
757 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
758 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
759 pipe
->bind_vs_state(pipe
, mc
->p_vs
[1]);
760 pipe
->bind_fs_state(pipe
, mc
->p_fs
[1]);
762 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, 0, mc
->num_macroblocks
[vlMacroBlockExTypeFwdPredictedField
] * 24);
765 if (mc
->num_macroblocks
[vlMacroBlockExTypeBkwdPredictedFrame
] > 0)
767 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][vlMacroBlockExTypeBkwdPredictedFrame
]);
768 pipe
->set_vertex_elements(pipe
, 3, mc
->vertex_elems
);
769 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->future_surface
->texture
;
770 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
771 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
772 pipe
->bind_vs_state(pipe
, mc
->p_vs
[0]);
773 pipe
->bind_fs_state(pipe
, mc
->p_fs
[0]);
775 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, 0, mc
->num_macroblocks
[vlMacroBlockExTypeBkwdPredictedFrame
] * 24);
778 if (mc
->num_macroblocks
[vlMacroBlockExTypeBkwdPredictedField
] > 0)
780 pipe
->set_vertex_buffers(pipe
, 2, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][vlMacroBlockExTypeBkwdPredictedField
]);
781 pipe
->set_vertex_elements(pipe
, 3, mc
->vertex_elems
);
782 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->future_surface
->texture
;
783 pipe
->set_sampler_textures(pipe
, 4, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
784 pipe
->bind_sampler_states(pipe
, 4, (void**)mc
->samplers
);
785 pipe
->bind_vs_state(pipe
, mc
->p_vs
[1]);
786 pipe
->bind_fs_state(pipe
, mc
->p_fs
[1]);
788 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, 0, mc
->num_macroblocks
[vlMacroBlockExTypeBkwdPredictedField
] * 24);
791 if (mc
->num_macroblocks
[vlMacroBlockExTypeBiPredictedFrame
] > 0)
793 pipe
->set_vertex_buffers(pipe
, 3, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][vlMacroBlockExTypeBiPredictedFrame
]);
794 pipe
->set_vertex_elements(pipe
, 5, mc
->vertex_elems
);
795 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
796 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][4] = mc
->future_surface
->texture
;
797 pipe
->set_sampler_textures(pipe
, 5, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
798 pipe
->bind_sampler_states(pipe
, 5, (void**)mc
->samplers
);
799 pipe
->bind_vs_state(pipe
, mc
->b_vs
[0]);
800 pipe
->bind_fs_state(pipe
, mc
->b_fs
[0]);
802 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, 0, mc
->num_macroblocks
[vlMacroBlockExTypeBiPredictedFrame
] * 24);
805 if (mc
->num_macroblocks
[vlMacroBlockExTypeBiPredictedField
] > 0)
807 pipe
->set_vertex_buffers(pipe
, 3, mc
->vertex_bufs
[mc
->cur_buf
% NUM_BUF_SETS
][vlMacroBlockExTypeBiPredictedField
]);
808 pipe
->set_vertex_elements(pipe
, 5, mc
->vertex_elems
);
809 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][3] = mc
->past_surface
->texture
;
810 mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
][4] = mc
->future_surface
->texture
;
811 pipe
->set_sampler_textures(pipe
, 5, mc
->textures
[mc
->cur_buf
% NUM_BUF_SETS
]);
812 pipe
->bind_sampler_states(pipe
, 5, (void**)mc
->samplers
);
813 pipe
->bind_vs_state(pipe
, mc
->b_vs
[1]);
814 pipe
->bind_fs_state(pipe
, mc
->b_fs
[1]);
816 pipe
->draw_arrays(pipe
, PIPE_PRIM_TRIANGLES
, 0, mc
->num_macroblocks
[vlMacroBlockExTypeBiPredictedField
] * 24);
819 memset(mc
->num_macroblocks
, 0, sizeof(unsigned int) * 7);
820 mc
->total_num_macroblocks
= 0;
825 static int vlRenderMacroBlocksMpeg2R16SnormBuffered
827 struct vlRender
*render
,
828 struct vlMpeg2MacroBlockBatch
*batch
,
829 struct vlSurface
*surface
832 struct vlR16SnormBufferedMC
*mc
;
837 mc
= (struct vlR16SnormBufferedMC
*)render
;
839 if (mc
->buffered_surface
)
843 mc
->buffered_surface
!= surface
/*||
844 mc->past_surface != batch->past_surface ||
845 mc->future_surface != batch->future_surface*/
849 mc
->buffered_surface
= surface
;
850 mc
->past_surface
= batch
->past_surface
;
851 mc
->future_surface
= batch
->future_surface
;
852 mc
->surface_tex_inv_size
.x
= 1.0f
/ surface
->texture
->width
[0];
853 mc
->surface_tex_inv_size
.y
= 1.0f
/ surface
->texture
->height
[0];
858 mc
->buffered_surface
= surface
;
859 mc
->past_surface
= batch
->past_surface
;
860 mc
->future_surface
= batch
->future_surface
;
861 mc
->surface_tex_inv_size
.x
= 1.0f
/ surface
->texture
->width
[0];
862 mc
->surface_tex_inv_size
.y
= 1.0f
/ surface
->texture
->height
[0];
865 for (i
= 0; i
< batch
->num_macroblocks
; ++i
)
866 vlGrabMacroBlock(mc
, &batch
->macroblocks
[i
]);
873 struct vlRender
*render
883 struct vlRender
*render
886 struct vlR16SnormBufferedMC
*mc
;
887 struct pipe_context
*pipe
;
888 unsigned int g
, h
, i
;
892 mc
= (struct vlR16SnormBufferedMC
*)render
;
895 for (i
= 0; i
< 5; ++i
)
896 pipe
->delete_sampler_state(pipe
, mc
->samplers
[i
]);
898 for (g
= 0; g
< NUM_BUF_SETS
; ++g
)
899 for (h
= 0; h
< 7; ++h
)
900 for (i
= 0; i
< 3; ++i
)
901 pipe
->winsys
->buffer_destroy(pipe
->winsys
, mc
->vertex_bufs
[g
][h
][i
].buffer
);
903 /* Textures 3 & 4 are not created directly, no need to release them here */
904 for (i
= 0; i
< NUM_BUF_SETS
; ++i
)
906 pipe_texture_release(&mc
->textures
[i
][0]);
907 pipe_texture_release(&mc
->textures
[i
][1]);
908 pipe_texture_release(&mc
->textures
[i
][2]);
911 pipe
->delete_vs_state(pipe
, mc
->i_vs
);
912 pipe
->delete_fs_state(pipe
, mc
->i_fs
);
914 for (i
= 0; i
< 2; ++i
)
916 pipe
->delete_vs_state(pipe
, mc
->p_vs
[i
]);
917 pipe
->delete_fs_state(pipe
, mc
->p_fs
[i
]);
918 pipe
->delete_vs_state(pipe
, mc
->b_vs
[i
]);
919 pipe
->delete_fs_state(pipe
, mc
->b_fs
[i
]);
922 pipe
->winsys
->buffer_destroy(pipe
->winsys
, mc
->vs_const_buf
.buffer
);
923 pipe
->winsys
->buffer_destroy(pipe
->winsys
, mc
->fs_const_buf
.buffer
);
931 * Multiplier renormalizes block samples from 16 bits to 12 bits.
932 * Divider is used when calculating Y % 2 for choosing top or bottom
933 * field for P or B macroblocks.
934 * TODO: Use immediates.
936 static const struct vlFragmentShaderConsts fs_consts
=
938 {32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 32767.0f
/ 255.0f
, 0.0f
},
939 {0.5f
, 2.0f
, 0.0f
, 0.0f
}
942 static int vlCreateVertexShaderIMB
944 struct vlR16SnormBufferedMC
*mc
947 const unsigned int max_tokens
= 50;
949 struct pipe_context
*pipe
;
950 struct pipe_shader_state vs
;
951 struct tgsi_token
*tokens
;
952 struct tgsi_header
*header
;
954 struct tgsi_full_declaration decl
;
955 struct tgsi_full_instruction inst
;
963 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
966 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
968 header
= (struct tgsi_header
*)&tokens
[1];
969 *header
= tgsi_build_header();
971 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
976 * decl i0 ; Vertex pos, luma & chroma texcoords
978 for (i
= 0; i
< 3; i
++)
980 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
981 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
985 * decl o0 ; Vertex pos
986 * decl o1 ; Luma/chroma texcoords
988 for (i
= 0; i
< 2; i
++)
990 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
991 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
995 * mov o0, i0 ; Move input vertex pos to output
996 * mov o1, i0 ; Move input luma/chroma texcoords to output
998 for (i
= 0; i
< 2; ++i
)
1000 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, 0);
1001 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1006 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1009 mc
->i_vs
= pipe
->create_vs_state(pipe
, &vs
);
1015 static int vlCreateFragmentShaderIMB
1017 struct vlR16SnormBufferedMC
*mc
1020 const unsigned int max_tokens
= 100;
1022 struct pipe_context
*pipe
;
1023 struct pipe_shader_state fs
;
1024 struct tgsi_token
*tokens
;
1025 struct tgsi_header
*header
;
1027 struct tgsi_full_declaration decl
;
1028 struct tgsi_full_instruction inst
;
1036 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1039 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1041 header
= (struct tgsi_header
*)&tokens
[1];
1042 *header
= tgsi_build_header();
1044 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1048 /* decl i0 ; Luma/chroma texcoords */
1049 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, 1, 0, 0, TGSI_INTERPOLATE_LINEAR
);
1050 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1052 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
1053 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
1054 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1056 /* decl o0 ; Fragment color */
1057 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1058 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1061 decl
= vl_decl_temps(0, 1);
1062 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1065 * decl s0 ; Sampler for luma texture
1066 * decl s1 ; Sampler for chroma Cb texture
1067 * decl s2 ; Sampler for chroma Cr texture
1069 for (i
= 0; i
< 3; ++i
)
1071 decl
= vl_decl_samplers(i
, i
);
1072 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
,max_tokens
- ti
);
1076 * tex2d t1, i0, s0 ; Read texel from luma texture
1077 * mov t0.x, t1.x ; Move luma sample into .x component
1078 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1079 * mov t0.y, t1.x ; Move Cb sample into .y component
1080 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1081 * mov t0.z, t1.x ; Move Cr sample into .z component
1083 for (i
= 0; i
< 3; ++i
)
1085 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, 0, TGSI_FILE_SAMPLER
, i
);
1086 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1088 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1089 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1090 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1091 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1092 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1093 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1097 /* mul o0, t0, c0 ; Rescale texel to correct range */
1098 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1099 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1103 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1106 mc
->i_fs
= pipe
->create_fs_state(pipe
, &fs
);
1112 static int vlCreateVertexShaderFramePMB
1114 struct vlR16SnormBufferedMC
*mc
1117 const unsigned int max_tokens
= 100;
1119 struct pipe_context
*pipe
;
1120 struct pipe_shader_state vs
;
1121 struct tgsi_token
*tokens
;
1122 struct tgsi_header
*header
;
1124 struct tgsi_full_declaration decl
;
1125 struct tgsi_full_instruction inst
;
1133 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1136 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1138 header
= (struct tgsi_header
*)&tokens
[1];
1139 *header
= tgsi_build_header();
1141 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1146 * decl i0 ; Vertex pos, luma/chroma texcoords
1147 * decl i1 ; Ref surface top field texcoords
1148 * decl i2 ; Ref surface bottom field texcoords (unused, packed in the same stream)
1150 for (i
= 0; i
< 3; i
++)
1152 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1153 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1157 * decl o0 ; Vertex pos
1158 * decl o1 ; Luma/chroma texcoords
1159 * decl o2 ; Ref macroblock texcoords
1161 for (i
= 0; i
< 3; i
++)
1163 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1164 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1168 * mov o0, i0 ; Move input vertex pos to output
1169 * mov o1, i0 ; Move input luma/chroma texcoords to output
1171 for (i
= 0; i
< 2; ++i
)
1173 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, 0);
1174 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1177 /* add o2, i0, i1 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
1178 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 2, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, 1);
1179 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1183 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1186 mc
->p_vs
[0] = pipe
->create_vs_state(pipe
, &vs
);
1192 static int vlCreateVertexShaderFieldPMB
1194 struct vlR16SnormBufferedMC
*mc
1197 const unsigned int max_tokens
= 100;
1199 struct pipe_context
*pipe
;
1200 struct pipe_shader_state vs
;
1201 struct tgsi_token
*tokens
;
1202 struct tgsi_header
*header
;
1204 struct tgsi_full_declaration decl
;
1205 struct tgsi_full_instruction inst
;
1213 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1216 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1218 header
= (struct tgsi_header
*)&tokens
[1];
1219 *header
= tgsi_build_header();
1221 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1226 * decl i0 ; Vertex pos, luma/chroma texcoords
1227 * decl i1 ; Ref surface top field texcoords
1228 * decl i2 ; Ref surface bottom field texcoords
1230 for (i
= 0; i
< 3; i
++)
1232 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1233 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1236 /* decl c0 ; Texcoord denorm coefficients */
1237 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
1238 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1241 * decl o0 ; Vertex pos
1242 * decl o1 ; Luma/chroma texcoords
1243 * decl o2 ; Top field ref macroblock texcoords
1244 * decl o3 ; Bottom field ref macroblock texcoords
1245 * decl o4 ; Denormalized vertex pos
1247 for (i
= 0; i
< 5; i
++)
1249 decl
= vl_decl_output((i
== 0 || i
== 5) ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1250 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1254 * mov o0, i0 ; Move input vertex pos to output
1255 * mov o1, i0 ; Move input luma/chroma texcoords to output
1257 for (i
= 0; i
< 3; ++i
)
1259 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, i
== 0 ? 0 : i
- 1);
1260 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1264 * add o2, i0, i1 ; Translate vertex pos by motion vec to form top field macroblock texcoords
1265 * add o3, i0, i2 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords
1267 for (i
= 0; i
< 2; ++i
)
1269 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 2, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, i
+ 1);
1270 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1273 /* mul o4, i0, c0 ; Denorm vertex pos */
1274 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 5, TGSI_FILE_INPUT
, 0, TGSI_FILE_CONSTANT
, 0);
1275 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1279 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1282 mc
->p_vs
[1] = pipe
->create_vs_state(pipe
, &vs
);
1288 static int vlCreateFragmentShaderFramePMB
1290 struct vlR16SnormBufferedMC
*mc
1293 const unsigned int max_tokens
= 100;
1295 struct pipe_context
*pipe
;
1296 struct pipe_shader_state fs
;
1297 struct tgsi_token
*tokens
;
1298 struct tgsi_header
*header
;
1300 struct tgsi_full_declaration decl
;
1301 struct tgsi_full_instruction inst
;
1309 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1312 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1314 header
= (struct tgsi_header
*)&tokens
[1];
1315 *header
= tgsi_build_header();
1317 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1322 * decl i0 ; Texcoords for s0, s1, s2
1323 * decl i1 ; Texcoords for s3
1325 for (i
= 0; i
< 2; ++i
)
1327 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1328 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1331 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
1332 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 0);
1333 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1335 /* decl o0 ; Fragment color */
1336 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1337 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1340 decl
= vl_decl_temps(0, 1);
1341 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1344 * decl s0 ; Sampler for luma texture
1345 * decl s1 ; Sampler for chroma Cb texture
1346 * decl s2 ; Sampler for chroma Cr texture
1347 * decl s3 ; Sampler for ref surface texture
1349 for (i
= 0; i
< 4; ++i
)
1351 decl
= vl_decl_samplers(i
, i
);
1352 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1356 * tex2d t1, i0, s0 ; Read texel from luma texture
1357 * mov t0.x, t1.x ; Move luma sample into .x component
1358 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1359 * mov t0.y, t1.x ; Move Cb sample into .y component
1360 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1361 * mov t0.z, t1.x ; Move Cr sample into .z component
1363 for (i
= 0; i
< 3; ++i
)
1365 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, 0, TGSI_FILE_SAMPLER
, i
);
1366 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1368 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1369 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1370 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1371 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1372 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1373 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1377 /* mul t0, t0, c0 ; Rescale texel to correct range */
1378 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1379 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1381 /* tex2d t1, i1, s3 ; Read texel from ref macroblock */
1382 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, 1, TGSI_FILE_SAMPLER
, 3);
1383 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1385 /* add o0, t0, t1 ; Add ref and differential to form final output */
1386 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1387 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1391 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1394 mc
->p_fs
[0] = pipe
->create_fs_state(pipe
, &fs
);
1400 static int vlCreateFragmentShaderFieldPMB
1402 struct vlR16SnormBufferedMC
*mc
1405 const unsigned int max_tokens
= 200;
1407 struct pipe_context
*pipe
;
1408 struct pipe_shader_state fs
;
1409 struct tgsi_token
*tokens
;
1410 struct tgsi_header
*header
;
1412 struct tgsi_full_declaration decl
;
1413 struct tgsi_full_instruction inst
;
1421 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1424 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1426 header
= (struct tgsi_header
*)&tokens
[1];
1427 *header
= tgsi_build_header();
1429 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1434 * decl i0 ; Texcoords for s0, s1, s2
1435 * decl i1 ; Texcoords for s3
1436 * decl i2 ; Texcoords for s3
1437 * decl i3 ; Denormalized vertex pos
1439 for (i
= 0; i
< 4; ++i
)
1441 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1442 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1446 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1447 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
1449 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
1450 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1452 /* decl o0 ; Fragment color */
1453 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1454 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1457 decl
= vl_decl_temps(0, 4);
1458 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1461 * decl s0 ; Sampler for luma texture
1462 * decl s1 ; Sampler for chroma Cb texture
1463 * decl s2 ; Sampler for chroma Cr texture
1464 * decl s3 ; Sampler for ref surface texture
1466 for (i
= 0; i
< 4; ++i
)
1468 decl
= vl_decl_samplers(i
, i
);
1469 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1473 * tex2d t1, i0, s0 ; Read texel from luma texture
1474 * mov t0.x, t1.x ; Move luma sample into .x component
1475 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1476 * mov t0.y, t1.x ; Move Cb sample into .y component
1477 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1478 * mov t0.z, t1.x ; Move Cr sample into .z component
1480 for (i
= 0; i
< 3; ++i
)
1482 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, 0, TGSI_FILE_SAMPLER
, i
);
1483 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1485 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1486 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1487 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1488 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1489 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1490 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1494 /* mul t0, t0, c0 ; Rescale texel to correct range */
1495 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1496 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1499 * tex2d t1, i1, s3 ; Read texel from ref macroblock top field
1500 * tex2d t2, i2, s3 ; Read texel from ref macroblock bottom field
1502 for (i
= 0; i
< 2; ++i
)
1504 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 1, TGSI_FILE_SAMPLER
, 3);
1505 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1508 /* XXX: Pos values off by 0.5? */
1509 /* sub t4, i3.y, c1.x ; Sub 0.5 from denormalized pos */
1510 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_INPUT
, 3, TGSI_FILE_CONSTANT
, 1);
1511 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1512 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1513 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1514 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1515 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1516 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1517 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1518 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1519 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1521 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1522 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_CONSTANT
, 1);
1523 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1524 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1525 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1526 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1527 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1529 /* floor t3, t3 ; Get rid of fractional part */
1530 inst
= vl_inst2(TGSI_OPCODE_FLOOR
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3);
1531 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1533 /* mul t3, t3, c1.y ; Multiply by 2 */
1534 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_CONSTANT
, 1);
1535 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
1536 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
1537 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
1538 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
1539 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1541 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1542 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_TEMPORARY
, 3);
1543 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1545 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1546 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
1547 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
1548 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1550 /* add o0, t0, t1 ; Add ref and differential to form final output */
1551 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1552 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1556 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1559 mc
->p_fs
[1] = pipe
->create_fs_state(pipe
, &fs
);
1565 static int vlCreateVertexShaderFrameBMB
1567 struct vlR16SnormBufferedMC
*mc
1570 const unsigned int max_tokens
= 100;
1572 struct pipe_context
*pipe
;
1573 struct pipe_shader_state vs
;
1574 struct tgsi_token
*tokens
;
1575 struct tgsi_header
*header
;
1577 struct tgsi_full_declaration decl
;
1578 struct tgsi_full_instruction inst
;
1586 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1589 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1591 header
= (struct tgsi_header
*)&tokens
[1];
1592 *header
= tgsi_build_header();
1594 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1599 * decl i0 ; Vertex pos, luma/chroma texcoords
1600 * decl i1 ; First ref surface top field texcoords
1601 * decl i2 ; First ref surface bottom field texcoords (unused, packed in the same stream)
1602 * decl i3 ; Second ref surface top field texcoords
1603 * decl i4 ; Second ref surface bottom field texcoords (unused, packed in the same stream)
1605 for (i
= 0; i
< 5; i
++)
1607 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1608 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1612 * decl o0 ; Vertex pos
1613 * decl o1 ; Luma/chroma texcoords
1614 * decl o2 ; First ref macroblock texcoords
1615 * decl o3 ; Second ref macroblock texcoords
1617 for (i
= 0; i
< 4; i
++)
1619 decl
= vl_decl_output(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1620 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1624 * mov o0, i0 ; Move input vertex pos to output
1625 * mov o1, i0 ; Move input luma/chroma texcoords to output
1627 for (i
= 0; i
< 2; ++i
)
1629 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, 0);
1630 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1634 * add o2, i0, i1 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
1635 * add o3, i0, i3 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
1637 for (i
= 0; i
< 2; ++i
)
1639 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 2, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, i
* 2 + 1);
1640 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1645 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1648 mc
->b_vs
[0] = pipe
->create_vs_state(pipe
, &vs
);
1654 static int vlCreateVertexShaderFieldBMB
1656 struct vlR16SnormBufferedMC
*mc
1659 const unsigned int max_tokens
= 100;
1661 struct pipe_context
*pipe
;
1662 struct pipe_shader_state vs
;
1663 struct tgsi_token
*tokens
;
1664 struct tgsi_header
*header
;
1666 struct tgsi_full_declaration decl
;
1667 struct tgsi_full_instruction inst
;
1675 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1678 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1680 header
= (struct tgsi_header
*)&tokens
[1];
1681 *header
= tgsi_build_header();
1683 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX
, header
);
1688 * decl i0 ; Vertex pos, Luma/chroma texcoords
1689 * decl i1 ; First ref surface top field texcoords
1690 * decl i2 ; First ref surface bottom field texcoords
1691 * decl i3 ; Second ref surface top field texcoords
1692 * decl i4 ; Second ref surface bottom field texcoords
1694 for (i
= 0; i
< 5; i
++)
1696 decl
= vl_decl_input(i
== 0 ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1697 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1700 /* decl c0 ; Denorm coefficients */
1701 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 6);
1702 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1705 * decl o0 ; Vertex pos
1706 * decl o1 ; Luma/chroma texcoords
1707 * decl o2 ; Top field past ref macroblock texcoords
1708 * decl o3 ; Bottom field past ref macroblock texcoords
1709 * decl o4 ; Top field future ref macroblock texcoords
1710 * decl o5 ; Bottom field future ref macroblock texcoords
1711 * decl o6 ; Denormalized vertex pos
1713 for (i
= 0; i
< 7; i
++)
1715 decl
= vl_decl_output((i
== 0 || i
== 7) ? TGSI_SEMANTIC_POSITION
: TGSI_SEMANTIC_GENERIC
, i
, i
, i
);
1716 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1720 decl
= vl_decl_temps(0, 1);
1721 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1724 * mov o0, i0 ; Move input vertex pos to output
1725 * mov o1, i0 ; Move input luma/chroma texcoords to output
1726 * mov o2, i1 ; Move past top field texcoords to output
1727 * mov o3, i2 ; Move past bottom field texcoords to output
1728 * mov o4, i3 ; Move future top field texcoords to output
1729 * mov o5, i4 ; Move future bottom field texcoords to output
1731 for (i
= 0; i
< 6; ++i
)
1733 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_OUTPUT
, i
, TGSI_FILE_INPUT
, 0);
1734 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1738 * add o2, i0, i1 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
1739 * add o3, i0, i2 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
1740 * add o4, i0, i3 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
1741 * add o5, i0, i4 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
1743 for (i
= 0; i
< 4; ++i
)
1745 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, i
+ 2, TGSI_FILE_INPUT
, 0, TGSI_FILE_INPUT
, i
+ 1);
1746 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1749 /* mul o6, i0, c0 ; Denorm vertex pos */
1750 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_OUTPUT
, 6, TGSI_FILE_INPUT
, 0, TGSI_FILE_CONSTANT
, 0);
1751 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1755 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1758 mc
->b_vs
[1] = pipe
->create_vs_state(pipe
, &vs
);
1764 static int vlCreateFragmentShaderFrameBMB
1766 struct vlR16SnormBufferedMC
*mc
1769 const unsigned int max_tokens
= 100;
1771 struct pipe_context
*pipe
;
1772 struct pipe_shader_state fs
;
1773 struct tgsi_token
*tokens
;
1774 struct tgsi_header
*header
;
1776 struct tgsi_full_declaration decl
;
1777 struct tgsi_full_instruction inst
;
1785 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1788 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1790 header
= (struct tgsi_header
*)&tokens
[1];
1791 *header
= tgsi_build_header();
1793 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1798 * decl i0 ; Texcoords for s0, s1, s2
1799 * decl i1 ; Texcoords for s3
1800 * decl i2 ; Texcoords for s4
1802 for (i
= 0; i
< 3; ++i
)
1804 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1805 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1809 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1810 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
1812 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
1813 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1815 /* decl o0 ; Fragment color */
1816 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1817 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1820 decl
= vl_decl_temps(0, 2);
1821 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1824 * decl s0 ; Sampler for luma texture
1825 * decl s1 ; Sampler for chroma Cb texture
1826 * decl s2 ; Sampler for chroma Cr texture
1827 * decl s3 ; Sampler for past ref surface texture
1828 * decl s4 ; Sampler for future ref surface texture
1830 for (i
= 0; i
< 5; ++i
)
1832 decl
= vl_decl_samplers(i
, i
);
1833 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1837 * tex2d t1, i0, s0 ; Read texel from luma texture
1838 * mov t0.x, t1.x ; Move luma sample into .x component
1839 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1840 * mov t0.y, t1.x ; Move Cb sample into .y component
1841 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1842 * mov t0.z, t1.x ; Move Cr sample into .z component
1844 for (i
= 0; i
< 3; ++i
)
1846 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, 0, TGSI_FILE_SAMPLER
, i
);
1847 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1849 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1850 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1851 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1852 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1853 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1854 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1858 /* mul t0, t0, c0 ; Rescale texel to correct range */
1859 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1860 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1863 * tex2d t1, i1, s3 ; Read texel from past ref macroblock
1864 * tex2d t2, i2, s4 ; Read texel from future ref macroblock
1866 for (i
= 0; i
< 2; ++i
)
1868 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 1, TGSI_FILE_SAMPLER
, i
+ 3);
1869 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1872 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
1873 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_CONSTANT
, 1, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
1874 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1875 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1876 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1877 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
1878 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1880 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1881 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1882 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1886 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1889 mc
->b_fs
[0] = pipe
->create_fs_state(pipe
, &fs
);
1895 static int vlCreateFragmentShaderFieldBMB
1897 struct vlR16SnormBufferedMC
*mc
1900 const unsigned int max_tokens
= 200;
1902 struct pipe_context
*pipe
;
1903 struct pipe_shader_state fs
;
1904 struct tgsi_token
*tokens
;
1905 struct tgsi_header
*header
;
1907 struct tgsi_full_declaration decl
;
1908 struct tgsi_full_instruction inst
;
1916 tokens
= (struct tgsi_token
*)malloc(max_tokens
* sizeof(struct tgsi_token
));
1919 *(struct tgsi_version
*)&tokens
[0] = tgsi_build_version();
1921 header
= (struct tgsi_header
*)&tokens
[1];
1922 *header
= tgsi_build_header();
1924 *(struct tgsi_processor
*)&tokens
[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT
, header
);
1929 * decl i0 ; Texcoords for s0, s1, s2
1930 * decl i1 ; Texcoords for s3
1931 * decl i2 ; Texcoords for s3
1932 * decl i3 ; Texcoords for s4
1933 * decl i4 ; Texcoords for s4
1934 * decl i5 ; Denormalized vertex pos
1936 for (i
= 0; i
< 6; ++i
)
1938 decl
= vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC
, i
+ 1, i
, i
, TGSI_INTERPOLATE_LINEAR
);
1939 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1943 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1944 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
1945 * ; and for Y-mod-2 top/bottom field selection
1947 decl
= vl_decl_constants(TGSI_SEMANTIC_GENERIC
, 0, 0, 1);
1948 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1950 /* decl o0 ; Fragment color */
1951 decl
= vl_decl_output(TGSI_SEMANTIC_COLOR
, 0, 0, 0);
1952 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1955 decl
= vl_decl_temps(0, 5);
1956 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1959 * decl s0 ; Sampler for luma texture
1960 * decl s1 ; Sampler for chroma Cb texture
1961 * decl s2 ; Sampler for chroma Cr texture
1962 * decl s3 ; Sampler for past ref surface texture
1963 * decl s4 ; Sampler for future ref surface texture
1965 for (i
= 0; i
< 5; ++i
)
1967 decl
= vl_decl_samplers(i
, i
);
1968 ti
+= tgsi_build_full_declaration(&decl
, &tokens
[ti
], header
, max_tokens
- ti
);
1972 * tex2d t1, i0, s0 ; Read texel from luma texture
1973 * mov t0.x, t1.x ; Move luma sample into .x component
1974 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1975 * mov t0.y, t1.x ; Move Cb sample into .y component
1976 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1977 * mov t0.z, t1.x ; Move Cr sample into .z component
1979 for (i
= 0; i
< 3; ++i
)
1981 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_INPUT
, 0, TGSI_FILE_SAMPLER
, i
);
1982 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1984 inst
= vl_inst2(TGSI_OPCODE_MOV
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
1985 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
1986 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
1987 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
1988 inst
.FullDstRegisters
[0].DstRegister
.WriteMask
= TGSI_WRITEMASK_X
<< i
;
1989 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1993 /* mul t0, t0, c0 ; Rescale texel to correct range */
1994 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_CONSTANT
, 0);
1995 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
1997 /* XXX: Pos values off by 0.5? */
1998 /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */
1999 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_INPUT
, 5, TGSI_FILE_CONSTANT
, 1);
2000 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
2001 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
2002 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
2003 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
2004 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
2005 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
2006 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
2007 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
2008 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2010 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
2011 inst
= vl_inst3(TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_CONSTANT
, 1);
2012 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
2013 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
2014 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
2015 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
2016 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2018 /* floor t3, t3 ; Get rid of fractional part */
2019 inst
= vl_inst2(TGSI_OPCODE_FLOOR
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3);
2020 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2022 /* mul t3, t3, c1.y ; Multiply by 2 */
2023 inst
= vl_inst3( TGSI_OPCODE_MUL
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_CONSTANT
, 1);
2024 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_Y
;
2025 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_Y
;
2026 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_Y
;
2027 inst
.FullSrcRegisters
[1].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_Y
;
2028 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2030 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
2031 inst
= vl_inst3(TGSI_OPCODE_SUB
, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_TEMPORARY
, 3);
2032 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2035 * tex2d t1, i1, s3 ; Read texel from past ref macroblock top field
2036 * tex2d t2, i2, s3 ; Read texel from past ref macroblock bottom field
2038 for (i
= 0; i
< 2; ++i
)
2040 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 1, TGSI_FILE_INPUT
, i
+ 1, TGSI_FILE_SAMPLER
, 3);
2041 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2044 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
2045 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
2046 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
2047 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2050 * tex2d t4, i3, s4 ; Read texel from future ref macroblock top field
2051 * tex2d t5, i4, s4 ; Read texel from future ref macroblock bottom field
2053 for (i
= 0; i
< 2; ++i
)
2055 inst
= vl_tex(TGSI_TEXTURE_2D
, TGSI_FILE_TEMPORARY
, i
+ 4, TGSI_FILE_INPUT
, i
+ 3, TGSI_FILE_SAMPLER
, 4);
2056 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2059 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
2060 /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
2061 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 2, TGSI_FILE_TEMPORARY
, 3, TGSI_FILE_TEMPORARY
, 4, TGSI_FILE_TEMPORARY
, 5);
2062 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2064 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
2065 inst
= vl_inst4(TGSI_OPCODE_LERP
, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_CONSTANT
, 1, TGSI_FILE_TEMPORARY
, 1, TGSI_FILE_TEMPORARY
, 2);
2066 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleX
= TGSI_SWIZZLE_X
;
2067 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleY
= TGSI_SWIZZLE_X
;
2068 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleZ
= TGSI_SWIZZLE_X
;
2069 inst
.FullSrcRegisters
[0].SrcRegister
.SwizzleW
= TGSI_SWIZZLE_X
;
2070 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2072 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
2073 inst
= vl_inst3(TGSI_OPCODE_ADD
, TGSI_FILE_OUTPUT
, 0, TGSI_FILE_TEMPORARY
, 0, TGSI_FILE_TEMPORARY
, 1);
2074 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2078 ti
+= tgsi_build_full_instruction(&inst
, &tokens
[ti
], header
, max_tokens
- ti
);
2081 mc
->b_fs
[1] = pipe
->create_fs_state(pipe
, &fs
);
2087 static int vlCreateDataBufs
2089 struct vlR16SnormBufferedMC
*mc
2092 const unsigned int mbw
= align(mc
->video_width
, VL_MACROBLOCK_WIDTH
) / VL_MACROBLOCK_WIDTH
;
2093 const unsigned int mbh
= align(mc
->video_height
, VL_MACROBLOCK_HEIGHT
) / VL_MACROBLOCK_HEIGHT
;
2094 const unsigned int num_mb_per_frame
= mbw
* mbh
;
2096 struct pipe_context
*pipe
;
2097 unsigned int g
, h
, i
;
2103 for (g
= 0; g
< NUM_BUF_SETS
; ++g
)
2105 for (h
= 0; h
< 7; ++h
)
2107 /* Create our vertex buffer and vertex buffer element */
2108 mc
->vertex_bufs
[g
][h
][0].pitch
= sizeof(struct vlVertex2f
);
2109 mc
->vertex_bufs
[g
][h
][0].max_index
= 24 * num_mb_per_frame
- 1;
2110 mc
->vertex_bufs
[g
][h
][0].buffer_offset
= 0;
2111 mc
->vertex_bufs
[g
][h
][0].buffer
= pipe
->winsys
->buffer_create
2115 PIPE_BUFFER_USAGE_VERTEX
,
2116 sizeof(struct vlVertex2f
) * 24 * num_mb_per_frame
2121 /* Position & block luma, block chroma texcoord element */
2122 mc
->vertex_elems
[0].src_offset
= 0;
2123 mc
->vertex_elems
[0].vertex_buffer_index
= 0;
2124 mc
->vertex_elems
[0].nr_components
= 2;
2125 mc
->vertex_elems
[0].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2127 for (g
= 0; g
< NUM_BUF_SETS
; ++g
)
2129 for (h
= 0; h
< 7; ++h
)
2131 for (i
= 1; i
< 3; ++i
)
2133 mc
->vertex_bufs
[g
][h
][i
].pitch
= sizeof(struct vlVertex2f
) * 2;
2134 mc
->vertex_bufs
[g
][h
][i
].max_index
= 24 * num_mb_per_frame
- 1;
2135 mc
->vertex_bufs
[g
][h
][i
].buffer_offset
= 0;
2136 mc
->vertex_bufs
[g
][h
][i
].buffer
= pipe
->winsys
->buffer_create
2140 PIPE_BUFFER_USAGE_VERTEX
,
2141 sizeof(struct vlVertex2f
) * 2 * 24 * num_mb_per_frame
2147 /* First ref surface top field texcoord element */
2148 mc
->vertex_elems
[1].src_offset
= 0;
2149 mc
->vertex_elems
[1].vertex_buffer_index
= 1;
2150 mc
->vertex_elems
[1].nr_components
= 2;
2151 mc
->vertex_elems
[1].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2153 /* First ref surface bottom field texcoord element */
2154 mc
->vertex_elems
[2].src_offset
= sizeof(struct vlVertex2f
);
2155 mc
->vertex_elems
[2].vertex_buffer_index
= 1;
2156 mc
->vertex_elems
[2].nr_components
= 2;
2157 mc
->vertex_elems
[2].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2159 /* Second ref surface top field texcoord element */
2160 mc
->vertex_elems
[3].src_offset
= 0;
2161 mc
->vertex_elems
[3].vertex_buffer_index
= 2;
2162 mc
->vertex_elems
[3].nr_components
= 2;
2163 mc
->vertex_elems
[3].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2165 /* Second ref surface bottom field texcoord element */
2166 mc
->vertex_elems
[4].src_offset
= sizeof(struct vlVertex2f
);
2167 mc
->vertex_elems
[4].vertex_buffer_index
= 2;
2168 mc
->vertex_elems
[4].nr_components
= 2;
2169 mc
->vertex_elems
[4].src_format
= PIPE_FORMAT_R32G32_FLOAT
;
2171 /* Create our constant buffer */
2172 mc
->vs_const_buf
.size
= sizeof(struct vlVertexShaderConsts
);
2173 mc
->vs_const_buf
.buffer
= pipe
->winsys
->buffer_create
2177 PIPE_BUFFER_USAGE_CONSTANT
,
2178 mc
->vs_const_buf
.size
2181 mc
->fs_const_buf
.size
= sizeof(struct vlFragmentShaderConsts
);
2182 mc
->fs_const_buf
.buffer
= pipe
->winsys
->buffer_create
2186 PIPE_BUFFER_USAGE_CONSTANT
,
2187 mc
->fs_const_buf
.size
2192 pipe
->winsys
->buffer_map(pipe
->winsys
, mc
->fs_const_buf
.buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
),
2194 sizeof(struct vlFragmentShaderConsts
)
2197 pipe
->winsys
->buffer_unmap(pipe
->winsys
, mc
->fs_const_buf
.buffer
);
2204 struct vlR16SnormBufferedMC
*mc
2207 struct pipe_context
*pipe
;
2208 struct pipe_sampler_state sampler
;
2209 struct pipe_texture
template;
2210 unsigned int filters
[5];
2217 /* For MC we render to textures, which are rounded up to nearest POT */
2218 mc
->viewport
.scale
[0] = vlRoundUpPOT(mc
->video_width
);
2219 mc
->viewport
.scale
[1] = vlRoundUpPOT(mc
->video_height
);
2220 mc
->viewport
.scale
[2] = 1;
2221 mc
->viewport
.scale
[3] = 1;
2222 mc
->viewport
.translate
[0] = 0;
2223 mc
->viewport
.translate
[1] = 0;
2224 mc
->viewport
.translate
[2] = 0;
2225 mc
->viewport
.translate
[3] = 0;
2227 mc
->render_target
.width
= vlRoundUpPOT(mc
->video_width
);
2228 mc
->render_target
.height
= vlRoundUpPOT(mc
->video_height
);
2229 mc
->render_target
.num_cbufs
= 1;
2230 /* FB for MC stage is a vlSurface created by the user, set at render time */
2231 mc
->render_target
.zsbuf
= NULL
;
2233 filters
[0] = PIPE_TEX_FILTER_NEAREST
;
2234 filters
[1] = mc
->video_format
== vlFormatYCbCr444
? PIPE_TEX_FILTER_NEAREST
: PIPE_TEX_FILTER_LINEAR
;
2235 filters
[2] = mc
->video_format
== vlFormatYCbCr444
? PIPE_TEX_FILTER_NEAREST
: PIPE_TEX_FILTER_LINEAR
;
2236 filters
[3] = PIPE_TEX_FILTER_LINEAR
;
2237 filters
[4] = PIPE_TEX_FILTER_LINEAR
;
2239 for (i
= 0; i
< 5; ++i
)
2241 sampler
.wrap_s
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
2242 sampler
.wrap_t
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
2243 sampler
.wrap_r
= PIPE_TEX_WRAP_CLAMP_TO_EDGE
;
2244 sampler
.min_img_filter
= filters
[i
];
2245 sampler
.min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
2246 sampler
.mag_img_filter
= filters
[i
];
2247 sampler
.compare_mode
= PIPE_TEX_COMPARE_NONE
;
2248 sampler
.compare_func
= PIPE_FUNC_ALWAYS
;
2249 sampler
.normalized_coords
= 1;
2250 /*sampler.prefilter = ;*/
2251 /*sampler.shadow_ambient = ;*/
2252 /*sampler.lod_bias = ;*/
2253 sampler
.min_lod
= 0;
2254 /*sampler.max_lod = ;*/
2255 /*sampler.border_color[i] = ;*/
2256 /*sampler.max_anisotropy = ;*/
2257 mc
->samplers
[i
] = pipe
->create_sampler_state(pipe
, &sampler
);
2260 memset(&template, 0, sizeof(struct pipe_texture
));
2261 template.target
= PIPE_TEXTURE_2D
;
2262 template.format
= PIPE_FORMAT_R16_SNORM
;
2263 template.last_level
= 0;
2264 template.width
[0] = vlRoundUpPOT(mc
->video_width
);
2265 template.height
[0] = vlRoundUpPOT(mc
->video_height
);
2266 template.depth
[0] = 1;
2267 template.compressed
= 0;
2268 pf_get_block(template.format
, &template.block
);
2270 for (i
= 0; i
< NUM_BUF_SETS
; ++i
)
2271 mc
->textures
[i
][0] = pipe
->screen
->texture_create(pipe
->screen
, &template);
2273 if (mc
->video_format
== vlFormatYCbCr420
)
2275 template.width
[0] = vlRoundUpPOT(mc
->video_width
/ 2);
2276 template.height
[0] = vlRoundUpPOT(mc
->video_height
/ 2);
2278 else if (mc
->video_format
== vlFormatYCbCr422
)
2279 template.height
[0] = vlRoundUpPOT(mc
->video_height
/ 2);
2281 for (i
= 0; i
< NUM_BUF_SETS
; ++i
)
2283 mc
->textures
[i
][1] = pipe
->screen
->texture_create(pipe
->screen
, &template);
2284 mc
->textures
[i
][2] = pipe
->screen
->texture_create(pipe
->screen
, &template);
2287 /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
2289 vlCreateVertexShaderIMB(mc
);
2290 vlCreateFragmentShaderIMB(mc
);
2291 vlCreateVertexShaderFramePMB(mc
);
2292 vlCreateVertexShaderFieldPMB(mc
);
2293 vlCreateFragmentShaderFramePMB(mc
);
2294 vlCreateFragmentShaderFieldPMB(mc
);
2295 vlCreateVertexShaderFrameBMB(mc
);
2296 vlCreateVertexShaderFieldBMB(mc
);
2297 vlCreateFragmentShaderFrameBMB(mc
);
2298 vlCreateFragmentShaderFieldBMB(mc
);
2299 vlCreateDataBufs(mc
);
2304 int vlCreateR16SNormBufferedMC
2306 struct pipe_context
*pipe
,
2307 unsigned int video_width
,
2308 unsigned int video_height
,
2309 enum vlFormat video_format
,
2310 struct vlRender
**render
2313 struct vlR16SnormBufferedMC
*mc
;
2318 mc
= calloc(1, sizeof(struct vlR16SnormBufferedMC
));
2320 mc
->base
.vlBegin
= &vlBegin
;
2321 mc
->base
.vlRenderMacroBlocksMpeg2
= &vlRenderMacroBlocksMpeg2R16SnormBuffered
;
2322 mc
->base
.vlEnd
= &vlEnd
;
2323 mc
->base
.vlFlush
= &vlFlush
;
2324 mc
->base
.vlDestroy
= &vlDestroy
;
2326 mc
->video_width
= video_width
;
2327 mc
->video_height
= video_height
;
2330 mc
->buffered_surface
= NULL
;
2331 mc
->past_surface
= NULL
;
2332 mc
->future_surface
= NULL
;
2333 memset(mc
->num_macroblocks
, 0, sizeof(unsigned int) * 7);
2334 mc
->total_num_macroblocks
= 0;
2338 *render
= &mc
->base
;