g3dvl: Some clean ups.
[mesa.git] / src / gallium / state_trackers / g3dvl / vl_r16snorm_mc_buf.c
1 #define VL_INTERNAL
2 #include "vl_r16snorm_mc_buf.h"
3 #include <assert.h>
4 #include <stdlib.h>
5 #include <pipe/p_context.h>
6 #include <pipe/p_winsys.h>
7 #include <pipe/p_screen.h>
8 #include <pipe/p_state.h>
9 #include <pipe/p_util.h>
10 #include <pipe/p_inlines.h>
11 #include <tgsi/tgsi_parse.h>
12 #include <tgsi/tgsi_build.h>
13 #include "vl_render.h"
14 #include "vl_shader_build.h"
15 #include "vl_surface.h"
16 #include "vl_util.h"
17 #include "vl_types.h"
18 #include "vl_defs.h"
19
20 /*
21 * TODO: Dynamically determine number of buf sets to use, based on
22 * video size and available mem, since we can easily run out of memory
23 * for high res videos.
24 * Note: Destroying previous frame's buffers and creating new ones
 * doesn't work, since the buffers are not actually destroyed until their
26 * fence is signalled, and if we render fast enough we will create faster
27 * than we destroy.
28 */
29 #define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */
30
/*
 * Extended macroblock types: each MPEG-2 prediction type is split by
 * frame/field motion so each combination can be batched and drawn with
 * its own shader pair (see the draw loop in vlFlush).
 */
enum vlMacroBlockTypeEx
{
	vlMacroBlockExTypeIntra,
	vlMacroBlockExTypeFwdPredictedFrame,
	vlMacroBlockExTypeFwdPredictedField,
	vlMacroBlockExTypeBkwdPredictedFrame,
	vlMacroBlockExTypeBkwdPredictedField,
	vlMacroBlockExTypeBiPredictedFrame,
	vlMacroBlockExTypeBiPredictedField,

	vlNumMacroBlockExTypes	/* Count of entries above, used as array size */
};
43
/* Vertex shader constant buffer layout. */
struct vlVertexShaderConsts
{
	/* Destination texture width/height, used to denormalize vertex pos
	   (written each flush in vlFlush) */
	struct vlVertex4f denorm;
};
48
/* Fragment shader constant buffer layout (values in fs_consts below). */
struct vlFragmentShaderConsts
{
	struct vlVertex4f multiplier;	/* Rescales 16-bit snorm samples */
	struct vlVertex4f div;		/* Used for Y % 2 field selection */
};
54
/*
 * Buffered motion-compensation renderer state. Macroblocks are batched per
 * extended type into a rotating set of vertex/texture buffers and rendered
 * in one draw call per type when flushed.
 */
struct vlR16SnormBufferedMC
{
	struct vlRender base;	/* Must stay first: vlRender* is cast to this type */

	unsigned int video_width, video_height;
	enum vlFormat video_format;

	unsigned int cur_buf;				/* Rotating buffer-set counter, indexed mod NUM_BUF_SETS */
	struct vlSurface *buffered_surface;		/* Current destination surface; NULL until first batch */
	struct vlSurface *past_surface, *future_surface;	/* Reference surfaces for P/B prediction */
	struct vlVertex2f surface_tex_inv_size;		/* 1 / destination texture size, for texcoord scaling */
	unsigned int num_macroblocks[vlNumMacroBlockExTypes];	/* Batched macroblock count per extended type */

	struct pipe_context *pipe;
	struct pipe_viewport_state viewport;
	struct pipe_framebuffer_state render_target;
	struct pipe_sampler_state *samplers[5];		/* Y, Cb, Cr, past ref, future ref */
	struct pipe_texture *textures[NUM_BUF_SETS][5];	/* Slots 3 & 4 alias reference surface textures */
	void *i_vs, *p_vs[2], *b_vs[2];			/* Vertex shaders: intra, P [frame,field], B [frame,field] */
	void *i_fs, *p_fs[2], *b_fs[2];			/* Fragment shaders, same layout as above */
	struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][vlNumMacroBlockExTypes][3];
	struct pipe_vertex_element vertex_elems[5];
	struct pipe_constant_buffer vs_const_buf, fs_const_buf;
};
79
/* Begin a rendering batch. This renderer needs no per-batch setup. */
static int vlBegin(struct vlRender *render)
{
	assert(render);
	return 0;
}
89
90 static inline int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch)
91 {
92 unsigned int y;
93
94 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
95 memcpy
96 (
97 dst + y * dst_pitch,
98 src + y * VL_BLOCK_WIDTH,
99 VL_BLOCK_WIDTH * 2
100 );
101
102 return 0;
103 }
104
/*
 * Copy one field-coded block of 16-bit samples into the destination
 * texture, writing every other destination row (interlaced layout).
 * dst_pitch is in samples, not bytes.
 */
static inline int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch)
{
	unsigned int y;

	/* First half of the block goes to rows 0, 2, 4, ... */
	for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y)
		memcpy
		(
			dst + y * dst_pitch * 2,
			src + y * VL_BLOCK_WIDTH,
			VL_BLOCK_WIDTH * 2
		);

	/* NOTE(review): 'dst' is advanced by a full block height while 'y'
	 * keeps counting, so the second half lands at rows
	 * VL_BLOCK_HEIGHT + y*2 rather than continuing the interlace at
	 * odd/offset rows — verify this offset against the caller's layout
	 * in vlGrabBlocks (which passes a row-offset dst for field blocks). */
	dst += VL_BLOCK_HEIGHT * dst_pitch;

	for (; y < VL_BLOCK_HEIGHT; ++y)
		memcpy
		(
			dst + y * dst_pitch * 2,
			src + y * VL_BLOCK_WIDTH,
			VL_BLOCK_WIDTH * 2
		);

	return 0;
}
129
130 static inline int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
131 {
132 unsigned int y;
133
134 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
135 memset
136 (
137 dst + y * dst_pitch,
138 0,
139 VL_BLOCK_WIDTH * 2
140 );
141
142 return 0;
143 }
144
/*
 * Upload one macroblock's coded DCT blocks into the current buffer set's
 * Y/Cb/Cr textures at the macroblock's position. Blocks missing from
 * coded_block_pattern are zero-filled. Assumes 4:2:0 chroma (see TODO).
 */
static inline int vlGrabBlocks
(
	struct vlR16SnormBufferedMC *mc,
	unsigned int mbx,
	unsigned int mby,
	enum vlDCTType dct_type,
	unsigned int coded_block_pattern,
	short *blocks
)
{
	struct pipe_surface *tex_surface;
	short *texels;
	unsigned int tex_pitch;
	unsigned int x, y, tb = 0, sb = 0;	/* tb: texture block index, sb: source block index */
	unsigned int mbpx = mbx * VL_MACROBLOCK_WIDTH, mbpy = mby * VL_MACROBLOCK_HEIGHT;

	assert(mc);
	assert(blocks);

	/* Map the luma texture of the current rotating buffer set for CPU writes */
	tex_surface = mc->pipe->screen->get_tex_surface
	(
		mc->pipe->screen,
		mc->textures[mc->cur_buf % NUM_BUF_SETS][0],
		0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
	);

	texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
	tex_pitch = tex_surface->stride / tex_surface->block.size;	/* Pitch in texels, not bytes */

	texels += mbpy * tex_pitch + mbpx;	/* Seek to this macroblock's top-left texel */

	/* Four luma blocks in a 2x2 layout; cbp bits 5..2 select them */
	for (y = 0; y < 2; ++y)
	{
		for (x = 0; x < 2; ++x, ++tb)
		{
			if ((coded_block_pattern >> (5 - tb)) & 1)
			{
				short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

				if (dct_type == vlDCTTypeFrameCoded)
				{
					vlGrabFrameCodedBlock
					(
						cur_block,
						texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH,
						tex_pitch
					);
				}
				else
				{
					/* Field blocks offset by one row per y, interleaved by the callee */
					vlGrabFieldCodedBlock
					(
						cur_block,
						texels + y * tex_pitch + x * VL_BLOCK_WIDTH,
						tex_pitch
					);
				}

				++sb;
			}
			else
				vlGrabNoBlock(texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, tex_pitch);
		}
	}

	pipe_surface_unmap(tex_surface);

	/* TODO: Implement 422, 444 */
	/* 4:2:0 chroma planes are half-size; one Cb and one Cr block, cbp bits 1..0 */
	mbpx >>= 1;
	mbpy >>= 1;

	for (tb = 0; tb < 2; ++tb)
	{
		tex_surface = mc->pipe->screen->get_tex_surface
		(
			mc->pipe->screen,
			mc->textures[mc->cur_buf % NUM_BUF_SETS][tb + 1],
			0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
		);

		texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
		tex_pitch = tex_surface->stride / tex_surface->block.size;

		texels += mbpy * tex_pitch + mbpx;

		if ((coded_block_pattern >> (1 - tb)) & 1)
		{
			short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

			/* Chroma blocks are always frame coded */
			vlGrabFrameCodedBlock
			(
				cur_block,
				texels,
				tex_pitch
			);

			++sb;
		}
		else
			vlGrabNoBlock(texels, tex_pitch);

		pipe_surface_unmap(tex_surface);
	}

	return 0;
}
251
/*
 * Append one macroblock to the batch for its extended type: writes its
 * motion vectors and vertex positions into the per-type vertex buffers
 * and uploads its DCT blocks via vlGrabBlocks.
 *
 * The second switch deliberately falls through: bi-predicted macroblocks
 * fill stream 2 (second motion vec), all predicted macroblocks fill
 * stream 1 (first motion vec), and every macroblock fills stream 0
 * (vertex positions).
 */
static inline int vlGrabMacroBlock
(
	struct vlR16SnormBufferedMC *mc,
	struct vlMpeg2MacroBlock *macroblock
)
{
	/* Macroblock size in normalized destination texcoords */
	const struct vlVertex2f unit =
	{
		mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH,
		mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT
	};
	const struct vlVertex2f half =
	{
		mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2),
		mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2)
	};

	struct vlVertex2f *vb;
	enum vlMacroBlockTypeEx mb_type_ex;
	struct vlVertex2f mo_vec[2];
	unsigned int i;

	assert(mc);
	assert(macroblock);

	/* Map (mb_type, mo_type) to the extended batch type */
	switch (macroblock->mb_type)
	{
		case vlMacroBlockTypeIntra:
		{
			mb_type_ex = vlMacroBlockExTypeIntra;
			break;
		}
		case vlMacroBlockTypeFwdPredicted:
		{
			mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ?
				vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField;
			break;
		}
		case vlMacroBlockTypeBkwdPredicted:
		{
			mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ?
				vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField;
			break;
		}
		case vlMacroBlockTypeBiPredicted:
		{
			mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ?
				vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField;
			break;
		}
		default:
			/* NOTE(review): with NDEBUG this falls out of the switch
			   leaving mb_type_ex uninitialized */
			assert(0);
	}

	switch (macroblock->mb_type)
	{
		case vlMacroBlockTypeBiPredicted:
		{
			/* Stream 2: second (backward) motion vector, halved to texel
			   units and normalized; 24 verts, 2 attribs each */
			vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
			(
				mc->pipe->winsys,
				mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][2].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE
			) + mc->num_macroblocks[mb_type_ex] * 2 * 24;

			mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
			mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

			if (macroblock->mo_type == vlMotionTypeFrame)
			{
				/* Frame motion: same vector for both field attribs */
				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
				}
			}
			else
			{
				/* Field motion: separate top/bottom field vectors */
				mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
				mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;

				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
					vb[i + 1].x = mo_vec[1].x;
					vb[i + 1].y = mo_vec[1].y;
				}
			}

			mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][2].buffer);

			/* fall-through */
		}
		case vlMacroBlockTypeFwdPredicted:
		case vlMacroBlockTypeBkwdPredicted:
		{
			/* Stream 1: first motion vector (forward, or backward for
			   backward-only macroblocks) */
			vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
			(
				mc->pipe->winsys,
				mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][1].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE
			) + mc->num_macroblocks[mb_type_ex] * 2 * 24;

			if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted)
			{
				mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
				mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

				if (macroblock->mo_type == vlMotionTypeField)
				{
					mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
					mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;
				}
			}
			else
			{
				mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x;
				mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y;

				if (macroblock->mo_type == vlMotionTypeField)
				{
					mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x;
					mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y;
				}
			}

			if (macroblock->mo_type == vlMotionTypeFrame)
			{
				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
				}
			}
			else
			{
				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
					vb[i + 1].x = mo_vec[1].x;
					vb[i + 1].y = mo_vec[1].y;
				}
			}

			mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][1].buffer);

			/* fall-through */
		}
		case vlMacroBlockTypeIntra:
		{
			/* Stream 0: vertex positions. The macroblock is drawn as
			   four quads (2x2), two triangles each = 24 vertices. */
			vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
			(
				mc->pipe->winsys,
				mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][0].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE
			) + mc->num_macroblocks[mb_type_ex] * 24;

			/* Top-left quad */
			vb[0].x = macroblock->mbx * unit.x; vb[0].y = macroblock->mby * unit.y;
			vb[1].x = macroblock->mbx * unit.x; vb[1].y = macroblock->mby * unit.y + half.y;
			vb[2].x = macroblock->mbx * unit.x + half.x; vb[2].y = macroblock->mby * unit.y;

			vb[3].x = macroblock->mbx * unit.x + half.x; vb[3].y = macroblock->mby * unit.y;
			vb[4].x = macroblock->mbx * unit.x; vb[4].y = macroblock->mby * unit.y + half.y;
			vb[5].x = macroblock->mbx * unit.x + half.x; vb[5].y = macroblock->mby * unit.y + half.y;

			/* Top-right quad */
			vb[6].x = macroblock->mbx * unit.x + half.x; vb[6].y = macroblock->mby * unit.y;
			vb[7].x = macroblock->mbx * unit.x + half.x; vb[7].y = macroblock->mby * unit.y + half.y;
			vb[8].x = macroblock->mbx * unit.x + unit.x; vb[8].y = macroblock->mby * unit.y;

			vb[9].x = macroblock->mbx * unit.x + unit.x; vb[9].y = macroblock->mby * unit.y;
			vb[10].x = macroblock->mbx * unit.x + half.x; vb[10].y = macroblock->mby * unit.y + half.y;
			vb[11].x = macroblock->mbx * unit.x + unit.x; vb[11].y = macroblock->mby * unit.y + half.y;

			/* Bottom-left quad */
			vb[12].x = macroblock->mbx * unit.x; vb[12].y = macroblock->mby * unit.y + half.y;
			vb[13].x = macroblock->mbx * unit.x; vb[13].y = macroblock->mby * unit.y + unit.y;
			vb[14].x = macroblock->mbx * unit.x + half.x; vb[14].y = macroblock->mby * unit.y + half.y;

			vb[15].x = macroblock->mbx * unit.x + half.x; vb[15].y = macroblock->mby * unit.y + half.y;
			vb[16].x = macroblock->mbx * unit.x; vb[16].y = macroblock->mby * unit.y + unit.y;
			vb[17].x = macroblock->mbx * unit.x + half.x; vb[17].y = macroblock->mby * unit.y + unit.y;

			/* Bottom-right quad */
			vb[18].x = macroblock->mbx * unit.x + half.x; vb[18].y = macroblock->mby * unit.y + half.y;
			vb[19].x = macroblock->mbx * unit.x + half.x; vb[19].y = macroblock->mby * unit.y + unit.y;
			vb[20].x = macroblock->mbx * unit.x + unit.x; vb[20].y = macroblock->mby * unit.y + half.y;

			vb[21].x = macroblock->mbx * unit.x + unit.x; vb[21].y = macroblock->mby * unit.y + half.y;
			vb[22].x = macroblock->mbx * unit.x + half.x; vb[22].y = macroblock->mby * unit.y + unit.y;
			vb[23].x = macroblock->mbx * unit.x + unit.x; vb[23].y = macroblock->mby * unit.y + unit.y;

			mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][0].buffer);

			break;
		}
		default:
			assert(0);
	}

	/* Upload the residual DCT blocks for this macroblock */
	vlGrabBlocks
	(
		mc,
		macroblock->mbx,
		macroblock->mby,
		macroblock->dct_type,
		macroblock->cbp,
		macroblock->blocks
	);

	mc->num_macroblocks[mb_type_ex]++;

	return 0;
}
465
466 static int vlFlush
467 (
468 struct vlRender *render
469 )
470 {
471 struct vlR16SnormBufferedMC *mc;
472 struct pipe_context *pipe;
473 struct vlVertexShaderConsts *vs_consts;
474
475 assert(mc);
476
477 mc = (struct vlR16SnormBufferedMC*)render;
478 pipe = mc->pipe;
479
480 mc->render_target.cbufs[0] = pipe->screen->get_tex_surface
481 (
482 pipe->screen,
483 mc->buffered_surface->texture,
484 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
485 );
486
487 pipe->set_framebuffer_state(pipe, &mc->render_target);
488 pipe->set_viewport_state(pipe, &mc->viewport);
489 vs_consts = pipe->winsys->buffer_map
490 (
491 pipe->winsys,
492 mc->vs_const_buf.buffer,
493 PIPE_BUFFER_USAGE_CPU_WRITE
494 );
495
496 vs_consts->denorm.x = mc->buffered_surface->texture->width[0];
497 vs_consts->denorm.y = mc->buffered_surface->texture->height[0];
498
499 pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer);
500 pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf);
501
502 if (mc->num_macroblocks[vlMacroBlockExTypeIntra] > 0)
503 {
504 pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeIntra]);
505 pipe->set_vertex_elements(pipe, 1, mc->vertex_elems);
506 pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
507 pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers);
508 pipe->bind_vs_state(pipe, mc->i_vs);
509 pipe->bind_fs_state(pipe, mc->i_fs);
510
511 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeIntra] * 24);
512 }
513
514 if (mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0)
515 {
516 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeFwdPredictedFrame]);
517 pipe->set_vertex_elements(pipe, 3, mc->vertex_elems);
518 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
519 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
520 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
521 pipe->bind_vs_state(pipe, mc->p_vs[0]);
522 pipe->bind_fs_state(pipe, mc->p_fs[0]);
523
524 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24);
525 }
526
527 if (mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0)
528 {
529 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeFwdPredictedField]);
530 pipe->set_vertex_elements(pipe, 3, mc->vertex_elems);
531 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
532 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
533 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
534 pipe->bind_vs_state(pipe, mc->p_vs[1]);
535 pipe->bind_fs_state(pipe, mc->p_fs[1]);
536
537 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24);
538 }
539
540 if (mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0)
541 {
542 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBkwdPredictedFrame]);
543 pipe->set_vertex_elements(pipe, 3, mc->vertex_elems);
544 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
545 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
546 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
547 pipe->bind_vs_state(pipe, mc->p_vs[0]);
548 pipe->bind_fs_state(pipe, mc->p_fs[0]);
549
550 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24);
551 }
552
553 if (mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0)
554 {
555 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBkwdPredictedField]);
556 pipe->set_vertex_elements(pipe, 3, mc->vertex_elems);
557 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
558 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
559 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
560 pipe->bind_vs_state(pipe, mc->p_vs[1]);
561 pipe->bind_fs_state(pipe, mc->p_fs[1]);
562
563 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24);
564 }
565
566 if (mc->num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0)
567 {
568 pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBiPredictedFrame]);
569 pipe->set_vertex_elements(pipe, 5, mc->vertex_elems);
570 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
571 mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
572 pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
573 pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
574 pipe->bind_vs_state(pipe, mc->b_vs[0]);
575 pipe->bind_fs_state(pipe, mc->b_fs[0]);
576
577 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24);
578 }
579
580 if (mc->num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0)
581 {
582 pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBiPredictedField]);
583 pipe->set_vertex_elements(pipe, 5, mc->vertex_elems);
584 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
585 mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
586 pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
587 pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
588 pipe->bind_vs_state(pipe, mc->b_vs[1]);
589 pipe->bind_fs_state(pipe, mc->b_fs[1]);
590
591 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24);
592 }
593
594 memset(mc->num_macroblocks, 0, sizeof(unsigned int) * vlNumMacroBlockExTypes);
595 mc->cur_buf++;
596
597 return 0;
598 }
599
600 static int vlRenderMacroBlocksMpeg2R16SnormBuffered
601 (
602 struct vlRender *render,
603 struct vlMpeg2MacroBlockBatch *batch,
604 struct vlSurface *surface
605 )
606 {
607 struct vlR16SnormBufferedMC *mc;
608 unsigned int i;
609
610 assert(render);
611
612 mc = (struct vlR16SnormBufferedMC*)render;
613
614 if (mc->buffered_surface)
615 {
616 if
617 (
618 mc->buffered_surface != surface /*||
619 mc->past_surface != batch->past_surface ||
620 mc->future_surface != batch->future_surface*/
621 )
622 {
623 vlFlush(&mc->base);
624 mc->buffered_surface = surface;
625 mc->past_surface = batch->past_surface;
626 mc->future_surface = batch->future_surface;
627 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
628 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
629 }
630 }
631 else
632 {
633 mc->buffered_surface = surface;
634 mc->past_surface = batch->past_surface;
635 mc->future_surface = batch->future_surface;
636 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
637 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
638 }
639
640 for (i = 0; i < batch->num_macroblocks; ++i)
641 vlGrabMacroBlock(mc, &batch->macroblocks[i]);
642
643 return 0;
644 }
645
/* End a rendering batch. This renderer needs no per-batch teardown. */
static int vlEnd(struct vlRender *render)
{
	assert(render);
	return 0;
}
655
656 static int vlDestroy
657 (
658 struct vlRender *render
659 )
660 {
661 struct vlR16SnormBufferedMC *mc;
662 struct pipe_context *pipe;
663 unsigned int g, h, i;
664
665 assert(render);
666
667 mc = (struct vlR16SnormBufferedMC*)render;
668 pipe = mc->pipe;
669
670 for (i = 0; i < 5; ++i)
671 pipe->delete_sampler_state(pipe, mc->samplers[i]);
672
673 for (g = 0; g < NUM_BUF_SETS; ++g)
674 for (h = 0; h < vlNumMacroBlockExTypes; ++h)
675 for (i = 0; i < 3; ++i)
676 pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[g][h][i].buffer);
677
678 /* Textures 3 & 4 are not created directly, no need to release them here */
679 for (i = 0; i < NUM_BUF_SETS; ++i)
680 {
681 pipe_texture_release(&mc->textures[i][0]);
682 pipe_texture_release(&mc->textures[i][1]);
683 pipe_texture_release(&mc->textures[i][2]);
684 }
685
686 pipe->delete_vs_state(pipe, mc->i_vs);
687 pipe->delete_fs_state(pipe, mc->i_fs);
688
689 for (i = 0; i < 2; ++i)
690 {
691 pipe->delete_vs_state(pipe, mc->p_vs[i]);
692 pipe->delete_fs_state(pipe, mc->p_fs[i]);
693 pipe->delete_vs_state(pipe, mc->b_vs[i]);
694 pipe->delete_fs_state(pipe, mc->b_fs[i]);
695 }
696
697 pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer);
698 pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer);
699
700 free(mc);
701
702 return 0;
703 }
704
/*
 * Multiplier renormalizes block samples from 16 bits to 12 bits.
 * Divider is used when calculating Y % 2 for choosing top or bottom
 * field for P or B macroblocks.
 * TODO: Use immediates.
 */
static const struct vlFragmentShaderConsts fs_consts =
{
	{32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},	/* multiplier */
	{0.5f, 2.0f, 0.0f, 0.0f}						/* div */
};
716
717 static int vlCreateVertexShaderIMB
718 (
719 struct vlR16SnormBufferedMC *mc
720 )
721 {
722 const unsigned int max_tokens = 50;
723
724 struct pipe_context *pipe;
725 struct pipe_shader_state vs;
726 struct tgsi_token *tokens;
727 struct tgsi_header *header;
728
729 struct tgsi_full_declaration decl;
730 struct tgsi_full_instruction inst;
731
732 unsigned int ti;
733 unsigned int i;
734
735 assert(mc);
736
737 pipe = mc->pipe;
738 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
739
740 /* Version */
741 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
742 /* Header */
743 header = (struct tgsi_header*)&tokens[1];
744 *header = tgsi_build_header();
745 /* Processor */
746 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
747
748 ti = 3;
749
750 /*
751 * decl i0 ; Vertex pos, luma & chroma texcoords
752 */
753 for (i = 0; i < 3; i++)
754 {
755 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
756 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
757 }
758
759 /*
760 * decl o0 ; Vertex pos
761 * decl o1 ; Luma/chroma texcoords
762 */
763 for (i = 0; i < 2; i++)
764 {
765 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
766 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
767 }
768
769 /*
770 * mov o0, i0 ; Move input vertex pos to output
771 * mov o1, i0 ; Move input luma/chroma texcoords to output
772 */
773 for (i = 0; i < 2; ++i)
774 {
775 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
776 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
777 }
778
779 /* end */
780 inst = vl_end();
781 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
782
783 vs.tokens = tokens;
784 mc->i_vs = pipe->create_vs_state(pipe, &vs);
785 free(tokens);
786
787 return 0;
788 }
789
/*
 * Build the fragment shader for intra-coded macroblocks: samples the
 * Y/Cb/Cr textures, packs the three samples into one vector, and rescales
 * by the constant multiplier. Stores the shader in mc->i_fs.
 */
static int vlCreateFragmentShaderIMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 100;

	struct pipe_context *pipe;
	struct pipe_shader_state fs;
	struct tgsi_token *tokens;
	struct tgsi_header *header;

	struct tgsi_full_declaration decl;
	struct tgsi_full_instruction inst;

	unsigned int ti;
	unsigned int i;

	assert(mc);

	pipe = mc->pipe;
	/* NOTE(review): malloc result is used unchecked here */
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

	ti = 3;

	/* decl i0 ; Luma/chroma texcoords */
	decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl o0 ; Fragment color */
	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl t0, t1 */
	decl = vl_decl_temps(0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl s0 ; Sampler for luma texture
	 * decl s1 ; Sampler for chroma Cb texture
	 * decl s2 ; Sampler for chroma Cr texture
	 */
	for (i = 0; i < 3; ++i)
	{
		decl = vl_decl_samplers(i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * tex2d t1, i0, s0 ; Read texel from luma texture
	 * mov t0.x, t1.x ; Move luma sample into .x component
	 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
	 * mov t0.y, t1.x ; Move Cb sample into .y component
	 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
	 * mov t0.z, t1.x ; Move Cr sample into .z component
	 */
	for (i = 0; i < 3; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

		/* Broadcast the sampled .x into the i-th component of t0 */
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	}

	/* mul o0, t0, c0 ; Rescale texel to correct range */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	fs.tokens = tokens;
	mc->i_fs = pipe->create_fs_state(pipe, &fs);
	free(tokens);

	return 0;
}
886
/*
 * Build the vertex shader for frame-predicted (P) macroblocks: passes
 * position and texcoords through, and adds the motion vector (stream 1)
 * to the vertex position to form the reference macroblock texcoords.
 * Stores the shader in mc->p_vs[0].
 */
static int vlCreateVertexShaderFramePMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 100;

	struct pipe_context *pipe;
	struct pipe_shader_state vs;
	struct tgsi_token *tokens;
	struct tgsi_header *header;

	struct tgsi_full_declaration decl;
	struct tgsi_full_instruction inst;

	unsigned int ti;
	unsigned int i;

	assert(mc);

	pipe = mc->pipe;
	/* NOTE(review): malloc result is used unchecked here */
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);

	ti = 3;

	/*
	 * decl i0 ; Vertex pos, luma/chroma texcoords
	 * decl i1 ; Ref surface top field texcoords
	 * decl i2 ; Ref surface bottom field texcoords (unused, packed in the same stream)
	 */
	for (i = 0; i < 3; i++)
	{
		decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * decl o0 ; Vertex pos
	 * decl o1 ; Luma/chroma texcoords
	 * decl o2 ; Ref macroblock texcoords
	 */
	for (i = 0; i < 3; i++)
	{
		decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * mov o0, i0 ; Move input vertex pos to output
	 * mov o1, i0 ; Move input luma/chroma texcoords to output
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* add o2, i0, i1 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 1);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	vs.tokens = tokens;
	mc->p_vs[0] = pipe->create_vs_state(pipe, &vs);
	free(tokens);

	return 0;
}
966
967 static int vlCreateVertexShaderFieldPMB
968 (
969 struct vlR16SnormBufferedMC *mc
970 )
971 {
972 const unsigned int max_tokens = 100;
973
974 struct pipe_context *pipe;
975 struct pipe_shader_state vs;
976 struct tgsi_token *tokens;
977 struct tgsi_header *header;
978
979 struct tgsi_full_declaration decl;
980 struct tgsi_full_instruction inst;
981
982 unsigned int ti;
983 unsigned int i;
984
985 assert(mc);
986
987 pipe = mc->pipe;
988 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
989
990 /* Version */
991 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
992 /* Header */
993 header = (struct tgsi_header*)&tokens[1];
994 *header = tgsi_build_header();
995 /* Processor */
996 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
997
998 ti = 3;
999
1000 /*
1001 * decl i0 ; Vertex pos, luma/chroma texcoords
1002 * decl i1 ; Ref surface top field texcoords
1003 * decl i2 ; Ref surface bottom field texcoords
1004 */
1005 for (i = 0; i < 3; i++)
1006 {
1007 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1008 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1009 }
1010
1011 /* decl c0 ; Texcoord denorm coefficients */
1012 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1013 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1014
1015 /*
1016 * decl o0 ; Vertex pos
1017 * decl o1 ; Luma/chroma texcoords
1018 * decl o2 ; Top field ref macroblock texcoords
1019 * decl o3 ; Bottom field ref macroblock texcoords
1020 * decl o4 ; Denormalized vertex pos
1021 */
1022 for (i = 0; i < 5; i++)
1023 {
1024 decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1025 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1026 }
1027
1028 /*
1029 * mov o0, i0 ; Move input vertex pos to output
1030 * mov o1, i0 ; Move input luma/chroma texcoords to output
1031 */
1032 for (i = 0; i < 3; ++i)
1033 {
1034 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i == 0 ? 0 : i - 1);
1035 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1036 }
1037
1038 /*
1039 * add o2, i0, i1 ; Translate vertex pos by motion vec to form top field macroblock texcoords
1040 * add o3, i0, i2 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords
1041 */
1042 for (i = 0; i < 2; ++i)
1043 {
1044 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 1);
1045 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1046 }
1047
1048 /* mul o4, i0, c0 ; Denorm vertex pos */
1049 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1050 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1051
1052 /* end */
1053 inst = vl_end();
1054 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1055
1056 vs.tokens = tokens;
1057 mc->p_vs[1] = pipe->create_vs_state(pipe, &vs);
1058 free(tokens);
1059
1060 return 0;
1061 }
1062
1063 static int vlCreateFragmentShaderFramePMB
1064 (
1065 struct vlR16SnormBufferedMC *mc
1066 )
1067 {
1068 const unsigned int max_tokens = 100;
1069
1070 struct pipe_context *pipe;
1071 struct pipe_shader_state fs;
1072 struct tgsi_token *tokens;
1073 struct tgsi_header *header;
1074
1075 struct tgsi_full_declaration decl;
1076 struct tgsi_full_instruction inst;
1077
1078 unsigned int ti;
1079 unsigned int i;
1080
1081 assert(mc);
1082
1083 pipe = mc->pipe;
1084 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1085
1086 /* Version */
1087 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1088 /* Header */
1089 header = (struct tgsi_header*)&tokens[1];
1090 *header = tgsi_build_header();
1091 /* Processor */
1092 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1093
1094 ti = 3;
1095
1096 /*
1097 * decl i0 ; Texcoords for s0, s1, s2
1098 * decl i1 ; Texcoords for s3
1099 */
1100 for (i = 0; i < 2; ++i)
1101 {
1102 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1103 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1104 }
1105
1106 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
1107 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1108 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1109
1110 /* decl o0 ; Fragment color */
1111 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1112 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1113
1114 /* decl t0, t1 */
1115 decl = vl_decl_temps(0, 1);
1116 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1117
1118 /*
1119 * decl s0 ; Sampler for luma texture
1120 * decl s1 ; Sampler for chroma Cb texture
1121 * decl s2 ; Sampler for chroma Cr texture
1122 * decl s3 ; Sampler for ref surface texture
1123 */
1124 for (i = 0; i < 4; ++i)
1125 {
1126 decl = vl_decl_samplers(i, i);
1127 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1128 }
1129
1130 /*
1131 * tex2d t1, i0, s0 ; Read texel from luma texture
1132 * mov t0.x, t1.x ; Move luma sample into .x component
1133 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1134 * mov t0.y, t1.x ; Move Cb sample into .y component
1135 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1136 * mov t0.z, t1.x ; Move Cr sample into .z component
1137 */
1138 for (i = 0; i < 3; ++i)
1139 {
1140 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
1141 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1142
1143 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1144 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1145 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1146 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1147 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1148 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1149
1150 }
1151
1152 /* mul t0, t0, c0 ; Rescale texel to correct range */
1153 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1154 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1155
1156 /* tex2d t1, i1, s3 ; Read texel from ref macroblock */
1157 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 1, TGSI_FILE_SAMPLER, 3);
1158 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1159
1160 /* add o0, t0, t1 ; Add ref and differential to form final output */
1161 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1162 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1163
1164 /* end */
1165 inst = vl_end();
1166 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1167
1168 fs.tokens = tokens;
1169 mc->p_fs[0] = pipe->create_fs_state(pipe, &fs);
1170 free(tokens);
1171
1172 return 0;
1173 }
1174
1175 static int vlCreateFragmentShaderFieldPMB
1176 (
1177 struct vlR16SnormBufferedMC *mc
1178 )
1179 {
1180 const unsigned int max_tokens = 200;
1181
1182 struct pipe_context *pipe;
1183 struct pipe_shader_state fs;
1184 struct tgsi_token *tokens;
1185 struct tgsi_header *header;
1186
1187 struct tgsi_full_declaration decl;
1188 struct tgsi_full_instruction inst;
1189
1190 unsigned int ti;
1191 unsigned int i;
1192
1193 assert(mc);
1194
1195 pipe = mc->pipe;
1196 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1197
1198 /* Version */
1199 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1200 /* Header */
1201 header = (struct tgsi_header*)&tokens[1];
1202 *header = tgsi_build_header();
1203 /* Processor */
1204 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1205
1206 ti = 3;
1207
1208 /*
1209 * decl i0 ; Texcoords for s0, s1, s2
1210 * decl i1 ; Texcoords for s3
1211 * decl i2 ; Texcoords for s3
1212 * decl i3 ; Denormalized vertex pos
1213 */
1214 for (i = 0; i < 4; ++i)
1215 {
1216 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1217 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1218 }
1219
1220 /*
1221 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1222 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
1223 */
1224 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1225 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1226
1227 /* decl o0 ; Fragment color */
1228 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1229 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1230
1231 /* decl t0-t4 */
1232 decl = vl_decl_temps(0, 4);
1233 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1234
1235 /*
1236 * decl s0 ; Sampler for luma texture
1237 * decl s1 ; Sampler for chroma Cb texture
1238 * decl s2 ; Sampler for chroma Cr texture
1239 * decl s3 ; Sampler for ref surface texture
1240 */
1241 for (i = 0; i < 4; ++i)
1242 {
1243 decl = vl_decl_samplers(i, i);
1244 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1245 }
1246
1247 /*
1248 * tex2d t1, i0, s0 ; Read texel from luma texture
1249 * mov t0.x, t1.x ; Move luma sample into .x component
1250 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1251 * mov t0.y, t1.x ; Move Cb sample into .y component
1252 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1253 * mov t0.z, t1.x ; Move Cr sample into .z component
1254 */
1255 for (i = 0; i < 3; ++i)
1256 {
1257 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
1258 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1259
1260 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1261 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1262 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1263 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1264 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1265 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1266
1267 }
1268
1269 /* mul t0, t0, c0 ; Rescale texel to correct range */
1270 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1271 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1272
1273 /*
1274 * tex2d t1, i1, s3 ; Read texel from ref macroblock top field
1275 * tex2d t2, i2, s3 ; Read texel from ref macroblock bottom field
1276 */
1277 for (i = 0; i < 2; ++i)
1278 {
1279 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, 3);
1280 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1281 }
1282
1283 /* XXX: Pos values off by 0.5? */
1284 /* sub t4, i3.y, c1.x ; Sub 0.5 from denormalized pos */
1285 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 3, TGSI_FILE_CONSTANT, 1);
1286 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1287 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1288 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1289 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1290 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1291 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1292 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1293 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1294 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1295
1296 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1297 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
1298 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1299 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1300 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1301 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1302 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1303
1304 /* floor t3, t3 ; Get rid of fractional part */
1305 inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
1306 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1307
1308 /* mul t3, t3, c1.y ; Multiply by 2 */
1309 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
1310 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1311 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1312 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1313 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1314 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1315
1316 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1317 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
1318 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1319
1320 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1321 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
1322 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1323 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1324
1325 /* add o0, t0, t1 ; Add ref and differential to form final output */
1326 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1327 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1328
1329 /* end */
1330 inst = vl_end();
1331 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1332
1333 fs.tokens = tokens;
1334 mc->p_fs[1] = pipe->create_fs_state(pipe, &fs);
1335 free(tokens);
1336
1337 return 0;
1338 }
1339
1340 static int vlCreateVertexShaderFrameBMB
1341 (
1342 struct vlR16SnormBufferedMC *mc
1343 )
1344 {
1345 const unsigned int max_tokens = 100;
1346
1347 struct pipe_context *pipe;
1348 struct pipe_shader_state vs;
1349 struct tgsi_token *tokens;
1350 struct tgsi_header *header;
1351
1352 struct tgsi_full_declaration decl;
1353 struct tgsi_full_instruction inst;
1354
1355 unsigned int ti;
1356 unsigned int i;
1357
1358 assert(mc);
1359
1360 pipe = mc->pipe;
1361 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1362
1363 /* Version */
1364 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1365 /* Header */
1366 header = (struct tgsi_header*)&tokens[1];
1367 *header = tgsi_build_header();
1368 /* Processor */
1369 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1370
1371 ti = 3;
1372
1373 /*
1374 * decl i0 ; Vertex pos, luma/chroma texcoords
1375 * decl i1 ; First ref surface top field texcoords
1376 * decl i2 ; First ref surface bottom field texcoords (unused, packed in the same stream)
1377 * decl i3 ; Second ref surface top field texcoords
1378 * decl i4 ; Second ref surface bottom field texcoords (unused, packed in the same stream)
1379 */
1380 for (i = 0; i < 5; i++)
1381 {
1382 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1383 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1384 }
1385
1386 /*
1387 * decl o0 ; Vertex pos
1388 * decl o1 ; Luma/chroma texcoords
1389 * decl o2 ; First ref macroblock texcoords
1390 * decl o3 ; Second ref macroblock texcoords
1391 */
1392 for (i = 0; i < 4; i++)
1393 {
1394 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1395 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1396 }
1397
1398 /*
1399 * mov o0, i0 ; Move input vertex pos to output
1400 * mov o1, i0 ; Move input luma/chroma texcoords to output
1401 */
1402 for (i = 0; i < 2; ++i)
1403 {
1404 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
1405 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1406 }
1407
1408 /*
1409 * add o2, i0, i1 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
1410 * add o3, i0, i3 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
1411 */
1412 for (i = 0; i < 2; ++i)
1413 {
1414 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i * 2 + 1);
1415 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1416 }
1417
1418 /* end */
1419 inst = vl_end();
1420 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1421
1422 vs.tokens = tokens;
1423 mc->b_vs[0] = pipe->create_vs_state(pipe, &vs);
1424 free(tokens);
1425
1426 return 0;
1427 }
1428
1429 static int vlCreateVertexShaderFieldBMB
1430 (
1431 struct vlR16SnormBufferedMC *mc
1432 )
1433 {
1434 const unsigned int max_tokens = 100;
1435
1436 struct pipe_context *pipe;
1437 struct pipe_shader_state vs;
1438 struct tgsi_token *tokens;
1439 struct tgsi_header *header;
1440
1441 struct tgsi_full_declaration decl;
1442 struct tgsi_full_instruction inst;
1443
1444 unsigned int ti;
1445 unsigned int i;
1446
1447 assert(mc);
1448
1449 pipe = mc->pipe;
1450 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1451
1452 /* Version */
1453 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1454 /* Header */
1455 header = (struct tgsi_header*)&tokens[1];
1456 *header = tgsi_build_header();
1457 /* Processor */
1458 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1459
1460 ti = 3;
1461
1462 /*
1463 * decl i0 ; Vertex pos, Luma/chroma texcoords
1464 * decl i1 ; First ref surface top field texcoords
1465 * decl i2 ; First ref surface bottom field texcoords
1466 * decl i3 ; Second ref surface top field texcoords
1467 * decl i4 ; Second ref surface bottom field texcoords
1468 */
1469 for (i = 0; i < 5; i++)
1470 {
1471 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1472 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1473 }
1474
1475 /* decl c0 ; Denorm coefficients */
1476 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6);
1477 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1478
1479 /*
1480 * decl o0 ; Vertex pos
1481 * decl o1 ; Luma/chroma texcoords
1482 * decl o2 ; Top field past ref macroblock texcoords
1483 * decl o3 ; Bottom field past ref macroblock texcoords
1484 * decl o4 ; Top field future ref macroblock texcoords
1485 * decl o5 ; Bottom field future ref macroblock texcoords
1486 * decl o6 ; Denormalized vertex pos
1487 */
1488 for (i = 0; i < 7; i++)
1489 {
1490 decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1491 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1492 }
1493
1494 /* decl t0, t1 */
1495 decl = vl_decl_temps(0, 1);
1496 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1497
1498 /*
1499 * mov o0, i0 ; Move input vertex pos to output
1500 * mov o1, i0 ; Move input luma/chroma texcoords to output
1501 * mov o2, i1 ; Move past top field texcoords to output
1502 * mov o3, i2 ; Move past bottom field texcoords to output
1503 * mov o4, i3 ; Move future top field texcoords to output
1504 * mov o5, i4 ; Move future bottom field texcoords to output
1505 */
1506 for (i = 0; i < 6; ++i)
1507 {
1508 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
1509 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1510 }
1511
1512 /*
1513 * add o2, i0, i1 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
1514 * add o3, i0, i2 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
1515 * add o4, i0, i3 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
1516 * add o5, i0, i4 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
1517 */
1518 for (i = 0; i < 4; ++i)
1519 {
1520 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 1);
1521 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1522 }
1523
1524 /* mul o6, i0, c0 ; Denorm vertex pos */
1525 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1526 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1527
1528 /* end */
1529 inst = vl_end();
1530 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1531
1532 vs.tokens = tokens;
1533 mc->b_vs[1] = pipe->create_vs_state(pipe, &vs);
1534 free(tokens);
1535
1536 return 0;
1537 }
1538
1539 static int vlCreateFragmentShaderFrameBMB
1540 (
1541 struct vlR16SnormBufferedMC *mc
1542 )
1543 {
1544 const unsigned int max_tokens = 100;
1545
1546 struct pipe_context *pipe;
1547 struct pipe_shader_state fs;
1548 struct tgsi_token *tokens;
1549 struct tgsi_header *header;
1550
1551 struct tgsi_full_declaration decl;
1552 struct tgsi_full_instruction inst;
1553
1554 unsigned int ti;
1555 unsigned int i;
1556
1557 assert(mc);
1558
1559 pipe = mc->pipe;
1560 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1561
1562 /* Version */
1563 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1564 /* Header */
1565 header = (struct tgsi_header*)&tokens[1];
1566 *header = tgsi_build_header();
1567 /* Processor */
1568 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1569
1570 ti = 3;
1571
1572 /*
1573 * decl i0 ; Texcoords for s0, s1, s2
1574 * decl i1 ; Texcoords for s3
1575 * decl i2 ; Texcoords for s4
1576 */
1577 for (i = 0; i < 3; ++i)
1578 {
1579 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1580 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1581 }
1582
1583 /*
1584 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1585 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
1586 */
1587 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1588 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1589
1590 /* decl o0 ; Fragment color */
1591 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1592 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1593
1594 /* decl t0-t2 */
1595 decl = vl_decl_temps(0, 2);
1596 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1597
1598 /*
1599 * decl s0 ; Sampler for luma texture
1600 * decl s1 ; Sampler for chroma Cb texture
1601 * decl s2 ; Sampler for chroma Cr texture
1602 * decl s3 ; Sampler for past ref surface texture
1603 * decl s4 ; Sampler for future ref surface texture
1604 */
1605 for (i = 0; i < 5; ++i)
1606 {
1607 decl = vl_decl_samplers(i, i);
1608 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1609 }
1610
1611 /*
1612 * tex2d t1, i0, s0 ; Read texel from luma texture
1613 * mov t0.x, t1.x ; Move luma sample into .x component
1614 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1615 * mov t0.y, t1.x ; Move Cb sample into .y component
1616 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1617 * mov t0.z, t1.x ; Move Cr sample into .z component
1618 */
1619 for (i = 0; i < 3; ++i)
1620 {
1621 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
1622 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1623
1624 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1625 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1626 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1627 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1628 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1629 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1630
1631 }
1632
1633 /* mul t0, t0, c0 ; Rescale texel to correct range */
1634 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1635 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1636
1637 /*
1638 * tex2d t1, i1, s3 ; Read texel from past ref macroblock
1639 * tex2d t2, i2, s4 ; Read texel from future ref macroblock
1640 */
1641 for (i = 0; i < 2; ++i)
1642 {
1643 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, i + 3);
1644 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1645 }
1646
1647 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
1648 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1649 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1650 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1651 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1652 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1653 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1654
1655 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1656 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1657 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1658
1659 /* end */
1660 inst = vl_end();
1661 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1662
1663 fs.tokens = tokens;
1664 mc->b_fs[0] = pipe->create_fs_state(pipe, &fs);
1665 free(tokens);
1666
1667 return 0;
1668 }
1669
1670 static int vlCreateFragmentShaderFieldBMB
1671 (
1672 struct vlR16SnormBufferedMC *mc
1673 )
1674 {
1675 const unsigned int max_tokens = 200;
1676
1677 struct pipe_context *pipe;
1678 struct pipe_shader_state fs;
1679 struct tgsi_token *tokens;
1680 struct tgsi_header *header;
1681
1682 struct tgsi_full_declaration decl;
1683 struct tgsi_full_instruction inst;
1684
1685 unsigned int ti;
1686 unsigned int i;
1687
1688 assert(mc);
1689
1690 pipe = mc->pipe;
1691 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1692
1693 /* Version */
1694 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1695 /* Header */
1696 header = (struct tgsi_header*)&tokens[1];
1697 *header = tgsi_build_header();
1698 /* Processor */
1699 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1700
1701 ti = 3;
1702
1703 /*
1704 * decl i0 ; Texcoords for s0, s1, s2
1705 * decl i1 ; Texcoords for s3
1706 * decl i2 ; Texcoords for s3
1707 * decl i3 ; Texcoords for s4
1708 * decl i4 ; Texcoords for s4
1709 * decl i5 ; Denormalized vertex pos
1710 */
1711 for (i = 0; i < 6; ++i)
1712 {
1713 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1714 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1715 }
1716
1717 /*
1718 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1719 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
1720 * ; and for Y-mod-2 top/bottom field selection
1721 */
1722 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1723 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1724
1725 /* decl o0 ; Fragment color */
1726 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1727 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1728
1729 /* decl t0-t5 */
1730 decl = vl_decl_temps(0, 5);
1731 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1732
1733 /*
1734 * decl s0 ; Sampler for luma texture
1735 * decl s1 ; Sampler for chroma Cb texture
1736 * decl s2 ; Sampler for chroma Cr texture
1737 * decl s3 ; Sampler for past ref surface texture
1738 * decl s4 ; Sampler for future ref surface texture
1739 */
1740 for (i = 0; i < 5; ++i)
1741 {
1742 decl = vl_decl_samplers(i, i);
1743 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1744 }
1745
1746 /*
1747 * tex2d t1, i0, s0 ; Read texel from luma texture
1748 * mov t0.x, t1.x ; Move luma sample into .x component
1749 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1750 * mov t0.y, t1.x ; Move Cb sample into .y component
1751 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1752 * mov t0.z, t1.x ; Move Cr sample into .z component
1753 */
1754 for (i = 0; i < 3; ++i)
1755 {
1756 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
1757 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1758
1759 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1760 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1761 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1762 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1763 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1764 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1765
1766 }
1767
1768 /* mul t0, t0, c0 ; Rescale texel to correct range */
1769 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1770 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1771
1772 /* XXX: Pos values off by 0.5? */
1773 /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */
1774 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1);
1775 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1776 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1777 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1778 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1779 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1780 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1781 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1782 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1783 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1784
1785 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1786 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
1787 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1788 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1789 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1790 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1791 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1792
1793 /* floor t3, t3 ; Get rid of fractional part */
1794 inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
1795 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1796
1797 /* mul t3, t3, c1.y ; Multiply by 2 */
1798 inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
1799 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1800 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1801 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1802 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1803 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1804
1805 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1806 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
1807 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1808
1809 /*
1810 * tex2d t1, i1, s3 ; Read texel from past ref macroblock top field
1811 * tex2d t2, i2, s3 ; Read texel from past ref macroblock bottom field
1812 */
1813 for (i = 0; i < 2; ++i)
1814 {
1815 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, 3);
1816 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1817 }
1818
1819 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1820 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
1821 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1822 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1823
1824 /*
1825 * tex2d t4, i3, s4 ; Read texel from future ref macroblock top field
1826 * tex2d t5, i4, s4 ; Read texel from future ref macroblock bottom field
1827 */
1828 for (i = 0; i < 2; ++i)
1829 {
1830 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 4);
1831 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1832 }
1833
1834 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1835 /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
1836 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
1837 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1838
1839 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
1840 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1841 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1842 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1843 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1844 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1845 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1846
1847 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1848 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1849 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1850
1851 /* end */
1852 inst = vl_end();
1853 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1854
1855 fs.tokens = tokens;
1856 mc->b_fs[1] = pipe->create_fs_state(pipe, &fs);
1857 free(tokens);
1858
1859 return 0;
1860 }
1861
1862 static int vlCreateDataBufs
1863 (
1864 struct vlR16SnormBufferedMC *mc
1865 )
1866 {
1867 const unsigned int mbw = align(mc->video_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH;
1868 const unsigned int mbh = align(mc->video_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT;
1869 const unsigned int num_mb_per_frame = mbw * mbh;
1870
1871 struct pipe_context *pipe;
1872 unsigned int g, h, i;
1873
1874 assert(mc);
1875
1876 pipe = mc->pipe;
1877
1878 for (g = 0; g < NUM_BUF_SETS; ++g)
1879 {
1880 for (h = 0; h < vlNumMacroBlockExTypes; ++h)
1881 {
1882 /* Create our vertex buffer and vertex buffer element */
1883 mc->vertex_bufs[g][h][0].pitch = sizeof(struct vlVertex2f);
1884 mc->vertex_bufs[g][h][0].max_index = 24 * num_mb_per_frame - 1;
1885 mc->vertex_bufs[g][h][0].buffer_offset = 0;
1886 mc->vertex_bufs[g][h][0].buffer = pipe->winsys->buffer_create
1887 (
1888 pipe->winsys,
1889 1,
1890 PIPE_BUFFER_USAGE_VERTEX,
1891 sizeof(struct vlVertex2f) * 24 * num_mb_per_frame
1892 );
1893 }
1894 }
1895
1896 /* Position & block luma, block chroma texcoord element */
1897 mc->vertex_elems[0].src_offset = 0;
1898 mc->vertex_elems[0].vertex_buffer_index = 0;
1899 mc->vertex_elems[0].nr_components = 2;
1900 mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
1901
1902 for (g = 0; g < NUM_BUF_SETS; ++g)
1903 {
1904 for (h = 0; h < vlNumMacroBlockExTypes; ++h)
1905 {
1906 for (i = 1; i < 3; ++i)
1907 {
1908 mc->vertex_bufs[g][h][i].pitch = sizeof(struct vlVertex2f) * 2;
1909 mc->vertex_bufs[g][h][i].max_index = 24 * num_mb_per_frame - 1;
1910 mc->vertex_bufs[g][h][i].buffer_offset = 0;
1911 mc->vertex_bufs[g][h][i].buffer = pipe->winsys->buffer_create
1912 (
1913 pipe->winsys,
1914 1,
1915 PIPE_BUFFER_USAGE_VERTEX,
1916 sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame
1917 );
1918 }
1919 }
1920 }
1921
1922 /* First ref surface top field texcoord element */
1923 mc->vertex_elems[1].src_offset = 0;
1924 mc->vertex_elems[1].vertex_buffer_index = 1;
1925 mc->vertex_elems[1].nr_components = 2;
1926 mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
1927
1928 /* First ref surface bottom field texcoord element */
1929 mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f);
1930 mc->vertex_elems[2].vertex_buffer_index = 1;
1931 mc->vertex_elems[2].nr_components = 2;
1932 mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
1933
1934 /* Second ref surface top field texcoord element */
1935 mc->vertex_elems[3].src_offset = 0;
1936 mc->vertex_elems[3].vertex_buffer_index = 2;
1937 mc->vertex_elems[3].nr_components = 2;
1938 mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
1939
1940 /* Second ref surface bottom field texcoord element */
1941 mc->vertex_elems[4].src_offset = sizeof(struct vlVertex2f);
1942 mc->vertex_elems[4].vertex_buffer_index = 2;
1943 mc->vertex_elems[4].nr_components = 2;
1944 mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
1945
1946 /* Create our constant buffer */
1947 mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts);
1948 mc->vs_const_buf.buffer = pipe->winsys->buffer_create
1949 (
1950 pipe->winsys,
1951 1,
1952 PIPE_BUFFER_USAGE_CONSTANT,
1953 mc->vs_const_buf.size
1954 );
1955
1956 mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts);
1957 mc->fs_const_buf.buffer = pipe->winsys->buffer_create
1958 (
1959 pipe->winsys,
1960 1,
1961 PIPE_BUFFER_USAGE_CONSTANT,
1962 mc->fs_const_buf.size
1963 );
1964
1965 memcpy
1966 (
1967 pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
1968 &fs_consts,
1969 sizeof(struct vlFragmentShaderConsts)
1970 );
1971
1972 pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer);
1973
1974 return 0;
1975 }
1976
1977 static int vlInit
1978 (
1979 struct vlR16SnormBufferedMC *mc
1980 )
1981 {
1982 struct pipe_context *pipe;
1983 struct pipe_sampler_state sampler;
1984 struct pipe_texture template;
1985 unsigned int filters[5];
1986 unsigned int i;
1987
1988 assert(mc);
1989
1990 pipe = mc->pipe;
1991
1992 /* For MC we render to textures, which are rounded up to nearest POT */
1993 mc->viewport.scale[0] = vlRoundUpPOT(mc->video_width);
1994 mc->viewport.scale[1] = vlRoundUpPOT(mc->video_height);
1995 mc->viewport.scale[2] = 1;
1996 mc->viewport.scale[3] = 1;
1997 mc->viewport.translate[0] = 0;
1998 mc->viewport.translate[1] = 0;
1999 mc->viewport.translate[2] = 0;
2000 mc->viewport.translate[3] = 0;
2001
2002 mc->render_target.width = vlRoundUpPOT(mc->video_width);
2003 mc->render_target.height = vlRoundUpPOT(mc->video_height);
2004 mc->render_target.num_cbufs = 1;
2005 /* FB for MC stage is a vlSurface created by the user, set at render time */
2006 mc->render_target.zsbuf = NULL;
2007
2008 filters[0] = PIPE_TEX_FILTER_NEAREST;
2009 /* FIXME: Linear causes discoloration around block edges */
2010 filters[1] = /*mc->video_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
2011 filters[2] = /*mc->video_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
2012 filters[3] = PIPE_TEX_FILTER_LINEAR;
2013 filters[4] = PIPE_TEX_FILTER_LINEAR;
2014
2015 for (i = 0; i < 5; ++i)
2016 {
2017 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2018 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2019 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2020 sampler.min_img_filter = filters[i];
2021 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2022 sampler.mag_img_filter = filters[i];
2023 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
2024 sampler.compare_func = PIPE_FUNC_ALWAYS;
2025 sampler.normalized_coords = 1;
2026 /*sampler.prefilter = ;*/
2027 /*sampler.shadow_ambient = ;*/
2028 /*sampler.lod_bias = ;*/
2029 sampler.min_lod = 0;
2030 /*sampler.max_lod = ;*/
2031 /*sampler.border_color[i] = ;*/
2032 /*sampler.max_anisotropy = ;*/
2033 mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler);
2034 }
2035
2036 memset(&template, 0, sizeof(struct pipe_texture));
2037 template.target = PIPE_TEXTURE_2D;
2038 template.format = PIPE_FORMAT_R16_SNORM;
2039 template.last_level = 0;
2040 template.width[0] = vlRoundUpPOT(mc->video_width);
2041 template.height[0] = vlRoundUpPOT(mc->video_height);
2042 template.depth[0] = 1;
2043 template.compressed = 0;
2044 pf_get_block(template.format, &template.block);
2045
2046 for (i = 0; i < NUM_BUF_SETS; ++i)
2047 mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template);
2048
2049 if (mc->video_format == vlFormatYCbCr420)
2050 {
2051 template.width[0] = vlRoundUpPOT(mc->video_width / 2);
2052 template.height[0] = vlRoundUpPOT(mc->video_height / 2);
2053 }
2054 else if (mc->video_format == vlFormatYCbCr422)
2055 template.height[0] = vlRoundUpPOT(mc->video_height / 2);
2056
2057 for (i = 0; i < NUM_BUF_SETS; ++i)
2058 {
2059 mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template);
2060 mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template);
2061 }
2062
2063 /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
2064
2065 vlCreateVertexShaderIMB(mc);
2066 vlCreateFragmentShaderIMB(mc);
2067 vlCreateVertexShaderFramePMB(mc);
2068 vlCreateVertexShaderFieldPMB(mc);
2069 vlCreateFragmentShaderFramePMB(mc);
2070 vlCreateFragmentShaderFieldPMB(mc);
2071 vlCreateVertexShaderFrameBMB(mc);
2072 vlCreateVertexShaderFieldBMB(mc);
2073 vlCreateFragmentShaderFrameBMB(mc);
2074 vlCreateFragmentShaderFieldBMB(mc);
2075 vlCreateDataBufs(mc);
2076
2077 return 0;
2078 }
2079
2080 int vlCreateR16SNormBufferedMC
2081 (
2082 struct pipe_context *pipe,
2083 unsigned int video_width,
2084 unsigned int video_height,
2085 enum vlFormat video_format,
2086 struct vlRender **render
2087 )
2088 {
2089 struct vlR16SnormBufferedMC *mc;
2090
2091 assert(pipe);
2092 assert(render);
2093
2094 mc = calloc(1, sizeof(struct vlR16SnormBufferedMC));
2095
2096 mc->base.vlBegin = &vlBegin;
2097 mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered;
2098 mc->base.vlEnd = &vlEnd;
2099 mc->base.vlFlush = &vlFlush;
2100 mc->base.vlDestroy = &vlDestroy;
2101 mc->pipe = pipe;
2102 mc->video_width = video_width;
2103 mc->video_height = video_height;
2104
2105 mc->cur_buf = 0;
2106 mc->buffered_surface = NULL;
2107 mc->past_surface = NULL;
2108 mc->future_surface = NULL;
2109 memset(mc->num_macroblocks, 0, sizeof(unsigned int) * vlNumMacroBlockExTypes);
2110
2111 vlInit(mc);
2112
2113 *render = &mc->base;
2114
2115 return 0;
2116 }