fc383cb8f6df2fa0bf9328b00428205ba13c6893
[mesa.git] / src / gallium / state_trackers / g3dvl / vl_r16snorm_mc_buf.c
1 #define VL_INTERNAL
2 #include "vl_r16snorm_mc_buf.h"
3 #include <assert.h>
4 #include <stdlib.h>
5 #include <pipe/p_context.h>
6 #include <pipe/p_winsys.h>
7 #include <pipe/p_screen.h>
8 #include <pipe/p_state.h>
9 #include <pipe/p_util.h>
10 #include <pipe/p_inlines.h>
11 #include <tgsi/tgsi_parse.h>
12 #include <tgsi/tgsi_build.h>
13 #include "vl_render.h"
14 #include "vl_shader_build.h"
15 #include "vl_surface.h"
16 #include "vl_util.h"
17 #include "vl_types.h"
18 #include "vl_defs.h"
19
20 #define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */
21
/*
 * Extended macroblock classification used to bin macroblocks into separate
 * vertex buffers and shader pairs: prediction direction (intra/fwd/bkwd/bi)
 * crossed with frame vs. field motion type.
 */
enum vlMacroBlockTypeEx
{
	vlMacroBlockExTypeIntra,
	vlMacroBlockExTypeFwdPredictedFrame,
	vlMacroBlockExTypeFwdPredictedField,
	vlMacroBlockExTypeBkwdPredictedFrame,
	vlMacroBlockExTypeBkwdPredictedField,
	vlMacroBlockExTypeBiPredictedFrame,
	vlMacroBlockExTypeBiPredictedField,

	vlNumMacroBlockExTypes	/* Count of the entries above */
};
34
/* Vertex shader constant buffer layout: denorm holds the target surface
   width/height (see vlFlush), used to denormalize vertex coordinates. */
struct vlVertexShaderConsts
{
	struct vlVertex4f denorm;
};
39
/* Fragment shader constant buffer layout; values are supplied by the
   file-scope fs_consts table (sample renormalization + field-select divider). */
struct vlFragmentShaderConsts
{
	struct vlVertex4f multiplier;
	struct vlVertex4f div;
};
45
/*
 * Buffered motion-compensation renderer state. Macroblocks are accumulated
 * per type (see vlGrabMacroBlock) and rendered in one pass per type at
 * flush time, using NUM_BUF_SETS rotating sets of vertex buffers/textures.
 */
struct vlR16SnormBufferedMC
{
	struct vlRender base;	/* Must be first: callers downcast vlRender* */

	unsigned int video_width, video_height;
	enum vlFormat video_format;

	unsigned int cur_buf;	/* Rotating buffer set selector (used modulo NUM_BUF_SETS) */
	struct vlSurface *buffered_surface;	/* Surface the pending macroblocks target */
	struct vlSurface *past_surface, *future_surface;	/* Reference surfaces for P/B blocks */
	struct vlVertex2f surface_tex_inv_size;	/* 1/width, 1/height of the target texture */
	unsigned int num_macroblocks[vlNumMacroBlockExTypes];	/* Pending count per type */
	unsigned int total_num_macroblocks;

	struct pipe_context *pipe;
	struct pipe_viewport_state viewport;
	struct pipe_framebuffer_state render_target;
	/* Samplers/textures: [0]=luma, [1]=Cb, [2]=Cr, [3]/[4]=past/future refs */
	struct pipe_sampler_state *samplers[5];
	struct pipe_texture *textures[NUM_BUF_SETS][5];
	/* Shaders: i=intra, p=fwd/bkwd predicted, b=bi-predicted; [0]=frame, [1]=field */
	void *i_vs, *p_vs[2], *b_vs[2];
	void *i_fs, *p_fs[2], *b_fs[2];
	/* Per set, per type: [0]=positions, [1]=first MV stream, [2]=second MV stream */
	struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][vlNumMacroBlockExTypes][3];
	struct pipe_vertex_element vertex_elems[5];
	struct pipe_constant_buffer vs_const_buf, fs_const_buf;
};
71
/* No-op begin hook for the buffered MC renderer; always succeeds. */
static int vlBegin(struct vlRender *render)
{
	assert(render);

	return 0;
}
81
82 static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch)
83 {
84 unsigned int y;
85
86 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
87 memcpy
88 (
89 dst + y * dst_pitch,
90 src + y * VL_BLOCK_WIDTH,
91 VL_BLOCK_WIDTH * 2
92 );
93
94 return 0;
95 }
96
97 static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch)
98 {
99 unsigned int y;
100
101 for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y)
102 memcpy
103 (
104 dst + y * dst_pitch * 2,
105 src + y * VL_BLOCK_WIDTH,
106 VL_BLOCK_WIDTH * 2
107 );
108
109 dst += VL_BLOCK_HEIGHT * dst_pitch;
110
111 for (; y < VL_BLOCK_HEIGHT; ++y)
112 memcpy
113 (
114 dst + y * dst_pitch * 2,
115 src + y * VL_BLOCK_WIDTH,
116 VL_BLOCK_WIDTH * 2
117 );
118
119 return 0;
120 }
121
122 static int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
123 {
124 unsigned int y;
125
126 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
127 memset
128 (
129 dst + y * dst_pitch,
130 0,
131 VL_BLOCK_WIDTH * 2
132 );
133
134 return 0;
135 }
136
/*
 * Uploads the (up to six) 8x8 residual blocks of one macroblock into the
 * luma/chroma textures of the current rotating buffer set, at the
 * macroblock's pixel position. coded_block_pattern selects which blocks
 * are present in 'blocks' (MSB-first: 4 luma, then Cb, Cr); absent blocks
 * are zero-filled via vlGrabNoBlock.
 */
static int vlGrabBlocks
(
	struct vlR16SnormBufferedMC *mc,
	unsigned int mbx,
	unsigned int mby,
	enum vlDCTType dct_type,
	unsigned int coded_block_pattern,
	short *blocks
)
{
	struct pipe_surface *tex_surface;
	short *texels;
	unsigned int tex_pitch;
	unsigned int x, y, tb = 0, sb = 0;
	unsigned int mbpx = mbx * VL_MACROBLOCK_WIDTH, mbpy = mby * VL_MACROBLOCK_HEIGHT;

	assert(mc);
	assert(blocks);

	/* Luma texture is index 0 of the current buffer set */
	tex_surface = mc->pipe->screen->get_tex_surface
	(
		mc->pipe->screen,
		mc->textures[mc->cur_buf % NUM_BUF_SETS][0],
		0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
	);

	texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
	tex_pitch = tex_surface->stride / tex_surface->block.size;	/* pitch in texels, not bytes */

	texels += mbpy * tex_pitch + mbpx;

	/* Four luma blocks in a 2x2 layout; cbp bits 5..2 select them */
	for (y = 0; y < 2; ++y)
	{
		for (x = 0; x < 2; ++x, ++tb)
		{
			if ((coded_block_pattern >> (5 - tb)) & 1)
			{
				short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

				if (dct_type == vlDCTTypeFrameCoded)
				{
					vlGrabFrameCodedBlock
					(
						cur_block,
						texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH,
						tex_pitch
					);
				}
				else
				{
					/* Field coded: y selects top (0) or bottom (1) field
					   via a one-row offset; rows are interleaved by the helper */
					vlGrabFieldCodedBlock
					(
						cur_block,
						texels + y * tex_pitch + x * VL_BLOCK_WIDTH,
						tex_pitch
					);
				}

				++sb;
			}
			else
				vlGrabNoBlock(texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, tex_pitch);
		}
	}

	pipe_surface_unmap(tex_surface);

	/* TODO: Implement 422, 444 */
	/* 4:2:0 chroma planes are half-size in both dimensions */
	mbpx >>= 1;
	mbpy >>= 1;

	/* One Cb (texture 1) and one Cr (texture 2) block; cbp bits 1..0 */
	for (tb = 0; tb < 2; ++tb)
	{
		tex_surface = mc->pipe->screen->get_tex_surface
		(
			mc->pipe->screen,
			mc->textures[mc->cur_buf % NUM_BUF_SETS][tb + 1],
			0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
		);

		texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
		tex_pitch = tex_surface->stride / tex_surface->block.size;

		texels += mbpy * tex_pitch + mbpx;

		if ((coded_block_pattern >> (1 - tb)) & 1)
		{
			short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

			/* Chroma blocks are always grabbed frame coded here */
			vlGrabFrameCodedBlock
			(
				cur_block,
				texels,
				tex_pitch
			);

			++sb;
		}
		else
			vlGrabNoBlock(texels, tex_pitch);

		pipe_surface_unmap(tex_surface);
	}

	return 0;
}
243
244 #if 0
/*
 * Disabled variant of vlGrabMacroBlock (compiled out by the #if 0 above).
 * It fills the vertex streams via deliberate switch fall-through:
 * BiPredicted writes stream 2, then falls through to the Fwd/Bkwd case for
 * stream 1 (first motion vectors), which falls through to the Intra case
 * for stream 0 (macroblock corner positions). Kept for reference; the
 * active implementation follows the #else.
 */
static int vlGrabMacroBlock
(
	struct vlR16SnormBufferedMC *mc,
	struct vlMpeg2MacroBlock *macroblock
)
{
	/* Macroblock size and half-size in normalized texture coordinates */
	const struct vlVertex2f unit =
	{
		mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH,
		mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT
	};
	const struct vlVertex2f half =
	{
		mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2),
		mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2)
	};

	struct vlVertex2f *vb;
	enum vlMacroBlockTypeEx mb_type_ex;
	struct vlVertex2f mo_vec[2];
	unsigned int i;

	assert(mc);
	assert(macroblock);

	switch (macroblock->mb_type)
	{
		case vlMacroBlockTypeIntra:
		{
			mb_type_ex = vlMacroBlockExTypeIntra;
			break;
		}
		case vlMacroBlockTypeFwdPredicted:
		{
			mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ?
				vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField;
			break;
		}
		case vlMacroBlockTypeBkwdPredicted:
		{
			mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ?
				vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField;
			break;
		}
		case vlMacroBlockTypeBiPredicted:
		{
			mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ?
				vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField;
			break;
		}
		default:
			assert(0);
	}

	switch (macroblock->mb_type)
	{
		case vlMacroBlockTypeBiPredicted:
		{
			/* Stream 2: second (backward) motion vectors, one pair per vertex */
			vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
			(
				mc->pipe->winsys,
				mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][2].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE
			) + mc->num_macroblocks[mb_type_ex] * 2 * 24;

			mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
			mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

			if (macroblock->mo_type == vlMotionTypeFrame)
			{
				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
				}
			}
			else
			{
				mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
				mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;

				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
					vb[i + 1].x = mo_vec[1].x;
					vb[i + 1].y = mo_vec[1].y;
				}
			}

			mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][2].buffer);

			/* fall-through */
		}
		case vlMacroBlockTypeFwdPredicted:
		case vlMacroBlockTypeBkwdPredicted:
		{
			/* Stream 1: first motion vectors, one pair per vertex */
			vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
			(
				mc->pipe->winsys,
				mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][1].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE
			) + mc->num_macroblocks[mb_type_ex] * 2 * 24;

			if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted)
			{
				mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
				mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

				if (macroblock->mo_type == vlMotionTypeField)
				{
					mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
					mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;
				}
			}
			else
			{
				mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x;
				mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y;

				if (macroblock->mo_type == vlMotionTypeField)
				{
					mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x;
					mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y;
				}
			}

			if (macroblock->mo_type == vlMotionTypeFrame)
			{
				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
				}
			}
			else
			{
				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
					vb[i + 1].x = mo_vec[1].x;
					vb[i + 1].y = mo_vec[1].y;
				}
			}

			mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][1].buffer);

			/* fall-through */
		}
		case vlMacroBlockTypeIntra:
		{
			/* Stream 0: 24 vertices (4 quads = 8 triangles) covering the macroblock */
			vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
			(
				mc->pipe->winsys,
				mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][0].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE
			) + mc->num_macroblocks[mb_type_ex] * 24;

			vb[0].x = macroblock->mbx * unit.x; vb[0].y = macroblock->mby * unit.y;
			vb[1].x = macroblock->mbx * unit.x; vb[1].y = macroblock->mby * unit.y + half.y;
			vb[2].x = macroblock->mbx * unit.x + half.x; vb[2].y = macroblock->mby * unit.y;

			vb[3].x = macroblock->mbx * unit.x + half.x; vb[3].y = macroblock->mby * unit.y;
			vb[4].x = macroblock->mbx * unit.x; vb[4].y = macroblock->mby * unit.y + half.y;
			vb[5].x = macroblock->mbx * unit.x + half.x; vb[5].y = macroblock->mby * unit.y + half.y;

			vb[6].x = macroblock->mbx * unit.x + half.x; vb[6].y = macroblock->mby * unit.y;
			vb[7].x = macroblock->mbx * unit.x + half.x; vb[7].y = macroblock->mby * unit.y + half.y;
			vb[8].x = macroblock->mbx * unit.x + unit.x; vb[8].y = macroblock->mby * unit.y;

			vb[9].x = macroblock->mbx * unit.x + unit.x; vb[9].y = macroblock->mby * unit.y;
			vb[10].x = macroblock->mbx * unit.x + half.x; vb[10].y = macroblock->mby * unit.y + half.y;
			vb[11].x = macroblock->mbx * unit.x + unit.x; vb[11].y = macroblock->mby * unit.y + half.y;

			vb[12].x = macroblock->mbx * unit.x; vb[12].y = macroblock->mby * unit.y + half.y;
			vb[13].x = macroblock->mbx * unit.x; vb[13].y = macroblock->mby * unit.y + unit.y;
			vb[14].x = macroblock->mbx * unit.x + half.x; vb[14].y = macroblock->mby * unit.y + half.y;

			vb[15].x = macroblock->mbx * unit.x + half.x; vb[15].y = macroblock->mby * unit.y + half.y;
			vb[16].x = macroblock->mbx * unit.x; vb[16].y = macroblock->mby * unit.y + unit.y;
			vb[17].x = macroblock->mbx * unit.x + half.x; vb[17].y = macroblock->mby * unit.y + unit.y;

			vb[18].x = macroblock->mbx * unit.x + half.x; vb[18].y = macroblock->mby * unit.y + half.y;
			vb[19].x = macroblock->mbx * unit.x + half.x; vb[19].y = macroblock->mby * unit.y + unit.y;
			vb[20].x = macroblock->mbx * unit.x + unit.x; vb[20].y = macroblock->mby * unit.y + half.y;

			vb[21].x = macroblock->mbx * unit.x + unit.x; vb[21].y = macroblock->mby * unit.y + half.y;
			vb[22].x = macroblock->mbx * unit.x + half.x; vb[22].y = macroblock->mby * unit.y + unit.y;
			vb[23].x = macroblock->mbx * unit.x + unit.x; vb[23].y = macroblock->mby * unit.y + unit.y;

			mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][0].buffer);

			break;
		}
		default:
			assert(0);
	}

	vlGrabBlocks
	(
		mc,
		macroblock->mbx,
		macroblock->mby,
		macroblock->dct_type,
		macroblock->cbp,
		macroblock->blocks
	);

	mc->num_macroblocks[mb_type_ex]++;
	mc->total_num_macroblocks++;

	return 0;
}
459 #else
/*
 * Buffers one macroblock: writes its quad vertices (stream 0), and for
 * predicted types its motion vectors (stream 1, and stream 2 for
 * bi-predicted), into the vertex buffers of its type bin, then uploads the
 * residual blocks via vlGrabBlocks and bumps the per-type counters.
 * Returns early after the streams required by the macroblock type.
 */
static int vlGrabMacroBlock
(
	struct vlR16SnormBufferedMC *mc,
	struct vlMpeg2MacroBlock *macroblock
)
{
	/* Macroblock size and half-size in normalized texture coordinates */
	const struct vlVertex2f unit =
	{
		mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH,
		mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT
	};
	const struct vlVertex2f half =
	{
		mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2),
		mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2)
	};

	struct vlVertex2f *vb;
	unsigned int mb_buf_id;
	struct vlVertex2f mo_vec[2];
	unsigned int i;

	assert(mc);
	assert(macroblock);

	/* Map the macroblock type (+ motion type) to its buffer bin.
	   NOTE(review): if asserts are compiled out (NDEBUG), an unknown
	   mb_type leaves mb_buf_id uninitialized — confirm callers only pass
	   the four known types. */
	switch (macroblock->mb_type)
	{
		case vlMacroBlockTypeIntra:
		{
			mb_buf_id = vlMacroBlockExTypeIntra;
			break;
		}
		case vlMacroBlockTypeFwdPredicted:
		{
			mb_buf_id = macroblock->mo_type == vlMotionTypeFrame ?
				vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField;
			break;
		}
		case vlMacroBlockTypeBkwdPredicted:
		{
			mb_buf_id = macroblock->mo_type == vlMotionTypeFrame ?
				vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField;
			break;
		}
		case vlMacroBlockTypeBiPredicted:
		{
			mb_buf_id = macroblock->mo_type == vlMotionTypeFrame ?
				vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField;
			break;
		}
		default:
			assert(0);
	}

	/* Stream 0: 24 vertices (4 quads = 8 triangles) covering the macroblock */
	vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
	(
		mc->pipe->winsys,
		mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][0].buffer,
		PIPE_BUFFER_USAGE_CPU_WRITE
	) + mc->num_macroblocks[mb_buf_id] * 24;

	vb[0].x = macroblock->mbx * unit.x; vb[0].y = macroblock->mby * unit.y;
	vb[1].x = macroblock->mbx * unit.x; vb[1].y = macroblock->mby * unit.y + half.y;
	vb[2].x = macroblock->mbx * unit.x + half.x; vb[2].y = macroblock->mby * unit.y;

	vb[3].x = macroblock->mbx * unit.x + half.x; vb[3].y = macroblock->mby * unit.y;
	vb[4].x = macroblock->mbx * unit.x; vb[4].y = macroblock->mby * unit.y + half.y;
	vb[5].x = macroblock->mbx * unit.x + half.x; vb[5].y = macroblock->mby * unit.y + half.y;

	vb[6].x = macroblock->mbx * unit.x + half.x; vb[6].y = macroblock->mby * unit.y;
	vb[7].x = macroblock->mbx * unit.x + half.x; vb[7].y = macroblock->mby * unit.y + half.y;
	vb[8].x = macroblock->mbx * unit.x + unit.x; vb[8].y = macroblock->mby * unit.y;

	vb[9].x = macroblock->mbx * unit.x + unit.x; vb[9].y = macroblock->mby * unit.y;
	vb[10].x = macroblock->mbx * unit.x + half.x; vb[10].y = macroblock->mby * unit.y + half.y;
	vb[11].x = macroblock->mbx * unit.x + unit.x; vb[11].y = macroblock->mby * unit.y + half.y;

	vb[12].x = macroblock->mbx * unit.x; vb[12].y = macroblock->mby * unit.y + half.y;
	vb[13].x = macroblock->mbx * unit.x; vb[13].y = macroblock->mby * unit.y + unit.y;
	vb[14].x = macroblock->mbx * unit.x + half.x; vb[14].y = macroblock->mby * unit.y + half.y;

	vb[15].x = macroblock->mbx * unit.x + half.x; vb[15].y = macroblock->mby * unit.y + half.y;
	vb[16].x = macroblock->mbx * unit.x; vb[16].y = macroblock->mby * unit.y + unit.y;
	vb[17].x = macroblock->mbx * unit.x + half.x; vb[17].y = macroblock->mby * unit.y + unit.y;

	vb[18].x = macroblock->mbx * unit.x + half.x; vb[18].y = macroblock->mby * unit.y + half.y;
	vb[19].x = macroblock->mbx * unit.x + half.x; vb[19].y = macroblock->mby * unit.y + unit.y;
	vb[20].x = macroblock->mbx * unit.x + unit.x; vb[20].y = macroblock->mby * unit.y + half.y;

	vb[21].x = macroblock->mbx * unit.x + unit.x; vb[21].y = macroblock->mby * unit.y + half.y;
	vb[22].x = macroblock->mbx * unit.x + half.x; vb[22].y = macroblock->mby * unit.y + unit.y;
	vb[23].x = macroblock->mbx * unit.x + unit.x; vb[23].y = macroblock->mby * unit.y + unit.y;

	mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][0].buffer);

	/* Intra macroblocks need no motion vectors; done after stream 0 */
	if (macroblock->mb_type == vlMacroBlockTypeIntra)
	{
		vlGrabBlocks
		(
			mc,
			macroblock->mbx,
			macroblock->mby,
			macroblock->dct_type,
			macroblock->cbp,
			macroblock->blocks
		);

		mc->num_macroblocks[mb_buf_id]++;
		mc->total_num_macroblocks++;
		return 0;
	}

	/* Stream 1: first motion vectors — forward MVs (PMV[..][0]) for
	   fwd/bi-predicted, backward MVs (PMV[..][1]) for bkwd-predicted.
	   One pair of vlVertex2f per vertex (hence 2 * 24). */
	vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
	(
		mc->pipe->winsys,
		mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][1].buffer,
		PIPE_BUFFER_USAGE_CPU_WRITE
	) + mc->num_macroblocks[mb_buf_id] * 2 * 24;

	if (macroblock->mb_type == vlMacroBlockTypeFwdPredicted || macroblock->mb_type == vlMacroBlockTypeBiPredicted)
	{
		mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x;
		mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y;

		if (macroblock->mo_type == vlMotionTypeField)
		{
			mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x;
			mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y;
		}
	}
	else
	{
		mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
		mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

		if (macroblock->mo_type == vlMotionTypeField)
		{
			mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
			mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;
		}
	}

	/* Frame motion: both MVs of each pair are the first vector; field
	   motion: the pair carries top/bottom field vectors */
	if (macroblock->mo_type == vlMotionTypeFrame)
	{
		for (i = 0; i < 24 * 2; i += 2)
		{
			vb[i].x = mo_vec[0].x;
			vb[i].y = mo_vec[0].y;
		}
	}
	else
	{
		for (i = 0; i < 24 * 2; i += 2)
		{
			vb[i].x = mo_vec[0].x;
			vb[i].y = mo_vec[0].y;
			vb[i + 1].x = mo_vec[1].x;
			vb[i + 1].y = mo_vec[1].y;
		}
	}

	mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][1].buffer);

	/* Fwd/bkwd-predicted macroblocks are done after stream 1 */
	if (macroblock->mb_type != vlMacroBlockTypeBiPredicted)
	{
		vlGrabBlocks
		(
			mc,
			macroblock->mbx,
			macroblock->mby,
			macroblock->dct_type,
			macroblock->cbp,
			macroblock->blocks
		);

		mc->num_macroblocks[mb_buf_id]++;
		mc->total_num_macroblocks++;
		return 0;
	}

	/* Stream 2 (bi-predicted only): backward motion vectors (PMV[..][1]) */
	vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
	(
		mc->pipe->winsys,
		mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][2].buffer,
		PIPE_BUFFER_USAGE_CPU_WRITE
	) + mc->num_macroblocks[mb_buf_id] * 2 * 24;

	mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
	mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

	if (macroblock->mo_type == vlMotionTypeFrame)
	{
		for (i = 0; i < 24 * 2; i += 2)
		{
			vb[i].x = mo_vec[0].x;
			vb[i].y = mo_vec[0].y;
		}
	}
	else
	{
		mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
		mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;

		for (i = 0; i < 24 * 2; i += 2)
		{
			vb[i].x = mo_vec[0].x;
			vb[i].y = mo_vec[0].y;
			vb[i + 1].x = mo_vec[1].x;
			vb[i + 1].y = mo_vec[1].y;
		}
	}

	mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][2].buffer);

	vlGrabBlocks
	(
		mc,
		macroblock->mbx,
		macroblock->mby,
		macroblock->dct_type,
		macroblock->cbp,
		macroblock->blocks
	);

	mc->num_macroblocks[mb_buf_id]++;
	mc->total_num_macroblocks++;

	return 0;
}
689 #endif
690
691 static int vlFlush
692 (
693 struct vlRender *render
694 )
695 {
696 struct vlR16SnormBufferedMC *mc;
697 struct pipe_context *pipe;
698 struct vlVertexShaderConsts *vs_consts;
699
700 assert(mc);
701
702 mc = (struct vlR16SnormBufferedMC*)render;
703 pipe = mc->pipe;
704
705 mc->render_target.cbufs[0] = pipe->screen->get_tex_surface
706 (
707 pipe->screen,
708 mc->buffered_surface->texture,
709 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
710 );
711
712 pipe->set_framebuffer_state(pipe, &mc->render_target);
713 pipe->set_viewport_state(pipe, &mc->viewport);
714 vs_consts = pipe->winsys->buffer_map
715 (
716 pipe->winsys,
717 mc->vs_const_buf.buffer,
718 PIPE_BUFFER_USAGE_CPU_WRITE
719 );
720
721 vs_consts->denorm.x = mc->buffered_surface->texture->width[0];
722 vs_consts->denorm.y = mc->buffered_surface->texture->height[0];
723
724 pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer);
725 pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf);
726
727 if (mc->num_macroblocks[vlMacroBlockExTypeIntra] > 0)
728 {
729 pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeIntra]);
730 pipe->set_vertex_elements(pipe, 1, mc->vertex_elems);
731 pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
732 pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers);
733 pipe->bind_vs_state(pipe, mc->i_vs);
734 pipe->bind_fs_state(pipe, mc->i_fs);
735
736 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeIntra] * 24);
737 }
738
739 if (mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0)
740 {
741 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeFwdPredictedFrame]);
742 pipe->set_vertex_elements(pipe, 3, mc->vertex_elems);
743 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
744 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
745 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
746 pipe->bind_vs_state(pipe, mc->p_vs[0]);
747 pipe->bind_fs_state(pipe, mc->p_fs[0]);
748
749 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24);
750 }
751
752 if (mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0)
753 {
754 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeFwdPredictedField]);
755 pipe->set_vertex_elements(pipe, 3, mc->vertex_elems);
756 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
757 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
758 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
759 pipe->bind_vs_state(pipe, mc->p_vs[1]);
760 pipe->bind_fs_state(pipe, mc->p_fs[1]);
761
762 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24);
763 }
764
765 if (mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0)
766 {
767 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBkwdPredictedFrame]);
768 pipe->set_vertex_elements(pipe, 3, mc->vertex_elems);
769 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
770 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
771 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
772 pipe->bind_vs_state(pipe, mc->p_vs[0]);
773 pipe->bind_fs_state(pipe, mc->p_fs[0]);
774
775 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24);
776 }
777
778 if (mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0)
779 {
780 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBkwdPredictedField]);
781 pipe->set_vertex_elements(pipe, 3, mc->vertex_elems);
782 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
783 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
784 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
785 pipe->bind_vs_state(pipe, mc->p_vs[1]);
786 pipe->bind_fs_state(pipe, mc->p_fs[1]);
787
788 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24);
789 }
790
791 if (mc->num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0)
792 {
793 pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBiPredictedFrame]);
794 pipe->set_vertex_elements(pipe, 5, mc->vertex_elems);
795 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
796 mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
797 pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
798 pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
799 pipe->bind_vs_state(pipe, mc->b_vs[0]);
800 pipe->bind_fs_state(pipe, mc->b_fs[0]);
801
802 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24);
803 }
804
805 if (mc->num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0)
806 {
807 pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBiPredictedField]);
808 pipe->set_vertex_elements(pipe, 5, mc->vertex_elems);
809 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
810 mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
811 pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
812 pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
813 pipe->bind_vs_state(pipe, mc->b_vs[1]);
814 pipe->bind_fs_state(pipe, mc->b_fs[1]);
815
816 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24);
817 }
818
819 memset(mc->num_macroblocks, 0, sizeof(unsigned int) * 7);
820 mc->total_num_macroblocks = 0;
821
822 return 0;
823 }
824
825 static int vlRenderMacroBlocksMpeg2R16SnormBuffered
826 (
827 struct vlRender *render,
828 struct vlMpeg2MacroBlockBatch *batch,
829 struct vlSurface *surface
830 )
831 {
832 struct vlR16SnormBufferedMC *mc;
833 unsigned int i;
834
835 assert(render);
836
837 mc = (struct vlR16SnormBufferedMC*)render;
838
839 if (mc->buffered_surface)
840 {
841 if
842 (
843 mc->buffered_surface != surface /*||
844 mc->past_surface != batch->past_surface ||
845 mc->future_surface != batch->future_surface*/
846 )
847 {
848 vlFlush(&mc->base);
849 mc->buffered_surface = surface;
850 mc->past_surface = batch->past_surface;
851 mc->future_surface = batch->future_surface;
852 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
853 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
854 }
855 }
856 else
857 {
858 mc->buffered_surface = surface;
859 mc->past_surface = batch->past_surface;
860 mc->future_surface = batch->future_surface;
861 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
862 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
863 }
864
865 for (i = 0; i < batch->num_macroblocks; ++i)
866 vlGrabMacroBlock(mc, &batch->macroblocks[i]);
867
868 return 0;
869 }
870
/* No-op end hook for the buffered MC renderer; always succeeds. */
static int vlEnd(struct vlRender *render)
{
	assert(render);

	return 0;
}
880
881 static int vlDestroy
882 (
883 struct vlRender *render
884 )
885 {
886 struct vlR16SnormBufferedMC *mc;
887 struct pipe_context *pipe;
888 unsigned int g, h, i;
889
890 assert(render);
891
892 mc = (struct vlR16SnormBufferedMC*)render;
893 pipe = mc->pipe;
894
895 for (i = 0; i < 5; ++i)
896 pipe->delete_sampler_state(pipe, mc->samplers[i]);
897
898 for (g = 0; g < NUM_BUF_SETS; ++g)
899 for (h = 0; h < 7; ++h)
900 for (i = 0; i < 3; ++i)
901 pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[g][h][i].buffer);
902
903 /* Textures 3 & 4 are not created directly, no need to release them here */
904 for (i = 0; i < NUM_BUF_SETS; ++i)
905 {
906 pipe_texture_release(&mc->textures[i][0]);
907 pipe_texture_release(&mc->textures[i][1]);
908 pipe_texture_release(&mc->textures[i][2]);
909 }
910
911 pipe->delete_vs_state(pipe, mc->i_vs);
912 pipe->delete_fs_state(pipe, mc->i_fs);
913
914 for (i = 0; i < 2; ++i)
915 {
916 pipe->delete_vs_state(pipe, mc->p_vs[i]);
917 pipe->delete_fs_state(pipe, mc->p_fs[i]);
918 pipe->delete_vs_state(pipe, mc->b_vs[i]);
919 pipe->delete_fs_state(pipe, mc->b_fs[i]);
920 }
921
922 pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer);
923 pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer);
924
925 free(mc);
926
927 return 0;
928 }
929
/*
 * Multiplier renormalizes block samples from 16 bits to 12 bits.
 * Divider is used when calculating Y % 2 for choosing top or bottom
 * field for P or B macroblocks.
 * TODO: Use immediates.
 */
static const struct vlFragmentShaderConsts fs_consts =
{
	{32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},	/* multiplier: per-channel sample renormalization */
	{0.5f, 2.0f, 0.0f, 0.0f}	/* div: constants for the Y % 2 field selection */
};
941
/*
 * Builds the intra-macroblock vertex shader: passes the input vertex
 * position through to both the output position and the shared luma/chroma
 * texcoord output. Stores the compiled state in mc->i_vs.
 */
static int vlCreateVertexShaderIMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 50;

	struct pipe_context *pipe;
	struct pipe_shader_state vs;
	struct tgsi_token *tokens;
	struct tgsi_header *header;

	struct tgsi_full_declaration decl;
	struct tgsi_full_instruction inst;

	unsigned int ti;
	unsigned int i;

	assert(mc);

	pipe = mc->pipe;
	/* NOTE(review): malloc result is not checked before use — OOM here
	   would crash; consider bailing out like other error paths. */
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);

	ti = 3;	/* Next free token slot */

	/*
	 * decl i0 ; Vertex pos, luma & chroma texcoords
	 */
	for (i = 0; i < 3; i++)
	{
		decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * decl o0 ; Vertex pos
	 * decl o1 ; Luma/chroma texcoords
	 */
	for (i = 0; i < 2; i++)
	{
		decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * mov o0, i0 ; Move input vertex pos to output
	 * mov o1, i0 ; Move input luma/chroma texcoords to output
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	vs.tokens = tokens;
	mc->i_vs = pipe->create_vs_state(pipe, &vs);	/* driver copies the tokens */
	free(tokens);

	return 0;
}
1014
1015 static int vlCreateFragmentShaderIMB
1016 (
1017 struct vlR16SnormBufferedMC *mc
1018 )
1019 {
1020 const unsigned int max_tokens = 100;
1021
1022 struct pipe_context *pipe;
1023 struct pipe_shader_state fs;
1024 struct tgsi_token *tokens;
1025 struct tgsi_header *header;
1026
1027 struct tgsi_full_declaration decl;
1028 struct tgsi_full_instruction inst;
1029
1030 unsigned int ti;
1031 unsigned int i;
1032
1033 assert(mc);
1034
1035 pipe = mc->pipe;
1036 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1037
1038 /* Version */
1039 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1040 /* Header */
1041 header = (struct tgsi_header*)&tokens[1];
1042 *header = tgsi_build_header();
1043 /* Processor */
1044 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1045
1046 ti = 3;
1047
1048 /* decl i0 ; Luma/chroma texcoords */
1049 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
1050 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1051
1052 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
1053 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1054 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1055
1056 /* decl o0 ; Fragment color */
1057 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1058 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1059
1060 /* decl t0, t1 */
1061 decl = vl_decl_temps(0, 1);
1062 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1063
1064 /*
1065 * decl s0 ; Sampler for luma texture
1066 * decl s1 ; Sampler for chroma Cb texture
1067 * decl s2 ; Sampler for chroma Cr texture
1068 */
1069 for (i = 0; i < 3; ++i)
1070 {
1071 decl = vl_decl_samplers(i, i);
1072 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti);
1073 }
1074
1075 /*
1076 * tex2d t1, i0, s0 ; Read texel from luma texture
1077 * mov t0.x, t1.x ; Move luma sample into .x component
1078 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1079 * mov t0.y, t1.x ; Move Cb sample into .y component
1080 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1081 * mov t0.z, t1.x ; Move Cr sample into .z component
1082 */
1083 for (i = 0; i < 3; ++i)
1084 {
1085 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
1086 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1087
1088 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1089 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1090 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1091 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1092 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1093 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1094
1095 }
1096
1097 /* mul o0, t0, c0 ; Rescale texel to correct range */
1098 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1099 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1100
1101 /* end */
1102 inst = vl_end();
1103 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1104
1105 fs.tokens = tokens;
1106 mc->i_fs = pipe->create_fs_state(pipe, &fs);
1107 free(tokens);
1108
1109 return 0;
1110 }
1111
1112 static int vlCreateVertexShaderFramePMB
1113 (
1114 struct vlR16SnormBufferedMC *mc
1115 )
1116 {
1117 const unsigned int max_tokens = 100;
1118
1119 struct pipe_context *pipe;
1120 struct pipe_shader_state vs;
1121 struct tgsi_token *tokens;
1122 struct tgsi_header *header;
1123
1124 struct tgsi_full_declaration decl;
1125 struct tgsi_full_instruction inst;
1126
1127 unsigned int ti;
1128 unsigned int i;
1129
1130 assert(mc);
1131
1132 pipe = mc->pipe;
1133 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1134
1135 /* Version */
1136 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1137 /* Header */
1138 header = (struct tgsi_header*)&tokens[1];
1139 *header = tgsi_build_header();
1140 /* Processor */
1141 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1142
1143 ti = 3;
1144
1145 /*
1146 * decl i0 ; Vertex pos, luma/chroma texcoords
1147 * decl i1 ; Ref surface top field texcoords
1148 * decl i2 ; Ref surface bottom field texcoords (unused, packed in the same stream)
1149 */
1150 for (i = 0; i < 3; i++)
1151 {
1152 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1153 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1154 }
1155
1156 /*
1157 * decl o0 ; Vertex pos
1158 * decl o1 ; Luma/chroma texcoords
1159 * decl o2 ; Ref macroblock texcoords
1160 */
1161 for (i = 0; i < 3; i++)
1162 {
1163 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1164 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1165 }
1166
1167 /*
1168 * mov o0, i0 ; Move input vertex pos to output
1169 * mov o1, i0 ; Move input luma/chroma texcoords to output
1170 */
1171 for (i = 0; i < 2; ++i)
1172 {
1173 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
1174 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1175 }
1176
1177 /* add o2, i0, i1 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
1178 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 1);
1179 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1180
1181 /* end */
1182 inst = vl_end();
1183 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1184
1185 vs.tokens = tokens;
1186 mc->p_vs[0] = pipe->create_vs_state(pipe, &vs);
1187 free(tokens);
1188
1189 return 0;
1190 }
1191
1192 static int vlCreateVertexShaderFieldPMB
1193 (
1194 struct vlR16SnormBufferedMC *mc
1195 )
1196 {
1197 const unsigned int max_tokens = 100;
1198
1199 struct pipe_context *pipe;
1200 struct pipe_shader_state vs;
1201 struct tgsi_token *tokens;
1202 struct tgsi_header *header;
1203
1204 struct tgsi_full_declaration decl;
1205 struct tgsi_full_instruction inst;
1206
1207 unsigned int ti;
1208 unsigned int i;
1209
1210 assert(mc);
1211
1212 pipe = mc->pipe;
1213 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1214
1215 /* Version */
1216 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1217 /* Header */
1218 header = (struct tgsi_header*)&tokens[1];
1219 *header = tgsi_build_header();
1220 /* Processor */
1221 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1222
1223 ti = 3;
1224
1225 /*
1226 * decl i0 ; Vertex pos, luma/chroma texcoords
1227 * decl i1 ; Ref surface top field texcoords
1228 * decl i2 ; Ref surface bottom field texcoords
1229 */
1230 for (i = 0; i < 3; i++)
1231 {
1232 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1233 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1234 }
1235
1236 /* decl c0 ; Texcoord denorm coefficients */
1237 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1238 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1239
1240 /*
1241 * decl o0 ; Vertex pos
1242 * decl o1 ; Luma/chroma texcoords
1243 * decl o2 ; Top field ref macroblock texcoords
1244 * decl o3 ; Bottom field ref macroblock texcoords
1245 * decl o4 ; Denormalized vertex pos
1246 */
1247 for (i = 0; i < 5; i++)
1248 {
1249 decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1250 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1251 }
1252
1253 /*
1254 * mov o0, i0 ; Move input vertex pos to output
1255 * mov o1, i0 ; Move input luma/chroma texcoords to output
1256 */
1257 for (i = 0; i < 3; ++i)
1258 {
1259 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i == 0 ? 0 : i - 1);
1260 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1261 }
1262
1263 /*
1264 * add o2, i0, i1 ; Translate vertex pos by motion vec to form top field macroblock texcoords
1265 * add o3, i0, i2 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords
1266 */
1267 for (i = 0; i < 2; ++i)
1268 {
1269 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 1);
1270 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1271 }
1272
1273 /* mul o4, i0, c0 ; Denorm vertex pos */
1274 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1275 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1276
1277 /* end */
1278 inst = vl_end();
1279 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1280
1281 vs.tokens = tokens;
1282 mc->p_vs[1] = pipe->create_vs_state(pipe, &vs);
1283 free(tokens);
1284
1285 return 0;
1286 }
1287
1288 static int vlCreateFragmentShaderFramePMB
1289 (
1290 struct vlR16SnormBufferedMC *mc
1291 )
1292 {
1293 const unsigned int max_tokens = 100;
1294
1295 struct pipe_context *pipe;
1296 struct pipe_shader_state fs;
1297 struct tgsi_token *tokens;
1298 struct tgsi_header *header;
1299
1300 struct tgsi_full_declaration decl;
1301 struct tgsi_full_instruction inst;
1302
1303 unsigned int ti;
1304 unsigned int i;
1305
1306 assert(mc);
1307
1308 pipe = mc->pipe;
1309 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1310
1311 /* Version */
1312 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1313 /* Header */
1314 header = (struct tgsi_header*)&tokens[1];
1315 *header = tgsi_build_header();
1316 /* Processor */
1317 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1318
1319 ti = 3;
1320
1321 /*
1322 * decl i0 ; Texcoords for s0, s1, s2
1323 * decl i1 ; Texcoords for s3
1324 */
1325 for (i = 0; i < 2; ++i)
1326 {
1327 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1328 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1329 }
1330
1331 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
1332 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1333 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1334
1335 /* decl o0 ; Fragment color */
1336 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1337 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1338
1339 /* decl t0, t1 */
1340 decl = vl_decl_temps(0, 1);
1341 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1342
1343 /*
1344 * decl s0 ; Sampler for luma texture
1345 * decl s1 ; Sampler for chroma Cb texture
1346 * decl s2 ; Sampler for chroma Cr texture
1347 * decl s3 ; Sampler for ref surface texture
1348 */
1349 for (i = 0; i < 4; ++i)
1350 {
1351 decl = vl_decl_samplers(i, i);
1352 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1353 }
1354
1355 /*
1356 * tex2d t1, i0, s0 ; Read texel from luma texture
1357 * mov t0.x, t1.x ; Move luma sample into .x component
1358 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1359 * mov t0.y, t1.x ; Move Cb sample into .y component
1360 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1361 * mov t0.z, t1.x ; Move Cr sample into .z component
1362 */
1363 for (i = 0; i < 3; ++i)
1364 {
1365 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
1366 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1367
1368 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1369 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1370 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1371 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1372 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1373 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1374
1375 }
1376
1377 /* mul t0, t0, c0 ; Rescale texel to correct range */
1378 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1379 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1380
1381 /* tex2d t1, i1, s3 ; Read texel from ref macroblock */
1382 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 1, TGSI_FILE_SAMPLER, 3);
1383 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1384
1385 /* add o0, t0, t1 ; Add ref and differential to form final output */
1386 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1387 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1388
1389 /* end */
1390 inst = vl_end();
1391 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1392
1393 fs.tokens = tokens;
1394 mc->p_fs[0] = pipe->create_fs_state(pipe, &fs);
1395 free(tokens);
1396
1397 return 0;
1398 }
1399
1400 static int vlCreateFragmentShaderFieldPMB
1401 (
1402 struct vlR16SnormBufferedMC *mc
1403 )
1404 {
1405 const unsigned int max_tokens = 200;
1406
1407 struct pipe_context *pipe;
1408 struct pipe_shader_state fs;
1409 struct tgsi_token *tokens;
1410 struct tgsi_header *header;
1411
1412 struct tgsi_full_declaration decl;
1413 struct tgsi_full_instruction inst;
1414
1415 unsigned int ti;
1416 unsigned int i;
1417
1418 assert(mc);
1419
1420 pipe = mc->pipe;
1421 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1422
1423 /* Version */
1424 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1425 /* Header */
1426 header = (struct tgsi_header*)&tokens[1];
1427 *header = tgsi_build_header();
1428 /* Processor */
1429 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1430
1431 ti = 3;
1432
1433 /*
1434 * decl i0 ; Texcoords for s0, s1, s2
1435 * decl i1 ; Texcoords for s3
1436 * decl i2 ; Texcoords for s3
1437 * decl i3 ; Denormalized vertex pos
1438 */
1439 for (i = 0; i < 4; ++i)
1440 {
1441 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1442 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1443 }
1444
1445 /*
1446 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1447 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
1448 */
1449 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1450 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1451
1452 /* decl o0 ; Fragment color */
1453 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1454 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1455
1456 /* decl t0-t4 */
1457 decl = vl_decl_temps(0, 4);
1458 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1459
1460 /*
1461 * decl s0 ; Sampler for luma texture
1462 * decl s1 ; Sampler for chroma Cb texture
1463 * decl s2 ; Sampler for chroma Cr texture
1464 * decl s3 ; Sampler for ref surface texture
1465 */
1466 for (i = 0; i < 4; ++i)
1467 {
1468 decl = vl_decl_samplers(i, i);
1469 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1470 }
1471
1472 /*
1473 * tex2d t1, i0, s0 ; Read texel from luma texture
1474 * mov t0.x, t1.x ; Move luma sample into .x component
1475 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1476 * mov t0.y, t1.x ; Move Cb sample into .y component
1477 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1478 * mov t0.z, t1.x ; Move Cr sample into .z component
1479 */
1480 for (i = 0; i < 3; ++i)
1481 {
1482 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
1483 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1484
1485 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1486 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1487 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1488 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1489 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1490 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1491
1492 }
1493
1494 /* mul t0, t0, c0 ; Rescale texel to correct range */
1495 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1496 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1497
1498 /*
1499 * tex2d t1, i1, s3 ; Read texel from ref macroblock top field
1500 * tex2d t2, i2, s3 ; Read texel from ref macroblock bottom field
1501 */
1502 for (i = 0; i < 2; ++i)
1503 {
1504 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, 3);
1505 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1506 }
1507
1508 /* XXX: Pos values off by 0.5? */
1509 /* sub t4, i3.y, c1.x ; Sub 0.5 from denormalized pos */
1510 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 3, TGSI_FILE_CONSTANT, 1);
1511 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1512 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1513 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1514 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1515 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1516 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1517 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1518 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1519 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1520
1521 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1522 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
1523 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1524 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1525 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1526 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1527 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1528
1529 /* floor t3, t3 ; Get rid of fractional part */
1530 inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
1531 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1532
1533 /* mul t3, t3, c1.y ; Multiply by 2 */
1534 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
1535 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1536 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1537 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1538 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1539 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1540
1541 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1542 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
1543 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1544
1545 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1546 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
1547 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1548 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1549
1550 /* add o0, t0, t1 ; Add ref and differential to form final output */
1551 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1552 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1553
1554 /* end */
1555 inst = vl_end();
1556 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1557
1558 fs.tokens = tokens;
1559 mc->p_fs[1] = pipe->create_fs_state(pipe, &fs);
1560 free(tokens);
1561
1562 return 0;
1563 }
1564
1565 static int vlCreateVertexShaderFrameBMB
1566 (
1567 struct vlR16SnormBufferedMC *mc
1568 )
1569 {
1570 const unsigned int max_tokens = 100;
1571
1572 struct pipe_context *pipe;
1573 struct pipe_shader_state vs;
1574 struct tgsi_token *tokens;
1575 struct tgsi_header *header;
1576
1577 struct tgsi_full_declaration decl;
1578 struct tgsi_full_instruction inst;
1579
1580 unsigned int ti;
1581 unsigned int i;
1582
1583 assert(mc);
1584
1585 pipe = mc->pipe;
1586 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1587
1588 /* Version */
1589 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1590 /* Header */
1591 header = (struct tgsi_header*)&tokens[1];
1592 *header = tgsi_build_header();
1593 /* Processor */
1594 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1595
1596 ti = 3;
1597
1598 /*
1599 * decl i0 ; Vertex pos, luma/chroma texcoords
1600 * decl i1 ; First ref surface top field texcoords
1601 * decl i2 ; First ref surface bottom field texcoords (unused, packed in the same stream)
1602 * decl i3 ; Second ref surface top field texcoords
1603 * decl i4 ; Second ref surface bottom field texcoords (unused, packed in the same stream)
1604 */
1605 for (i = 0; i < 5; i++)
1606 {
1607 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1608 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1609 }
1610
1611 /*
1612 * decl o0 ; Vertex pos
1613 * decl o1 ; Luma/chroma texcoords
1614 * decl o2 ; First ref macroblock texcoords
1615 * decl o3 ; Second ref macroblock texcoords
1616 */
1617 for (i = 0; i < 4; i++)
1618 {
1619 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1620 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1621 }
1622
1623 /*
1624 * mov o0, i0 ; Move input vertex pos to output
1625 * mov o1, i0 ; Move input luma/chroma texcoords to output
1626 */
1627 for (i = 0; i < 2; ++i)
1628 {
1629 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
1630 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1631 }
1632
1633 /*
1634 * add o2, i0, i1 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
1635 * add o3, i0, i3 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
1636 */
1637 for (i = 0; i < 2; ++i)
1638 {
1639 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i * 2 + 1);
1640 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1641 }
1642
1643 /* end */
1644 inst = vl_end();
1645 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1646
1647 vs.tokens = tokens;
1648 mc->b_vs[0] = pipe->create_vs_state(pipe, &vs);
1649 free(tokens);
1650
1651 return 0;
1652 }
1653
1654 static int vlCreateVertexShaderFieldBMB
1655 (
1656 struct vlR16SnormBufferedMC *mc
1657 )
1658 {
1659 const unsigned int max_tokens = 100;
1660
1661 struct pipe_context *pipe;
1662 struct pipe_shader_state vs;
1663 struct tgsi_token *tokens;
1664 struct tgsi_header *header;
1665
1666 struct tgsi_full_declaration decl;
1667 struct tgsi_full_instruction inst;
1668
1669 unsigned int ti;
1670 unsigned int i;
1671
1672 assert(mc);
1673
1674 pipe = mc->pipe;
1675 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1676
1677 /* Version */
1678 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1679 /* Header */
1680 header = (struct tgsi_header*)&tokens[1];
1681 *header = tgsi_build_header();
1682 /* Processor */
1683 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1684
1685 ti = 3;
1686
1687 /*
1688 * decl i0 ; Vertex pos, Luma/chroma texcoords
1689 * decl i1 ; First ref surface top field texcoords
1690 * decl i2 ; First ref surface bottom field texcoords
1691 * decl i3 ; Second ref surface top field texcoords
1692 * decl i4 ; Second ref surface bottom field texcoords
1693 */
1694 for (i = 0; i < 5; i++)
1695 {
1696 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1697 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1698 }
1699
1700 /* decl c0 ; Denorm coefficients */
1701 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6);
1702 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1703
1704 /*
1705 * decl o0 ; Vertex pos
1706 * decl o1 ; Luma/chroma texcoords
1707 * decl o2 ; Top field past ref macroblock texcoords
1708 * decl o3 ; Bottom field past ref macroblock texcoords
1709 * decl o4 ; Top field future ref macroblock texcoords
1710 * decl o5 ; Bottom field future ref macroblock texcoords
1711 * decl o6 ; Denormalized vertex pos
1712 */
1713 for (i = 0; i < 7; i++)
1714 {
1715 decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1716 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1717 }
1718
1719 /* decl t0, t1 */
1720 decl = vl_decl_temps(0, 1);
1721 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1722
1723 /*
1724 * mov o0, i0 ; Move input vertex pos to output
1725 * mov o1, i0 ; Move input luma/chroma texcoords to output
1726 * mov o2, i1 ; Move past top field texcoords to output
1727 * mov o3, i2 ; Move past bottom field texcoords to output
1728 * mov o4, i3 ; Move future top field texcoords to output
1729 * mov o5, i4 ; Move future bottom field texcoords to output
1730 */
1731 for (i = 0; i < 6; ++i)
1732 {
1733 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
1734 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1735 }
1736
1737 /*
1738 * add o2, i0, i1 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
1739 * add o3, i0, i2 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
1740 * add o4, i0, i3 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
1741 * add o5, i0, i4 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
1742 */
1743 for (i = 0; i < 4; ++i)
1744 {
1745 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 1);
1746 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1747 }
1748
1749 /* mul o6, i0, c0 ; Denorm vertex pos */
1750 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1751 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1752
1753 /* end */
1754 inst = vl_end();
1755 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1756
1757 vs.tokens = tokens;
1758 mc->b_vs[1] = pipe->create_vs_state(pipe, &vs);
1759 free(tokens);
1760
1761 return 0;
1762 }
1763
1764 static int vlCreateFragmentShaderFrameBMB
1765 (
1766 struct vlR16SnormBufferedMC *mc
1767 )
1768 {
1769 const unsigned int max_tokens = 100;
1770
1771 struct pipe_context *pipe;
1772 struct pipe_shader_state fs;
1773 struct tgsi_token *tokens;
1774 struct tgsi_header *header;
1775
1776 struct tgsi_full_declaration decl;
1777 struct tgsi_full_instruction inst;
1778
1779 unsigned int ti;
1780 unsigned int i;
1781
1782 assert(mc);
1783
1784 pipe = mc->pipe;
1785 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1786
1787 /* Version */
1788 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1789 /* Header */
1790 header = (struct tgsi_header*)&tokens[1];
1791 *header = tgsi_build_header();
1792 /* Processor */
1793 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1794
1795 ti = 3;
1796
1797 /*
1798 * decl i0 ; Texcoords for s0, s1, s2
1799 * decl i1 ; Texcoords for s3
1800 * decl i2 ; Texcoords for s4
1801 */
1802 for (i = 0; i < 3; ++i)
1803 {
1804 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1805 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1806 }
1807
1808 /*
1809 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1810 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
1811 */
1812 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1813 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1814
1815 /* decl o0 ; Fragment color */
1816 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1817 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1818
1819 /* decl t0-t2 */
1820 decl = vl_decl_temps(0, 2);
1821 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1822
1823 /*
1824 * decl s0 ; Sampler for luma texture
1825 * decl s1 ; Sampler for chroma Cb texture
1826 * decl s2 ; Sampler for chroma Cr texture
1827 * decl s3 ; Sampler for past ref surface texture
1828 * decl s4 ; Sampler for future ref surface texture
1829 */
1830 for (i = 0; i < 5; ++i)
1831 {
1832 decl = vl_decl_samplers(i, i);
1833 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1834 }
1835
1836 /*
1837 * tex2d t1, i0, s0 ; Read texel from luma texture
1838 * mov t0.x, t1.x ; Move luma sample into .x component
1839 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1840 * mov t0.y, t1.x ; Move Cb sample into .y component
1841 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1842 * mov t0.z, t1.x ; Move Cr sample into .z component
1843 */
1844 for (i = 0; i < 3; ++i)
1845 {
1846 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
1847 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1848
1849 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1850 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1851 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1852 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1853 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1854 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1855
1856 }
1857
1858 /* mul t0, t0, c0 ; Rescale texel to correct range */
1859 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1860 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1861
1862 /*
1863 * tex2d t1, i1, s3 ; Read texel from past ref macroblock
1864 * tex2d t2, i2, s4 ; Read texel from future ref macroblock
1865 */
1866 for (i = 0; i < 2; ++i)
1867 {
1868 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, i + 3);
1869 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1870 }
1871
1872 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
1873 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1874 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1875 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1876 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1877 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1878 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1879
1880 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1881 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1882 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1883
1884 /* end */
1885 inst = vl_end();
1886 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1887
1888 fs.tokens = tokens;
1889 mc->b_fs[0] = pipe->create_fs_state(pipe, &fs);
1890 free(tokens);
1891
1892 return 0;
1893 }
1894
/* Build the fragment shader used for field-coded bi-predicted (B) macroblocks.
 *
 * Hand-assembles a TGSI token stream that:
 *   1. samples the Y/Cb/Cr difference textures and rescales them,
 *   2. samples top- and bottom-field texels from both the past and future
 *      reference surfaces,
 *   3. selects top vs. bottom field per pixel based on (screen Y % 2),
 *   4. averages past and future, and adds the difference to form the output.
 *
 * The compiled shader is stored in mc->b_fs[1]. Always returns 0.
 * NOTE(review): token construction is strictly sequential; ti is the running
 * write index into tokens[] and every build call appends at tokens[ti].
 */
static int vlCreateFragmentShaderFieldBMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 200;

	struct pipe_context *pipe;
	struct pipe_shader_state fs;
	struct tgsi_token *tokens;
	struct tgsi_header *header;

	struct tgsi_full_declaration decl;
	struct tgsi_full_instruction inst;

	unsigned int ti;
	unsigned int i;

	assert(mc);

	pipe = mc->pipe;
	/* NOTE(review): malloc result is not checked; an allocation failure here
	   would dereference NULL in the version/header writes below */
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

	/* First three tokens (version, header, processor) are already written */
	ti = 3;

	/*
	 * decl i0 ; Texcoords for s0, s1, s2
	 * decl i1 ; Texcoords for s3
	 * decl i2 ; Texcoords for s3
	 * decl i3 ; Texcoords for s4
	 * decl i4 ; Texcoords for s4
	 * decl i5 ; Denormalized vertex pos
	 */
	for (i = 0; i < 6; ++i)
	{
		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
	 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
	 *         ; and for Y-mod-2 top/bottom field selection
	 */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl o0 ; Fragment color */
	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl t0-t5 */
	decl = vl_decl_temps(0, 5);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl s0 ; Sampler for luma texture
	 * decl s1 ; Sampler for chroma Cb texture
	 * decl s2 ; Sampler for chroma Cr texture
	 * decl s3 ; Sampler for past ref surface texture
	 * decl s4 ; Sampler for future ref surface texture
	 */
	for (i = 0; i < 5; ++i)
	{
		decl = vl_decl_samplers(i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * tex2d t1, i0, s0 ; Read texel from luma texture
	 * mov t0.x, t1.x   ; Move luma sample into .x component
	 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
	 * mov t0.y, t1.x   ; Move Cb sample into .y component
	 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
	 * mov t0.z, t1.x   ; Move Cr sample into .z component
	 */
	for (i = 0; i < 3; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

		/* Each plane's sample lands in t1.x; write it to the i-th channel of t0 */
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	}

	/* mul t0, t0, c0 ; Rescale texel to correct range */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* XXX: Pos values off by 0.5? */
	/* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* The next four instructions compute t3 = Y % 2 (0 for top field, 1 for bottom) */
	/* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* floor t3, t3 ; Get rid of fractional part */
	inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t3, c1.y ; Multiply by 2 */
	inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t1, i1, s3 ; Read texel from past ref macroblock top field
	 * tex2d t2, i2, s3 ; Read texel from past ref macroblock bottom field
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, 3);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
	/* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t4, i3, s4 ; Read texel from future ref macroblock top field
	 * tex2d t5, i4, s4 ; Read texel from future ref macroblock bottom field
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 4);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
	/* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* lerp t1, c1.x, t1, t2 ; Blend past and future texels (c1.x = 1/2 gives the average) */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* add o0, t0, t1 ; Add past/future ref and differential to form final output */
	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	fs.tokens = tokens;
	mc->b_fs[1] = pipe->create_fs_state(pipe, &fs);
	free(tokens);

	return 0;
}
2086
2087 static int vlCreateDataBufs
2088 (
2089 struct vlR16SnormBufferedMC *mc
2090 )
2091 {
2092 const unsigned int mbw = align(mc->video_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH;
2093 const unsigned int mbh = align(mc->video_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT;
2094 const unsigned int num_mb_per_frame = mbw * mbh;
2095
2096 struct pipe_context *pipe;
2097 unsigned int g, h, i;
2098
2099 assert(mc);
2100
2101 pipe = mc->pipe;
2102
2103 for (g = 0; g < NUM_BUF_SETS; ++g)
2104 {
2105 for (h = 0; h < 7; ++h)
2106 {
2107 /* Create our vertex buffer and vertex buffer element */
2108 mc->vertex_bufs[g][h][0].pitch = sizeof(struct vlVertex2f);
2109 mc->vertex_bufs[g][h][0].max_index = 24 * num_mb_per_frame - 1;
2110 mc->vertex_bufs[g][h][0].buffer_offset = 0;
2111 mc->vertex_bufs[g][h][0].buffer = pipe->winsys->buffer_create
2112 (
2113 pipe->winsys,
2114 1,
2115 PIPE_BUFFER_USAGE_VERTEX,
2116 sizeof(struct vlVertex2f) * 24 * num_mb_per_frame
2117 );
2118 }
2119 }
2120
2121 /* Position & block luma, block chroma texcoord element */
2122 mc->vertex_elems[0].src_offset = 0;
2123 mc->vertex_elems[0].vertex_buffer_index = 0;
2124 mc->vertex_elems[0].nr_components = 2;
2125 mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
2126
2127 for (g = 0; g < NUM_BUF_SETS; ++g)
2128 {
2129 for (h = 0; h < 7; ++h)
2130 {
2131 for (i = 1; i < 3; ++i)
2132 {
2133 mc->vertex_bufs[g][h][i].pitch = sizeof(struct vlVertex2f) * 2;
2134 mc->vertex_bufs[g][h][i].max_index = 24 * num_mb_per_frame - 1;
2135 mc->vertex_bufs[g][h][i].buffer_offset = 0;
2136 mc->vertex_bufs[g][h][i].buffer = pipe->winsys->buffer_create
2137 (
2138 pipe->winsys,
2139 1,
2140 PIPE_BUFFER_USAGE_VERTEX,
2141 sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame
2142 );
2143 }
2144 }
2145 }
2146
2147 /* First ref surface top field texcoord element */
2148 mc->vertex_elems[1].src_offset = 0;
2149 mc->vertex_elems[1].vertex_buffer_index = 1;
2150 mc->vertex_elems[1].nr_components = 2;
2151 mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
2152
2153 /* First ref surface bottom field texcoord element */
2154 mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f);
2155 mc->vertex_elems[2].vertex_buffer_index = 1;
2156 mc->vertex_elems[2].nr_components = 2;
2157 mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
2158
2159 /* Second ref surface top field texcoord element */
2160 mc->vertex_elems[3].src_offset = 0;
2161 mc->vertex_elems[3].vertex_buffer_index = 2;
2162 mc->vertex_elems[3].nr_components = 2;
2163 mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
2164
2165 /* Second ref surface bottom field texcoord element */
2166 mc->vertex_elems[4].src_offset = sizeof(struct vlVertex2f);
2167 mc->vertex_elems[4].vertex_buffer_index = 2;
2168 mc->vertex_elems[4].nr_components = 2;
2169 mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
2170
2171 /* Create our constant buffer */
2172 mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts);
2173 mc->vs_const_buf.buffer = pipe->winsys->buffer_create
2174 (
2175 pipe->winsys,
2176 1,
2177 PIPE_BUFFER_USAGE_CONSTANT,
2178 mc->vs_const_buf.size
2179 );
2180
2181 mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts);
2182 mc->fs_const_buf.buffer = pipe->winsys->buffer_create
2183 (
2184 pipe->winsys,
2185 1,
2186 PIPE_BUFFER_USAGE_CONSTANT,
2187 mc->fs_const_buf.size
2188 );
2189
2190 memcpy
2191 (
2192 pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
2193 &fs_consts,
2194 sizeof(struct vlFragmentShaderConsts)
2195 );
2196
2197 pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer);
2198
2199 return 0;
2200 }
2201
2202 static int vlInit
2203 (
2204 struct vlR16SnormBufferedMC *mc
2205 )
2206 {
2207 struct pipe_context *pipe;
2208 struct pipe_sampler_state sampler;
2209 struct pipe_texture template;
2210 unsigned int filters[5];
2211 unsigned int i;
2212
2213 assert(mc);
2214
2215 pipe = mc->pipe;
2216
2217 /* For MC we render to textures, which are rounded up to nearest POT */
2218 mc->viewport.scale[0] = vlRoundUpPOT(mc->video_width);
2219 mc->viewport.scale[1] = vlRoundUpPOT(mc->video_height);
2220 mc->viewport.scale[2] = 1;
2221 mc->viewport.scale[3] = 1;
2222 mc->viewport.translate[0] = 0;
2223 mc->viewport.translate[1] = 0;
2224 mc->viewport.translate[2] = 0;
2225 mc->viewport.translate[3] = 0;
2226
2227 mc->render_target.width = vlRoundUpPOT(mc->video_width);
2228 mc->render_target.height = vlRoundUpPOT(mc->video_height);
2229 mc->render_target.num_cbufs = 1;
2230 /* FB for MC stage is a vlSurface created by the user, set at render time */
2231 mc->render_target.zsbuf = NULL;
2232
2233 filters[0] = PIPE_TEX_FILTER_NEAREST;
2234 filters[1] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
2235 filters[2] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
2236 filters[3] = PIPE_TEX_FILTER_LINEAR;
2237 filters[4] = PIPE_TEX_FILTER_LINEAR;
2238
2239 for (i = 0; i < 5; ++i)
2240 {
2241 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2242 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2243 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2244 sampler.min_img_filter = filters[i];
2245 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2246 sampler.mag_img_filter = filters[i];
2247 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
2248 sampler.compare_func = PIPE_FUNC_ALWAYS;
2249 sampler.normalized_coords = 1;
2250 /*sampler.prefilter = ;*/
2251 /*sampler.shadow_ambient = ;*/
2252 /*sampler.lod_bias = ;*/
2253 sampler.min_lod = 0;
2254 /*sampler.max_lod = ;*/
2255 /*sampler.border_color[i] = ;*/
2256 /*sampler.max_anisotropy = ;*/
2257 mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler);
2258 }
2259
2260 memset(&template, 0, sizeof(struct pipe_texture));
2261 template.target = PIPE_TEXTURE_2D;
2262 template.format = PIPE_FORMAT_R16_SNORM;
2263 template.last_level = 0;
2264 template.width[0] = vlRoundUpPOT(mc->video_width);
2265 template.height[0] = vlRoundUpPOT(mc->video_height);
2266 template.depth[0] = 1;
2267 template.compressed = 0;
2268 pf_get_block(template.format, &template.block);
2269
2270 for (i = 0; i < NUM_BUF_SETS; ++i)
2271 mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template);
2272
2273 if (mc->video_format == vlFormatYCbCr420)
2274 {
2275 template.width[0] = vlRoundUpPOT(mc->video_width / 2);
2276 template.height[0] = vlRoundUpPOT(mc->video_height / 2);
2277 }
2278 else if (mc->video_format == vlFormatYCbCr422)
2279 template.height[0] = vlRoundUpPOT(mc->video_height / 2);
2280
2281 for (i = 0; i < NUM_BUF_SETS; ++i)
2282 {
2283 mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template);
2284 mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template);
2285 }
2286
2287 /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
2288
2289 vlCreateVertexShaderIMB(mc);
2290 vlCreateFragmentShaderIMB(mc);
2291 vlCreateVertexShaderFramePMB(mc);
2292 vlCreateVertexShaderFieldPMB(mc);
2293 vlCreateFragmentShaderFramePMB(mc);
2294 vlCreateFragmentShaderFieldPMB(mc);
2295 vlCreateVertexShaderFrameBMB(mc);
2296 vlCreateVertexShaderFieldBMB(mc);
2297 vlCreateFragmentShaderFrameBMB(mc);
2298 vlCreateFragmentShaderFieldBMB(mc);
2299 vlCreateDataBufs(mc);
2300
2301 return 0;
2302 }
2303
2304 int vlCreateR16SNormBufferedMC
2305 (
2306 struct pipe_context *pipe,
2307 unsigned int video_width,
2308 unsigned int video_height,
2309 enum vlFormat video_format,
2310 struct vlRender **render
2311 )
2312 {
2313 struct vlR16SnormBufferedMC *mc;
2314
2315 assert(pipe);
2316 assert(render);
2317
2318 mc = calloc(1, sizeof(struct vlR16SnormBufferedMC));
2319
2320 mc->base.vlBegin = &vlBegin;
2321 mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered;
2322 mc->base.vlEnd = &vlEnd;
2323 mc->base.vlFlush = &vlFlush;
2324 mc->base.vlDestroy = &vlDestroy;
2325 mc->pipe = pipe;
2326 mc->video_width = video_width;
2327 mc->video_height = video_height;
2328
2329 mc->cur_buf = 0;
2330 mc->buffered_surface = NULL;
2331 mc->past_surface = NULL;
2332 mc->future_surface = NULL;
2333 memset(mc->num_macroblocks, 0, sizeof(unsigned int) * 7);
2334 mc->total_num_macroblocks = 0;
2335
2336 vlInit(mc);
2337
2338 *render = &mc->base;
2339
2340 return 0;
2341 }