g3dvl: Define texcoords separately for luma, Cb, Cr textures.
[mesa.git] / src / gallium / state_trackers / g3dvl / vl_r16snorm_mc_buf.c
1 #define VL_INTERNAL
2 #include "vl_r16snorm_mc_buf.h"
3 #include <assert.h>
4 #include <stdlib.h>
5 #include <pipe/p_context.h>
6 #include <pipe/p_winsys.h>
7 #include <pipe/p_screen.h>
8 #include <pipe/p_state.h>
9 #include <pipe/p_util.h>
10 #include <pipe/p_inlines.h>
11 #include <tgsi/tgsi_parse.h>
12 #include <tgsi/tgsi_build.h>
13 #include "vl_render.h"
14 #include "vl_shader_build.h"
15 #include "vl_surface.h"
16 #include "vl_util.h"
17 #include "vl_types.h"
18 #include "vl_defs.h"
19
20 /*
21 * TODO: Dynamically determine number of buf sets to use, based on
22 * video size and available mem, since we can easily run out of memory
23 * for high res videos.
 * Note: Destroying the previous frame's buffers and creating new ones
 * doesn't work, since the buffers are not actually destroyed until their
 * fence is signalled, and if we render fast enough we will create buffers
 * faster than we destroy them.
28 */
29 #define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */
30
/*
 * Extended macroblock types: the base MPEG-2 prediction type crossed with
 * frame/field motion type, since each combination is drawn with its own
 * shader pair.  The order here determines the order vertex data is grouped
 * in and the order draw passes are issued by vlFlush().
 */
enum vlMacroBlockTypeEx
{
   vlMacroBlockExTypeIntra,
   vlMacroBlockExTypeFwdPredictedFrame,
   vlMacroBlockExTypeFwdPredictedField,
   vlMacroBlockExTypeBkwdPredictedFrame,
   vlMacroBlockExTypeBkwdPredictedField,
   vlMacroBlockExTypeBiPredictedFrame,
   vlMacroBlockExTypeBiPredictedField,

   vlNumMacroBlockExTypes /* Count of the entries above; must stay last */
};
43
/* Constant buffer layout for the vertex shaders. */
struct vlVertexShaderConsts
{
   /* .x/.y are set to the target surface's width/height by vlFlush();
      presumably used by the shader to denormalize texcoords -- confirm
      against the shader builders. */
   struct vlVertex4f denorm;
};
48
/* Constant buffer layout for the fragment shaders; values in fs_consts. */
struct vlFragmentShaderConsts
{
   struct vlVertex4f multiplier; /* Renormalizes 16-bit block samples (see fs_consts) */
   struct vlVertex4f div;        /* Used when computing Y % 2 for field selection */
};
54
/*
 * State of the buffered R16-snorm motion-compensation renderer.
 * Macroblocks are queued per target surface and only rendered when the
 * target changes (see vlRenderMacroBlocksMpeg2R16SnormBuffered/vlFlush).
 */
struct vlR16SnormBufferedMC
{
   struct vlRender base; /* Must be first: callers cast vlRender* to/from this */

   unsigned int picture_width, picture_height;
   enum vlFormat picture_format;

   unsigned int cur_buf;                            /* Rotating buffer-set counter; index with cur_buf % NUM_BUF_SETS */
   struct vlSurface *buffered_surface;              /* Surface macroblocks are currently queued for; NULL before first batch */
   struct vlSurface *past_surface, *future_surface; /* Reference surfaces for P/B prediction */
   struct vlVertex2f surface_tex_inv_size;          /* 1.0 / surface texture width, height */
   unsigned int num_macroblocks;                    /* Number of entries queued in macroblocks[] */
   struct vlMpeg2MacroBlock *macroblocks;           /* Queue consumed and reset by vlFlush() */

   struct pipe_context *pipe;
   struct pipe_viewport_state viewport;
   struct pipe_framebuffer_state render_target;
   struct pipe_sampler_state *samplers[5];          /* Luma, Cb, Cr, past ref, future ref */
   /* [0..2] are created/owned luma/Cb/Cr textures; [3..4] are borrowed from
      the reference surfaces and not released here (see vlDestroy) */
   struct pipe_texture *textures[NUM_BUF_SETS][5];
   void *i_vs, *p_vs[2], *b_vs[2];                  /* Vertex shaders: intra, pred [frame,field], bi-pred [frame,field] */
   void *i_fs, *p_fs[2], *b_fs[2];                  /* Matching fragment shaders */
   struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][3];
   struct pipe_vertex_element vertex_elems[8];
   struct pipe_constant_buffer vs_const_buf, fs_const_buf;
};
80
/*
 * Begin a rendering batch.  Nothing to set up for this renderer; the call
 * exists to satisfy the vlRender interface.  Always returns 0.
 */
static int vlBegin
(
   struct vlRender *render
)
{
   /* Only validate the argument. */
   assert(render);
   return 0;
}
90
91 static inline int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch)
92 {
93 unsigned int y;
94
95 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
96 memcpy
97 (
98 dst + y * dst_pitch,
99 src + y * VL_BLOCK_WIDTH,
100 VL_BLOCK_WIDTH * 2
101 );
102
103 return 0;
104 }
105
/*
 * Copy one field-coded block into the destination texture.  The block's
 * rows belong to a single field, so they are written to every other
 * destination row (stride dst_pitch * 2).  dst_pitch is in 16-bit units.
 */
static inline int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch)
{
   unsigned int y;

   /* First half of the block: even destination rows 0, 2, ... */
   for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y)
      memcpy
      (
         dst + y * dst_pitch * 2,
         src + y * VL_BLOCK_WIDTH,
         VL_BLOCK_WIDTH * 2
      );

   /* NOTE(review): this skips a full block height, and y (still at
      VL_BLOCK_HEIGHT / 2) keeps scaling by dst_pitch * 2 below, so the
      second half lands VL_BLOCK_HEIGHT * 2 rows below the block start
      rather than continuing the even-row interleave -- verify this
      matches the intended field layout of the luma texture. */
   dst += VL_BLOCK_HEIGHT * dst_pitch;

   /* Second half of the block. */
   for (; y < VL_BLOCK_HEIGHT; ++y)
      memcpy
      (
         dst + y * dst_pitch * 2,
         src + y * VL_BLOCK_WIDTH,
         VL_BLOCK_WIDTH * 2
      );

   return 0;
}
130
131 static inline int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
132 {
133 unsigned int y;
134
135 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
136 memset
137 (
138 dst + y * dst_pitch,
139 0,
140 VL_BLOCK_WIDTH * 2
141 );
142
143 return 0;
144 }
145
/*
 * Upload one macroblock's coefficient blocks into the current buffer set's
 * luma (texture 0) and chroma Cb/Cr (textures 1/2) textures at the
 * macroblock's position.  Blocks absent from coded_block_pattern are
 * zero-filled.  'blocks' holds only the coded blocks, packed contiguously.
 */
static inline int vlGrabBlocks
(
   struct vlR16SnormBufferedMC *mc,
   unsigned int mbx,
   unsigned int mby,
   enum vlDCTType dct_type,
   unsigned int coded_block_pattern,
   short *blocks
)
{
   struct pipe_surface *tex_surface;
   short *texels;
   unsigned int tex_pitch;
   unsigned int x, y, tb = 0, sb = 0; /* tb: texture block index, sb: source block index */
   unsigned int mbpx = mbx * VL_MACROBLOCK_WIDTH, mbpy = mby * VL_MACROBLOCK_HEIGHT;

   assert(mc);
   assert(blocks);

   /* Luma: texture 0 of the current rotating buffer set. */
   tex_surface = mc->pipe->screen->get_tex_surface
   (
      mc->pipe->screen,
      mc->textures[mc->cur_buf % NUM_BUF_SETS][0],
      0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
   );

   texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
   tex_pitch = tex_surface->stride / tex_surface->block.size; /* pitch in texels, not bytes */

   texels += mbpy * tex_pitch + mbpx;

   /* Four luma blocks in a 2x2 arrangement; cbp bits 5..2 select them. */
   for (y = 0; y < 2; ++y)
   {
      for (x = 0; x < 2; ++x, ++tb)
      {
         if ((coded_block_pattern >> (5 - tb)) & 1)
         {
            short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

            if (dct_type == vlDCTTypeFrameCoded)
            {
               vlGrabFrameCodedBlock
               (
                  cur_block,
                  texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH,
                  tex_pitch
               );
            }
            else
            {
               /* Field coded: block rows interleave, so the vertical block
                  offset is a single row, not a block height. */
               vlGrabFieldCodedBlock
               (
                  cur_block,
                  texels + y * tex_pitch + x * VL_BLOCK_WIDTH,
                  tex_pitch
               );
            }

            ++sb;
         }
         else
            vlGrabNoBlock(texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, tex_pitch);
      }
   }

   pipe_surface_unmap(tex_surface);

   /* Chroma: 4:2:0 assumed, so halve the position. */
   /* TODO: Implement 422, 444 */
   mbpx >>= 1;
   mbpy >>= 1;

   /* One Cb and one Cr block; cbp bits 1..0 select them. */
   for (tb = 0; tb < 2; ++tb)
   {
      tex_surface = mc->pipe->screen->get_tex_surface
      (
         mc->pipe->screen,
         mc->textures[mc->cur_buf % NUM_BUF_SETS][tb + 1],
         0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
      );

      texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
      tex_pitch = tex_surface->stride / tex_surface->block.size;

      texels += mbpy * tex_pitch + mbpx;

      if ((coded_block_pattern >> (1 - tb)) & 1)
      {
         short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

         /* Chroma blocks are always frame coded in 4:2:0. */
         vlGrabFrameCodedBlock
         (
            cur_block,
            texels,
            tex_pitch
         );

         ++sb;
      }
      else
         vlGrabNoBlock(texels, tex_pitch);

      pipe_surface_unmap(tex_surface);
   }

   return 0;
}
252
253 static inline enum vlMacroBlockTypeEx vlGetMacroBlockTypeEx(struct vlMpeg2MacroBlock *mb)
254 {
255 assert(mb);
256
257 switch (mb->mb_type)
258 {
259 case vlMacroBlockTypeIntra:
260 return vlMacroBlockExTypeIntra;
261 case vlMacroBlockTypeFwdPredicted:
262 return mb->mo_type == vlMotionTypeFrame ?
263 vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField;
264 case vlMacroBlockTypeBkwdPredicted:
265 return mb->mo_type == vlMotionTypeFrame ?
266 vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField;
267 case vlMacroBlockTypeBiPredicted:
268 return mb->mo_type == vlMotionTypeFrame ?
269 vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField;
270 default:
271 assert(0);
272 }
273
274 /* Unreachable */
275 return -1;
276 }
277
278 static inline int vlGrabMacroBlock
279 (
280 struct vlR16SnormBufferedMC *mc,
281 struct vlMpeg2MacroBlock *macroblock
282 )
283 {
284 assert(mc);
285 assert(macroblock);
286
287 mc->macroblocks[mc->num_macroblocks].mbx = macroblock->mbx;
288 mc->macroblocks[mc->num_macroblocks].mby = macroblock->mby;
289 mc->macroblocks[mc->num_macroblocks].mb_type = macroblock->mb_type;
290 mc->macroblocks[mc->num_macroblocks].mo_type = macroblock->mo_type;
291 mc->macroblocks[mc->num_macroblocks].dct_type = macroblock->dct_type;
292 mc->macroblocks[mc->num_macroblocks].PMV[0][0][0] = macroblock->PMV[0][0][0];
293 mc->macroblocks[mc->num_macroblocks].PMV[0][0][1] = macroblock->PMV[0][0][1];
294 mc->macroblocks[mc->num_macroblocks].PMV[0][1][0] = macroblock->PMV[0][1][0];
295 mc->macroblocks[mc->num_macroblocks].PMV[0][1][1] = macroblock->PMV[0][1][1];
296 mc->macroblocks[mc->num_macroblocks].PMV[1][0][0] = macroblock->PMV[1][0][0];
297 mc->macroblocks[mc->num_macroblocks].PMV[1][0][1] = macroblock->PMV[1][0][1];
298 mc->macroblocks[mc->num_macroblocks].PMV[1][1][0] = macroblock->PMV[1][1][0];
299 mc->macroblocks[mc->num_macroblocks].PMV[1][1][1] = macroblock->PMV[1][1][1];
300 mc->macroblocks[mc->num_macroblocks].cbp = macroblock->cbp;
301 mc->macroblocks[mc->num_macroblocks].blocks = macroblock->blocks;
302
303 vlGrabBlocks
304 (
305 mc,
306 macroblock->mbx,
307 macroblock->mby,
308 macroblock->dct_type,
309 macroblock->cbp,
310 macroblock->blocks
311 );
312
313 mc->num_macroblocks++;
314
315 return 0;
316 }
317
/*
 * Emit position and luma/Cb/Cr texcoords for the 6 vertices (two triangles)
 * of one quarter-macroblock quad into vertex array (vb).  (mbx,mby) is the
 * macroblock's grid position, (unitx,unity) a macroblock's size in texcoord
 * units, (ofsx,ofsy) the quadrant offset and (hx,hy) half a macroblock.
 *
 * The (cbp) with its (lm)/(cbm)/(crm) masks and (zx),(zy) arguments are
 * currently unused: the commented-out branches below would point non-coded
 * blocks at a known zero block instead of the macroblock's own texels.
 * NOTE(review): the call sites pass mc->zero_block.* for (zx),(zy), but no
 * zero_block field exists in struct vlR16SnormBufferedMC -- harmless while
 * the arguments stay unexpanded, but it must be restored before re-enabling
 * the disabled branches.  (Typos in the disabled code -- unbalanced parens
 * and cb_tc used in the cr section -- are fixed in the comments below.)
 */
#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zx, zy) \
   (vb)[0].pos.x = (mbx) * (unitx) + (ofsx);        (vb)[0].pos.y = (mby) * (unity) + (ofsy);        \
   (vb)[1].pos.x = (mbx) * (unitx) + (ofsx);        (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \
   (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy);        \
   (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy);        \
   (vb)[4].pos.x = (mbx) * (unitx) + (ofsx);        (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \
   (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \
   \
   /*if ((cbp) & (lm))                                                                               \
   {*/                                                                                               \
      (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx);        (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy);        \
      (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx);        (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
      (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy);        \
      (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy);        \
      (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx);        (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
      (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
   /*}                                                                                               \
   else                                                                                              \
   {                                                                                                 \
      (vb)[0].luma_tc.x = (zx);        (vb)[0].luma_tc.y = (zy);        \
      (vb)[1].luma_tc.x = (zx);        (vb)[1].luma_tc.y = (zy) + (hy); \
      (vb)[2].luma_tc.x = (zx) + (hx); (vb)[2].luma_tc.y = (zy);        \
      (vb)[3].luma_tc.x = (zx) + (hx); (vb)[3].luma_tc.y = (zy);        \
      (vb)[4].luma_tc.x = (zx);        (vb)[4].luma_tc.y = (zy) + (hy); \
      (vb)[5].luma_tc.x = (zx) + (hx); (vb)[5].luma_tc.y = (zy) + (hy); \
   }*/                                                                                               \
   \
   /*if ((cbp) & (cbm))                                                                              \
   {*/                                                                                               \
      (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx);        (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy);        \
      (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx);        (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
      (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy);        \
      (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy);        \
      (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx);        (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
      (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
   /*}                                                                                               \
   else                                                                                              \
   {                                                                                                 \
      (vb)[0].cb_tc.x = (zx);        (vb)[0].cb_tc.y = (zy);        \
      (vb)[1].cb_tc.x = (zx);        (vb)[1].cb_tc.y = (zy) + (hy); \
      (vb)[2].cb_tc.x = (zx) + (hx); (vb)[2].cb_tc.y = (zy);        \
      (vb)[3].cb_tc.x = (zx) + (hx); (vb)[3].cb_tc.y = (zy);        \
      (vb)[4].cb_tc.x = (zx);        (vb)[4].cb_tc.y = (zy) + (hy); \
      (vb)[5].cb_tc.x = (zx) + (hx); (vb)[5].cb_tc.y = (zy) + (hy); \
   }*/                                                                                               \
   \
   /*if ((cbp) & (crm))                                                                              \
   {*/                                                                                               \
      (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx);        (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy);        \
      (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx);        (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
      (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy);        \
      (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy);        \
      (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx);        (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
      (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
   /*}                                                                                               \
   else                                                                                              \
   {                                                                                                 \
      (vb)[0].cr_tc.x = (zx);        (vb)[0].cr_tc.y = (zy);        \
      (vb)[1].cr_tc.x = (zx);        (vb)[1].cr_tc.y = (zy) + (hy); \
      (vb)[2].cr_tc.x = (zx) + (hx); (vb)[2].cr_tc.y = (zy);        \
      (vb)[3].cr_tc.x = (zx) + (hx); (vb)[3].cr_tc.y = (zy);        \
      (vb)[4].cr_tc.x = (zx);        (vb)[4].cr_tc.y = (zy) + (hy); \
      (vb)[5].cr_tc.x = (zx) + (hx); (vb)[5].cr_tc.y = (zy) + (hy); \
   }*/
382
/*
 * Write one macroblock's vertex data into the current buffer set's vertex
 * buffers at slot 'pos' (24 vertices per macroblock).  Stream 0 holds
 * position + per-channel texcoords; streams 1 and 2 hold the first and
 * second motion vectors for predicted macroblocks.  The switch cases fall
 * through intentionally: bi-predicted fills stream 2 then stream 1, and
 * every type fills stream 0.
 */
static inline int vlGrabMacroBlockVB
(
   struct vlR16SnormBufferedMC *mc,
   struct vlMpeg2MacroBlock *macroblock,
   unsigned int pos
)
{
   struct vlVertex2f mo_vec[2];
   unsigned int i;

   assert(mc);
   assert(macroblock);

   switch (macroblock->mb_type)
   {
      case vlMacroBlockTypeBiPredicted:
      {
         struct vlVertex2f *vb;

         /* Stream 2: backward motion vector(s), 2 vectors x 24 vertices. */
         vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
         (
            mc->pipe->winsys,
            mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer,
            PIPE_BUFFER_USAGE_CPU_WRITE
         ) + pos * 2 * 24;

         /* Half-pel motion, scaled to texcoord units. */
         mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
         mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

         if (macroblock->mo_type == vlMotionTypeFrame)
         {
            for (i = 0; i < 24 * 2; i += 2)
            {
               vb[i].x = mo_vec[0].x;
               vb[i].y = mo_vec[0].y;
            }
         }
         else
         {
            /* Field motion: second vector for the other field. */
            mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
            mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;

            for (i = 0; i < 24 * 2; i += 2)
            {
               vb[i].x = mo_vec[0].x;
               vb[i].y = mo_vec[0].y;
               vb[i + 1].x = mo_vec[1].x;
               vb[i + 1].y = mo_vec[1].y;
            }
         }

         mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer);

         /* fall-through */
      }
      case vlMacroBlockTypeFwdPredicted:
      case vlMacroBlockTypeBkwdPredicted:
      {
         struct vlVertex2f *vb;

         /* Stream 1: primary motion vector(s). */
         vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
         (
            mc->pipe->winsys,
            mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer,
            PIPE_BUFFER_USAGE_CPU_WRITE
         ) + pos * 2 * 24;

         /* Backward-predicted macroblocks use PMV[.][1], others PMV[.][0]. */
         if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted)
         {
            mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
            mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

            if (macroblock->mo_type == vlMotionTypeField)
            {
               mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
               mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;
            }
         }
         else
         {
            mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x;
            mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y;

            if (macroblock->mo_type == vlMotionTypeField)
            {
               mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x;
               mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y;
            }
         }

         if (macroblock->mo_type == vlMotionTypeFrame)
         {
            for (i = 0; i < 24 * 2; i += 2)
            {
               vb[i].x = mo_vec[0].x;
               vb[i].y = mo_vec[0].y;
            }
         }
         else
         {
            for (i = 0; i < 24 * 2; i += 2)
            {
               vb[i].x = mo_vec[0].x;
               vb[i].y = mo_vec[0].y;
               vb[i + 1].x = mo_vec[1].x;
               vb[i + 1].y = mo_vec[1].y;
            }
         }

         mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer);

         /* fall-through */
      }
      case vlMacroBlockTypeIntra:
      {
         /* Macroblock size and half-size in texcoord units. */
         const struct vlVertex2f unit =
         {
            mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH,
            mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT
         };
         const struct vlVertex2f half =
         {
            mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2),
            mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2)
         };

         /* Stream 0 layout: position + luma/Cb/Cr texcoords per vertex. */
         struct vlMacroBlockVertexStream0
         {
            struct vlVertex2f pos;
            struct vlVertex2f luma_tc;
            struct vlVertex2f cb_tc;
            struct vlVertex2f cr_tc;
         } *vb;

         vb = (struct vlMacroBlockVertexStream0*)mc->pipe->winsys->buffer_map
         (
            mc->pipe->winsys,
            mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer,
            PIPE_BUFFER_USAGE_CPU_WRITE
         ) + pos * 24;

         /* Four quadrant quads, 6 vertices each.
            NOTE(review): the zero-block arguments reference mc->zero_block,
            which is not a field of struct vlR16SnormBufferedMC; this only
            compiles because SET_BLOCK never expands those arguments. */
         SET_BLOCK
         (
            vb,
            macroblock->cbp, macroblock->mbx, macroblock->mby,
            unit.x, unit.y, 0, 0, half.x, half.y,
            32, 2, 1, mc->zero_block.x, mc->zero_block.y
         );

         SET_BLOCK
         (
            vb + 6,
            macroblock->cbp, macroblock->mbx, macroblock->mby,
            unit.x, unit.y, half.x, 0, half.x, half.y,
            16, 2, 1, mc->zero_block.x, mc->zero_block.y
         );

         SET_BLOCK
         (
            vb + 12,
            macroblock->cbp, macroblock->mbx, macroblock->mby,
            unit.x, unit.y, 0, half.y, half.x, half.y,
            8, 2, 1, mc->zero_block.x, mc->zero_block.y
         );

         SET_BLOCK
         (
            vb + 18,
            macroblock->cbp, macroblock->mbx, macroblock->mby,
            unit.x, unit.y, half.x, half.y, half.x, half.y,
            4, 2, 1, mc->zero_block.x, mc->zero_block.y
         );

         mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer);

         break;
      }
      default:
         assert(0);
   }

   return 0;
}
566
567 static int vlFlush
568 (
569 struct vlRender *render
570 )
571 {
572 struct vlR16SnormBufferedMC *mc;
573 struct pipe_context *pipe;
574 struct vlVertexShaderConsts *vs_consts;
575 unsigned int num_macroblocks[vlNumMacroBlockExTypes] = {0};
576 unsigned int offset[vlNumMacroBlockExTypes];
577 unsigned int vb_start = 0;
578 unsigned int i;
579
580 assert(render);
581
582 mc = (struct vlR16SnormBufferedMC*)render;
583 pipe = mc->pipe;
584
585 for (i = 0; i < mc->num_macroblocks; ++i)
586 {
587 enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]);
588
589 num_macroblocks[mb_type_ex]++;
590 }
591
592 offset[0] = 0;
593
594 for (i = 1; i < vlNumMacroBlockExTypes; ++i)
595 offset[i] = offset[i - 1] + num_macroblocks[i - 1];
596
597 for (i = 0; i < mc->num_macroblocks; ++i)
598 {
599 enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]);
600
601 vlGrabMacroBlockVB(mc, &mc->macroblocks[i], offset[mb_type_ex]);
602
603 offset[mb_type_ex]++;
604 }
605
606 mc->render_target.cbufs[0] = pipe->screen->get_tex_surface
607 (
608 pipe->screen,
609 mc->buffered_surface->texture,
610 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
611 );
612
613 pipe->set_framebuffer_state(pipe, &mc->render_target);
614 pipe->set_viewport_state(pipe, &mc->viewport);
615 vs_consts = pipe->winsys->buffer_map
616 (
617 pipe->winsys,
618 mc->vs_const_buf.buffer,
619 PIPE_BUFFER_USAGE_CPU_WRITE
620 );
621
622 vs_consts->denorm.x = mc->buffered_surface->texture->width[0];
623 vs_consts->denorm.y = mc->buffered_surface->texture->height[0];
624
625 pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer);
626 pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf);
627 pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf);
628
629 if (num_macroblocks[vlMacroBlockExTypeIntra] > 0)
630 {
631 pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
632 pipe->set_vertex_elements(pipe, 4, mc->vertex_elems);
633 pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
634 pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers);
635 pipe->bind_vs_state(pipe, mc->i_vs);
636 pipe->bind_fs_state(pipe, mc->i_fs);
637
638 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeIntra] * 24);
639 vb_start += num_macroblocks[vlMacroBlockExTypeIntra] * 24;
640 }
641
642 if (num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0)
643 {
644 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
645 pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
646 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
647 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
648 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
649 pipe->bind_vs_state(pipe, mc->p_vs[0]);
650 pipe->bind_fs_state(pipe, mc->p_fs[0]);
651
652 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24);
653 vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24;
654 }
655
656 if (num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0)
657 {
658 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
659 pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
660 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
661 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
662 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
663 pipe->bind_vs_state(pipe, mc->p_vs[1]);
664 pipe->bind_fs_state(pipe, mc->p_fs[1]);
665
666 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24);
667 vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24;
668 }
669
670 if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0)
671 {
672 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
673 pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
674 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
675 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
676 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
677 pipe->bind_vs_state(pipe, mc->p_vs[0]);
678 pipe->bind_fs_state(pipe, mc->p_fs[0]);
679
680 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24);
681 vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24;
682 }
683
684 if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0)
685 {
686 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
687 pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
688 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
689 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
690 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
691 pipe->bind_vs_state(pipe, mc->p_vs[1]);
692 pipe->bind_fs_state(pipe, mc->p_fs[1]);
693
694 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24);
695 vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24;
696 }
697
698 if (num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0)
699 {
700 pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
701 pipe->set_vertex_elements(pipe, 8, mc->vertex_elems);
702 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
703 mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
704 pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
705 pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
706 pipe->bind_vs_state(pipe, mc->b_vs[0]);
707 pipe->bind_fs_state(pipe, mc->b_fs[0]);
708
709 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24);
710 vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24;
711 }
712
713 if (num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0)
714 {
715 pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
716 pipe->set_vertex_elements(pipe, 8, mc->vertex_elems);
717 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
718 mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
719 pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
720 pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
721 pipe->bind_vs_state(pipe, mc->b_vs[1]);
722 pipe->bind_fs_state(pipe, mc->b_fs[1]);
723
724 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24);
725 vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24;
726 }
727
728 mc->num_macroblocks = 0;
729 mc->cur_buf++;
730
731 return 0;
732 }
733
734 static int vlRenderMacroBlocksMpeg2R16SnormBuffered
735 (
736 struct vlRender *render,
737 struct vlMpeg2MacroBlockBatch *batch,
738 struct vlSurface *surface
739 )
740 {
741 struct vlR16SnormBufferedMC *mc;
742 unsigned int i;
743
744 assert(render);
745
746 mc = (struct vlR16SnormBufferedMC*)render;
747
748 if (mc->buffered_surface)
749 {
750 if
751 (
752 mc->buffered_surface != surface /*||
753 mc->past_surface != batch->past_surface ||
754 mc->future_surface != batch->future_surface*/
755 )
756 {
757 vlFlush(&mc->base);
758 mc->buffered_surface = surface;
759 mc->past_surface = batch->past_surface;
760 mc->future_surface = batch->future_surface;
761 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
762 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
763 }
764 }
765 else
766 {
767 mc->buffered_surface = surface;
768 mc->past_surface = batch->past_surface;
769 mc->future_surface = batch->future_surface;
770 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
771 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
772 }
773
774 for (i = 0; i < batch->num_macroblocks; ++i)
775 vlGrabMacroBlock(mc, &batch->macroblocks[i]);
776
777 return 0;
778 }
779
/*
 * End a rendering batch.  Nothing to tear down for this renderer; the call
 * exists to satisfy the vlRender interface.  Always returns 0.
 */
static int vlEnd
(
   struct vlRender *render
)
{
   /* Only validate the argument. */
   assert(render);
   return 0;
}
789
/*
 * Destroy the renderer: release all sampler states, vertex buffers,
 * created textures, shaders and constant buffers, then free the
 * macroblock queue and the renderer itself.  Always returns 0.
 */
static int vlDestroy
(
   struct vlRender *render
)
{
   struct vlR16SnormBufferedMC *mc;
   struct pipe_context *pipe;
   unsigned int h, i;

   assert(render);

   mc = (struct vlR16SnormBufferedMC*)render;
   pipe = mc->pipe;

   for (i = 0; i < 5; ++i)
      pipe->delete_sampler_state(pipe, mc->samplers[i]);

   for (h = 0; h < NUM_BUF_SETS; ++h)
      for (i = 0; i < 3; ++i)
         pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[h][i].buffer);

   /* Textures 3 & 4 are not created directly, no need to release them here */
   for (i = 0; i < NUM_BUF_SETS; ++i)
   {
      pipe_texture_release(&mc->textures[i][0]);
      pipe_texture_release(&mc->textures[i][1]);
      pipe_texture_release(&mc->textures[i][2]);
   }

   pipe->delete_vs_state(pipe, mc->i_vs);
   pipe->delete_fs_state(pipe, mc->i_fs);

   /* Predicted and bi-predicted shader pairs, frame and field variants. */
   for (i = 0; i < 2; ++i)
   {
      pipe->delete_vs_state(pipe, mc->p_vs[i]);
      pipe->delete_fs_state(pipe, mc->p_fs[i]);
      pipe->delete_vs_state(pipe, mc->b_vs[i]);
      pipe->delete_fs_state(pipe, mc->b_fs[i]);
   }

   pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer);
   pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer);

   free(mc->macroblocks);
   free(mc);

   return 0;
}
838
/*
 * Multiplier renormalizes block samples from 16 bits to 12 bits.
 * Divider is used when calculating Y % 2 for choosing top or bottom
 * field for P or B macroblocks.
 * TODO: Use immediates.
 */
static const struct vlFragmentShaderConsts fs_consts =
{
   {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
   {0.5f, 2.0f, 0.0f, 0.0f}
};
850
851 static int vlCreateVertexShaderIMB
852 (
853 struct vlR16SnormBufferedMC *mc
854 )
855 {
856 const unsigned int max_tokens = 50;
857
858 struct pipe_context *pipe;
859 struct pipe_shader_state vs;
860 struct tgsi_token *tokens;
861 struct tgsi_header *header;
862
863 struct tgsi_full_declaration decl;
864 struct tgsi_full_instruction inst;
865
866 unsigned int ti;
867 unsigned int i;
868
869 assert(mc);
870
871 pipe = mc->pipe;
872 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
873
874 /* Version */
875 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
876 /* Header */
877 header = (struct tgsi_header*)&tokens[1];
878 *header = tgsi_build_header();
879 /* Processor */
880 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
881
882 ti = 3;
883
884 /*
885 * decl i0 ; Vertex pos
886 * decl i1 ; Luma texcoords
887 * decl i2 ; Chroma Cb texcoords
888 * decl i3 ; Chroma Cr texcoords
889 */
890 for (i = 0; i < 4; i++)
891 {
892 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
893 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
894 }
895
896 /*
897 * decl o0 ; Vertex pos
898 * decl o1 ; Luma texcoords
899 * decl o2 ; Chroma Cb texcoords
900 * decl o3 ; Chroma Cr texcoords
901 */
902 for (i = 0; i < 4; i++)
903 {
904 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
905 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
906 }
907
908 /*
909 * mov o0, i0 ; Move input vertex pos to output
910 * mov o1, i1 ; Move input luma texcoords to output
911 * mov o2, i2 ; Move input chroma Cb texcoords to output
912 * mov o3, i3 ; Move input chroma Cr texcoords to output
913 */
914 for (i = 0; i < 4; ++i)
915 {
916 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
917 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
918 }
919
920 /* end */
921 inst = vl_end();
922 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
923
924 vs.tokens = tokens;
925 mc->i_vs = pipe->create_vs_state(pipe, &vs);
926 free(tokens);
927
928 return 0;
929 }
930
931 static int vlCreateFragmentShaderIMB
932 (
933 struct vlR16SnormBufferedMC *mc
934 )
935 {
936 const unsigned int max_tokens = 100;
937
938 struct pipe_context *pipe;
939 struct pipe_shader_state fs;
940 struct tgsi_token *tokens;
941 struct tgsi_header *header;
942
943 struct tgsi_full_declaration decl;
944 struct tgsi_full_instruction inst;
945
946 unsigned int ti;
947 unsigned int i;
948
949 assert(mc);
950
951 pipe = mc->pipe;
952 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
953
954 /* Version */
955 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
956 /* Header */
957 header = (struct tgsi_header*)&tokens[1];
958 *header = tgsi_build_header();
959 /* Processor */
960 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
961
962 ti = 3;
963
964 /*
965 * decl i0 ; Luma texcoords
966 * decl i1 ; Chroma Cb texcoords
967 * decl i2 ; Chroma Cr texcoords
968 */
969 for (i = 0; i < 3; ++i)
970 {
971 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, i, i, TGSI_INTERPOLATE_LINEAR);
972 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
973 }
974
975 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
976 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
977 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
978
979 /* decl o0 ; Fragment color */
980 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
981 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
982
983 /* decl t0, t1 */
984 decl = vl_decl_temps(0, 1);
985 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
986
987 /*
988 * decl s0 ; Sampler for luma texture
989 * decl s1 ; Sampler for chroma Cb texture
990 * decl s2 ; Sampler for chroma Cr texture
991 */
992 for (i = 0; i < 3; ++i)
993 {
994 decl = vl_decl_samplers(i, i);
995 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti);
996 }
997
998 /*
999 * tex2d t1, i0, s0 ; Read texel from luma texture
1000 * mov t0.x, t1.x ; Move luma sample into .x component
1001 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1002 * mov t0.y, t1.x ; Move Cb sample into .y component
1003 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1004 * mov t0.z, t1.x ; Move Cr sample into .z component
1005 */
1006 for (i = 0; i < 3; ++i)
1007 {
1008 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1009 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1010
1011 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1012 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1013 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1014 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1015 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1016 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1017
1018 }
1019
1020 /* mul o0, t0, c0 ; Rescale texel to correct range */
1021 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1022 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1023
1024 /* end */
1025 inst = vl_end();
1026 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1027
1028 fs.tokens = tokens;
1029 mc->i_fs = pipe->create_fs_state(pipe, &fs);
1030 free(tokens);
1031
1032 return 0;
1033 }
1034
1035 static int vlCreateVertexShaderFramePMB
1036 (
1037 struct vlR16SnormBufferedMC *mc
1038 )
1039 {
1040 const unsigned int max_tokens = 100;
1041
1042 struct pipe_context *pipe;
1043 struct pipe_shader_state vs;
1044 struct tgsi_token *tokens;
1045 struct tgsi_header *header;
1046
1047 struct tgsi_full_declaration decl;
1048 struct tgsi_full_instruction inst;
1049
1050 unsigned int ti;
1051 unsigned int i;
1052
1053 assert(mc);
1054
1055 pipe = mc->pipe;
1056 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1057
1058 /* Version */
1059 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1060 /* Header */
1061 header = (struct tgsi_header*)&tokens[1];
1062 *header = tgsi_build_header();
1063 /* Processor */
1064 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1065
1066 ti = 3;
1067
1068 /*
1069 * decl i0 ; Vertex pos
1070 * decl i1 ; Luma texcoords
1071 * decl i2 ; Chroma Cb texcoords
1072 * decl i3 ; Chroma Cr texcoords
1073 * decl i4 ; Ref surface top field texcoords
1074 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
1075 */
1076 for (i = 0; i < 6; i++)
1077 {
1078 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1079 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1080 }
1081
1082 /*
1083 * decl o0 ; Vertex pos
1084 * decl o1 ; Luma texcoords
1085 * decl o2 ; Chroma Cb texcoords
1086 * decl o3 ; Chroma Cr texcoords
1087 * decl o4 ; Ref macroblock texcoords
1088 */
1089 for (i = 0; i < 5; i++)
1090 {
1091 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1092 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1093 }
1094
1095 /*
1096 * mov o0, i0 ; Move input vertex pos to output
1097 * mov o1, i1 ; Move input luma texcoords to output
1098 * mov o2, i2 ; Move input chroma Cb texcoords to output
1099 * mov o3, i3 ; Move input chroma Cr texcoords to output
1100 */
1101 for (i = 0; i < 4; ++i)
1102 {
1103 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1104 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1105 }
1106
1107 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
1108 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
1109 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1110
1111 /* end */
1112 inst = vl_end();
1113 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1114
1115 vs.tokens = tokens;
1116 mc->p_vs[0] = pipe->create_vs_state(pipe, &vs);
1117 free(tokens);
1118
1119 return 0;
1120 }
1121
1122 static int vlCreateVertexShaderFieldPMB
1123 (
1124 struct vlR16SnormBufferedMC *mc
1125 )
1126 {
1127 const unsigned int max_tokens = 100;
1128
1129 struct pipe_context *pipe;
1130 struct pipe_shader_state vs;
1131 struct tgsi_token *tokens;
1132 struct tgsi_header *header;
1133
1134 struct tgsi_full_declaration decl;
1135 struct tgsi_full_instruction inst;
1136
1137 unsigned int ti;
1138 unsigned int i;
1139
1140 assert(mc);
1141
1142 pipe = mc->pipe;
1143 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1144
1145 /* Version */
1146 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1147 /* Header */
1148 header = (struct tgsi_header*)&tokens[1];
1149 *header = tgsi_build_header();
1150 /* Processor */
1151 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1152
1153 ti = 3;
1154
1155 /*
1156 * decl i0 ; Vertex pos
1157 * decl i1 ; Luma texcoords
1158 * decl i2 ; Chroma Cb texcoords
1159 * decl i3 ; Chroma Cr texcoords
1160 * decl i4 ; Ref macroblock top field texcoords
1161 * decl i5 ; Ref macroblock bottom field texcoords
1162 */
1163 for (i = 0; i < 6; i++)
1164 {
1165 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1166 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1167 }
1168
1169 /* decl c0 ; Texcoord denorm coefficients */
1170 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1171 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1172
1173 /*
1174 * decl o0 ; Vertex pos
1175 * decl o1 ; Luma texcoords
1176 * decl o2 ; Chroma Cb texcoords
1177 * decl o3 ; Chroma Cr texcoords
1178 * decl o4 ; Ref macroblock top field texcoords
1179 * decl o5 ; Ref macroblock bottom field texcoords
1180 * decl o6 ; Denormalized vertex pos
1181 */
1182 for (i = 0; i < 7; i++)
1183 {
1184 decl = vl_decl_output((i == 0 || i == 6) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1185 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1186 }
1187
1188 /*
1189 * mov o0, i0 ; Move input vertex pos to output
1190 * mov o1, i1 ; Move input luma texcoords to output
1191 * mov o2, i2 ; Move input chroma Cb texcoords to output
1192 * mov o3, i3 ; Move input chroma Cr texcoords to output
1193 */
1194 for (i = 0; i < 4; ++i)
1195 {
1196 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1197 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1198 }
1199
1200 /*
1201 * add o4, i0, i4 ; Translate vertex pos by motion vec to form top field macroblock texcoords
1202 * add o5, i0, i5 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords
1203 */
1204 for (i = 0; i < 2; ++i)
1205 {
1206 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
1207 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1208 }
1209
1210 /* mul o6, i0, c0 ; Denorm vertex pos */
1211 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1212 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1213
1214 /* end */
1215 inst = vl_end();
1216 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1217
1218 vs.tokens = tokens;
1219 mc->p_vs[1] = pipe->create_vs_state(pipe, &vs);
1220 free(tokens);
1221
1222 return 0;
1223 }
1224
/*
 * Assembles the fragment shader for frame-based predicted (P) macroblocks
 * and stores it in mc->p_fs[0]. It gathers the Y/Cb/Cr difference samples
 * into one texel, rescales from 16-bit snorm to 9-bit snorm range, fetches
 * the reference macroblock texel, and adds the two for the final color.
 */
static int vlCreateFragmentShaderFramePMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 100;

	struct pipe_context *pipe;
	struct pipe_shader_state fs;
	struct tgsi_token *tokens;
	struct tgsi_header *header;

	struct tgsi_full_declaration decl;
	struct tgsi_full_instruction inst;

	unsigned int ti;
	unsigned int i;

	assert(mc);

	pipe = mc->pipe;
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

	ti = 3;

	/*
	 * decl i0 ; Luma texcoords
	 * decl i1 ; Chroma Cb texcoords
	 * decl i2 ; Chroma Cr texcoords
	 * decl i3 ; Ref macroblock texcoords
	 */
	/* Semantic index i + 1 lines up with the vertex shader's generic outputs o1..o4 */
	for (i = 0; i < 4; ++i)
	{
		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl o0 ; Fragment color */
	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl t0, t1 */
	decl = vl_decl_temps(0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl s0 ; Sampler for luma texture
	 * decl s1 ; Sampler for chroma Cb texture
	 * decl s2 ; Sampler for chroma Cr texture
	 * decl s3 ; Sampler for ref surface texture
	 */
	for (i = 0; i < 4; ++i)
	{
		decl = vl_decl_samplers(i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * tex2d t1, i0, s0 ; Read texel from luma texture
	 * mov t0.x, t1.x ; Move luma sample into .x component
	 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
	 * mov t0.y, t1.x ; Move Cb sample into .y component
	 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
	 * mov t0.z, t1.x ; Move Cr sample into .z component
	 */
	for (i = 0; i < 3; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

		/* Replicate t1.x via swizzle and mask the write so each plane lands in its own channel of t0 */
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	}

	/* mul t0, t0, c0 ; Rescale texel to correct range */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* tex2d t1, i3, s3 ; Read texel from ref macroblock */
	inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* add o0, t0, t1 ; Add ref and differential to form final output */
	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	fs.tokens = tokens;
	mc->p_fs[0] = pipe->create_fs_state(pipe, &fs);
	free(tokens);

	return 0;
}
1338
/*
 * Assembles the fragment shader for field-based predicted (P) macroblocks
 * and stores it in mc->p_fs[1]. Like the frame variant, it gathers and
 * rescales the Y/Cb/Cr difference samples; in addition it fetches both the
 * top and bottom field reference texels and selects between them per pixel
 * based on the parity (Y mod 2) of the denormalized fragment position,
 * computed with a sub/mul/floor/mul/sub sequence and applied with a lerp.
 */
static int vlCreateFragmentShaderFieldPMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 200;

	struct pipe_context *pipe;
	struct pipe_shader_state fs;
	struct tgsi_token *tokens;
	struct tgsi_header *header;

	struct tgsi_full_declaration decl;
	struct tgsi_full_instruction inst;

	unsigned int ti;
	unsigned int i;

	assert(mc);

	pipe = mc->pipe;
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

	ti = 3;

	/*
	 * decl i0 ; Luma texcoords
	 * decl i1 ; Chroma Cb texcoords
	 * decl i2 ; Chroma Cr texcoords
	 * decl i3 ; Ref macroblock top field texcoords
	 * decl i4 ; Ref macroblock bottom field texcoords
	 * decl i5 ; Denormalized vertex pos
	 */
	for (i = 0; i < 6; ++i)
	{
		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
	 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
	 */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl o0 ; Fragment color */
	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl t0-t4 */
	decl = vl_decl_temps(0, 4);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl s0 ; Sampler for luma texture
	 * decl s1 ; Sampler for chroma Cb texture
	 * decl s2 ; Sampler for chroma Cr texture
	 * decl s3 ; Sampler for ref surface texture
	 */
	for (i = 0; i < 4; ++i)
	{
		decl = vl_decl_samplers(i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * tex2d t1, i0, s0 ; Read texel from luma texture
	 * mov t0.x, t1.x ; Move luma sample into .x component
	 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
	 * mov t0.y, t1.x ; Move Cb sample into .y component
	 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
	 * mov t0.z, t1.x ; Move Cr sample into .z component
	 */
	for (i = 0; i < 3; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

		/* Replicate t1.x via swizzle and mask the write so each plane lands in its own channel of t0 */
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	}

	/* mul t0, t0, c0 ; Rescale texel to correct range */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t1, i3, s3 ; Read texel from ref macroblock top field
	 * tex2d t2, i4, s3 ; Read texel from ref macroblock bottom field
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* XXX: Pos values off by 0.5? */
	/* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */
	/* Broadcast i5.y and c1.x across all channels so the parity math works componentwise */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* floor t3, t3 ; Get rid of fractional part */
	inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t3, c1.y ; Multiply by 2 */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
	/* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* add o0, t0, t1 ; Add ref and differential to form final output */
	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	fs.tokens = tokens;
	mc->p_fs[1] = pipe->create_fs_state(pipe, &fs);
	free(tokens);

	return 0;
}
1505
1506 static int vlCreateVertexShaderFrameBMB
1507 (
1508 struct vlR16SnormBufferedMC *mc
1509 )
1510 {
1511 const unsigned int max_tokens = 100;
1512
1513 struct pipe_context *pipe;
1514 struct pipe_shader_state vs;
1515 struct tgsi_token *tokens;
1516 struct tgsi_header *header;
1517
1518 struct tgsi_full_declaration decl;
1519 struct tgsi_full_instruction inst;
1520
1521 unsigned int ti;
1522 unsigned int i;
1523
1524 assert(mc);
1525
1526 pipe = mc->pipe;
1527 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1528
1529 /* Version */
1530 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1531 /* Header */
1532 header = (struct tgsi_header*)&tokens[1];
1533 *header = tgsi_build_header();
1534 /* Processor */
1535 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1536
1537 ti = 3;
1538
1539 /*
1540 * decl i0 ; Vertex pos
1541 * decl i1 ; Luma texcoords
1542 * decl i2 ; Chroma Cb texcoords
1543 * decl i3 ; Chroma Cr texcoords
1544 * decl i4 ; First ref macroblock top field texcoords
1545 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
1546 * decl i6 ; Second ref macroblock top field texcoords
1547 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
1548 */
1549 for (i = 0; i < 8; i++)
1550 {
1551 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1552 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1553 }
1554
1555 /*
1556 * decl o0 ; Vertex pos
1557 * decl o1 ; Luma texcoords
1558 * decl o2 ; Chroma Cb texcoords
1559 * decl o3 ; Chroma Cr texcoords
1560 * decl o4 ; First ref macroblock texcoords
1561 * decl o5 ; Second ref macroblock texcoords
1562 */
1563 for (i = 0; i < 6; i++)
1564 {
1565 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1566 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1567 }
1568
1569 /*
1570 * mov o0, i0 ; Move input vertex pos to output
1571 * mov o1, i1 ; Move input luma texcoords to output
1572 * mov o2, i2 ; Move input chroma Cb texcoords to output
1573 * mov o3, i3 ; Move input chroma Cr texcoords to output
1574 */
1575 for (i = 0; i < 4; ++i)
1576 {
1577 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1578 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1579 }
1580
1581 /*
1582 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
1583 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
1584 */
1585 for (i = 0; i < 2; ++i)
1586 {
1587 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
1588 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1589 }
1590
1591 /* end */
1592 inst = vl_end();
1593 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1594
1595 vs.tokens = tokens;
1596 mc->b_vs[0] = pipe->create_vs_state(pipe, &vs);
1597 free(tokens);
1598
1599 return 0;
1600 }
1601
/*
 * Assembles the vertex shader for field-based bi-predicted (B) macroblocks
 * and stores it in mc->b_vs[1]. Passes through pos and Y/Cb/Cr texcoords,
 * forms top/bottom field texcoords for both reference macroblocks by adding
 * the motion vectors to the vertex pos, and emits a denormalized copy of
 * the vertex pos used by the fragment shader for field selection.
 */
static int vlCreateVertexShaderFieldBMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 100;

	struct pipe_context *pipe;
	struct pipe_shader_state vs;
	struct tgsi_token *tokens;
	struct tgsi_header *header;

	struct tgsi_full_declaration decl;
	struct tgsi_full_instruction inst;

	unsigned int ti;
	unsigned int i;

	assert(mc);

	pipe = mc->pipe;
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);

	ti = 3;

	/*
	 * decl i0 ; Vertex pos
	 * decl i1 ; Luma texcoords
	 * decl i2 ; Chroma Cb texcoords
	 * decl i3 ; Chroma Cr texcoords
	 * decl i4 ; First ref macroblock top field texcoords
	 * decl i5 ; First ref macroblock bottom field texcoords
	 * decl i6 ; Second ref macroblock top field texcoords
	 * decl i7 ; Second ref macroblock bottom field texcoords
	 */
	for (i = 0; i < 8; i++)
	{
		decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/* decl c0 ; Denorm coefficients */
	/* NOTE(review): the last argument (6) declares constants 0..6, but the comment
	   and the code below only reference c0; the PMB variant passes 0 here — verify
	   whether this should also be 0. */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl o0 ; Vertex pos
	 * decl o1 ; Luma texcoords
	 * decl o2 ; Chroma Cb texcoords
	 * decl o3 ; Chroma Cr texcoords
	 * decl o4 ; First ref macroblock top field texcoords
	 * decl o5 ; First ref macroblock Bottom field texcoords
	 * decl o6 ; Second ref macroblock top field texcoords
	 * decl o7 ; Second ref macroblock Bottom field texcoords
	 * decl o8 ; Denormalized vertex pos
	 */
	for (i = 0; i < 9; i++)
	{
		decl = vl_decl_output((i == 0 || i == 8) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/* decl t0, t1 */
	/* NOTE(review): t0/t1 are declared but no instruction below reads or writes a
	   temporary — this declaration looks unnecessary; confirm before removing. */
	decl = vl_decl_temps(0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * mov o0, i0 ; Move input vertex pos to output
	 * mov o1, i1 ; Move input luma texcoords to output
	 * mov o2, i2 ; Move input chroma Cb texcoords to output
	 * mov o3, i3 ; Move input chroma Cr texcoords to output
	 */
	for (i = 0; i < 4; ++i)
	{
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
	 * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
	 * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
	 * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
	 */
	for (i = 0; i < 4; ++i)
	{
		inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* mul o8, i0, c0 ; Denorm vertex pos */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 8, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	vs.tokens = tokens;
	mc->b_vs[1] = pipe->create_vs_state(pipe, &vs);
	free(tokens);

	return 0;
}
1714
/*
 * Assembles the fragment shader for frame-based bi-predicted (B)
 * macroblocks and stores it in mc->b_fs[0]. It gathers and rescales the
 * Y/Cb/Cr difference samples, fetches texels from both the past and the
 * future reference macroblocks, blends them with a lerp weighted by c1.x,
 * and adds the blend to the differential to form the final color.
 */
static int vlCreateFragmentShaderFrameBMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 100;

	struct pipe_context *pipe;
	struct pipe_shader_state fs;
	struct tgsi_token *tokens;
	struct tgsi_header *header;

	struct tgsi_full_declaration decl;
	struct tgsi_full_instruction inst;

	unsigned int ti;
	unsigned int i;

	assert(mc);

	pipe = mc->pipe;
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

	ti = 3;

	/*
	 * decl i0 ; Luma texcoords
	 * decl i1 ; Chroma Cb texcoords
	 * decl i2 ; Chroma Cr texcoords
	 * decl i3 ; First ref macroblock texcoords
	 * decl i4 ; Second ref macroblock texcoords
	 */
	/* Semantic index i + 1 lines up with the vertex shader's generic outputs o1..o5 */
	for (i = 0; i < 5; ++i)
	{
		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
	 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
	 */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl o0 ; Fragment color */
	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl t0-t2 */
	decl = vl_decl_temps(0, 2);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl s0 ; Sampler for luma texture
	 * decl s1 ; Sampler for chroma Cb texture
	 * decl s2 ; Sampler for chroma Cr texture
	 * decl s3 ; Sampler for first ref surface texture
	 * decl s4 ; Sampler for second ref surface texture
	 */
	for (i = 0; i < 5; ++i)
	{
		decl = vl_decl_samplers(i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * tex2d t1, i0, s0 ; Read texel from luma texture
	 * mov t0.x, t1.x ; Move luma sample into .x component
	 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
	 * mov t0.y, t1.x ; Move Cb sample into .y component
	 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
	 * mov t0.z, t1.x ; Move Cr sample into .z component
	 */
	for (i = 0; i < 3; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

		/* Replicate t1.x via swizzle and mask the write so each plane lands in its own channel of t0 */
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	}

	/* mul t0, t0, c0 ; Rescale texel to correct range */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
	 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* add o0, t0, t1 ; Add past/future ref and differential to form final output */
	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	fs.tokens = tokens;
	mc->b_fs[0] = pipe->create_fs_state(pipe, &fs);
	free(tokens);

	return 0;
}
1847
/*
 * Builds the fragment shader used for field-predicted bi-directional
 * macroblocks (B-type, field prediction) and stores it in mc->b_fs[1].
 *
 * The shader samples the luma/Cb/Cr difference textures, rescales them,
 * selects top/bottom field texels from the past and future reference
 * surfaces based on (Y % 2) of the denormalized fragment position, blends
 * past and future with a 1/2 weight, and adds the result to the
 * differential to form the final color.
 *
 * Always returns 0; NOTE(review): malloc() result is not checked and the
 * token buffer is a fixed 200-token budget — assumed large enough for the
 * instruction stream built below. TODO confirm against tgsi_build limits.
 */
static int vlCreateFragmentShaderFieldBMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 200;

	struct pipe_context *pipe;
	struct pipe_shader_state fs;
	struct tgsi_token *tokens;
	struct tgsi_header *header;

	struct tgsi_full_declaration decl;
	struct tgsi_full_instruction inst;

	unsigned int ti;
	unsigned int i;

	assert(mc);

	pipe = mc->pipe;
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

	ti = 3;

	/*
	 * decl i0 ; Luma texcoords
	 * decl i1 ; Chroma Cb texcoords
	 * decl i2 ; Chroma Cr texcoords
	 * decl i3 ; First ref macroblock top field texcoords
	 * decl i4 ; First ref macroblock bottom field texcoords
	 * decl i5 ; Second ref macroblock top field texcoords
	 * decl i6 ; Second ref macroblock bottom field texcoords
	 * decl i7 ; Denormalized vertex pos
	 */
	for (i = 0; i < 8; ++i)
	{
		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
	 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
	 *         ; and for Y-mod-2 top/bottom field selection
	 */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl o0 ; Fragment color */
	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl t0-t5 */
	decl = vl_decl_temps(0, 5);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl s0 ; Sampler for luma texture
	 * decl s1 ; Sampler for chroma Cb texture
	 * decl s2 ; Sampler for chroma Cr texture
	 * decl s3 ; Sampler for first ref surface texture
	 * decl s4 ; Sampler for second ref surface texture
	 */
	for (i = 0; i < 5; ++i)
	{
		decl = vl_decl_samplers(i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * Gather the YCbCr differential into t0, one channel per iteration:
	 *
	 * tex2d t1, i0, s0 ; Read texel from luma texture
	 * mov t0.x, t1.x   ; Move luma sample into .x component
	 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
	 * mov t0.y, t1.x   ; Move Cb sample into .y component
	 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
	 * mov t0.z, t1.x   ; Move Cr sample into .z component
	 */
	for (i = 0; i < 3; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	}

	/* mul t0, t0, c0 ; Rescale texel to correct range */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* XXX: Pos values off by 0.5? */
	/* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 7, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* floor t3, t3 ; Get rid of fractional part */
	inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t3, c1.y ; Multiply by 2 */
	inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field
	 * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
	/* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field
	 * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 5, TGSI_FILE_SAMPLER, 4);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
	/* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* lerp t1, c1.x, t1, t2 ; Blend past and future texels with 1/2 weight */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* add o0, t0, t1 ; Add past/future ref and differential to form final output */
	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	fs.tokens = tokens;
	/* Driver copies the token stream at create time, so tokens can be freed */
	mc->b_fs[1] = pipe->create_fs_state(pipe, &fs);
	free(tokens);

	return 0;
}
2041
/*
 * Creates the per-frame data buffers: NUM_BUF_SETS rotating sets of vertex
 * buffers (one position/texcoord stream plus two ref-surface texcoord
 * streams), the vertex element layout shared by all shaders, the VS/FS
 * constant buffers, and the CPU-side macroblock scratch array.
 *
 * Always returns 0; NOTE(review): winsys buffer_create and malloc results
 * are not checked here — consistent with the rest of this file, but a
 * failed allocation will crash later at render time.
 */
static int vlCreateDataBufs
(
	struct vlR16SnormBufferedMC *mc
)
{
	/* Picture dimensions rounded up to whole macroblocks */
	const unsigned int mbw = align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH;
	const unsigned int mbh = align(mc->picture_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT;
	const unsigned int num_mb_per_frame = mbw * mbh;

	struct pipe_context *pipe;
	unsigned int h, i;

	assert(mc);

	pipe = mc->pipe;

	/*
	 * Create our vertex buffers. Each macroblock is 24 vertices; stream 0
	 * interleaves 4 vlVertex2f per vertex (pos + luma/Cb/Cr texcoords),
	 * streams 1 and 2 interleave 2 per vertex (top/bottom field texcoords
	 * for the first and second reference surface respectively).
	 */
	for (h = 0; h < NUM_BUF_SETS; ++h)
	{
		mc->vertex_bufs[h][0].pitch = sizeof(struct vlVertex2f) * 4;
		mc->vertex_bufs[h][0].max_index = 24 * num_mb_per_frame - 1;
		mc->vertex_bufs[h][0].buffer_offset = 0;
		mc->vertex_bufs[h][0].buffer = pipe->winsys->buffer_create
		(
			pipe->winsys,
			1,
			PIPE_BUFFER_USAGE_VERTEX,
			sizeof(struct vlVertex2f) * 4 * 24 * num_mb_per_frame
		);

		for (i = 1; i < 3; ++i)
		{
			mc->vertex_bufs[h][i].pitch = sizeof(struct vlVertex2f) * 2;
			mc->vertex_bufs[h][i].max_index = 24 * num_mb_per_frame - 1;
			mc->vertex_bufs[h][i].buffer_offset = 0;
			mc->vertex_bufs[h][i].buffer = pipe->winsys->buffer_create
			(
				pipe->winsys,
				1,
				PIPE_BUFFER_USAGE_VERTEX,
				sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame
			);
		}
	}

	/* Position element */
	mc->vertex_elems[0].src_offset = 0;
	mc->vertex_elems[0].vertex_buffer_index = 0;
	mc->vertex_elems[0].nr_components = 2;
	mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;

	/* Luma texcoord element */
	mc->vertex_elems[1].src_offset = sizeof(struct vlVertex2f);
	mc->vertex_elems[1].vertex_buffer_index = 0;
	mc->vertex_elems[1].nr_components = 2;
	mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;

	/* Chroma Cb texcoord element (shader input i1 is Cb; original comment
	   labeled this Cr, apparently swapped with the next element) */
	mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f) * 2;
	mc->vertex_elems[2].vertex_buffer_index = 0;
	mc->vertex_elems[2].nr_components = 2;
	mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;

	/* Chroma Cr texcoord element (shader input i2 is Cr) */
	mc->vertex_elems[3].src_offset = sizeof(struct vlVertex2f) * 3;
	mc->vertex_elems[3].vertex_buffer_index = 0;
	mc->vertex_elems[3].nr_components = 2;
	mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;

	/* First ref surface top field texcoord element */
	mc->vertex_elems[4].src_offset = 0;
	mc->vertex_elems[4].vertex_buffer_index = 1;
	mc->vertex_elems[4].nr_components = 2;
	mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;

	/* First ref surface bottom field texcoord element */
	mc->vertex_elems[5].src_offset = sizeof(struct vlVertex2f);
	mc->vertex_elems[5].vertex_buffer_index = 1;
	mc->vertex_elems[5].nr_components = 2;
	mc->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;

	/* Second ref surface top field texcoord element */
	mc->vertex_elems[6].src_offset = 0;
	mc->vertex_elems[6].vertex_buffer_index = 2;
	mc->vertex_elems[6].nr_components = 2;
	mc->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;

	/* Second ref surface bottom field texcoord element */
	mc->vertex_elems[7].src_offset = sizeof(struct vlVertex2f);
	mc->vertex_elems[7].vertex_buffer_index = 2;
	mc->vertex_elems[7].nr_components = 2;
	mc->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;

	/* Create our constant buffer */
	mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts);
	mc->vs_const_buf.buffer = pipe->winsys->buffer_create
	(
		pipe->winsys,
		1,
		PIPE_BUFFER_USAGE_CONSTANT,
		mc->vs_const_buf.size
	);

	mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts);
	mc->fs_const_buf.buffer = pipe->winsys->buffer_create
	(
		pipe->winsys,
		1,
		PIPE_BUFFER_USAGE_CONSTANT,
		mc->fs_const_buf.size
	);

	/* Upload the FS constants once; fs_consts is presumably a file-scope
	   constant table defined earlier in this file — TODO confirm */
	memcpy
	(
		pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
		&fs_consts,
		sizeof(struct vlFragmentShaderConsts)
	);

	pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer);

	/* CPU-side staging area for one frame's worth of macroblocks */
	mc->macroblocks = malloc(sizeof(struct vlMpeg2MacroBlock) * num_mb_per_frame);

	return 0;
}
2167
2168 static int vlInit
2169 (
2170 struct vlR16SnormBufferedMC *mc
2171 )
2172 {
2173 struct pipe_context *pipe;
2174 struct pipe_sampler_state sampler;
2175 struct pipe_texture template;
2176 unsigned int filters[5];
2177 unsigned int i;
2178
2179 assert(mc);
2180
2181 pipe = mc->pipe;
2182
2183 /* For MC we render to textures, which are rounded up to nearest POT */
2184 mc->viewport.scale[0] = vlRoundUpPOT(mc->picture_width);
2185 mc->viewport.scale[1] = vlRoundUpPOT(mc->picture_height);
2186 mc->viewport.scale[2] = 1;
2187 mc->viewport.scale[3] = 1;
2188 mc->viewport.translate[0] = 0;
2189 mc->viewport.translate[1] = 0;
2190 mc->viewport.translate[2] = 0;
2191 mc->viewport.translate[3] = 0;
2192
2193 mc->render_target.width = vlRoundUpPOT(mc->picture_width);
2194 mc->render_target.height = vlRoundUpPOT(mc->picture_height);
2195 mc->render_target.num_cbufs = 1;
2196 /* FB for MC stage is a vlSurface created by the user, set at render time */
2197 mc->render_target.zsbuf = NULL;
2198
2199 filters[0] = PIPE_TEX_FILTER_NEAREST;
2200 /* FIXME: Linear causes discoloration around block edges */
2201 filters[1] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
2202 filters[2] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
2203 filters[3] = PIPE_TEX_FILTER_LINEAR;
2204 filters[4] = PIPE_TEX_FILTER_LINEAR;
2205
2206 for (i = 0; i < 5; ++i)
2207 {
2208 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2209 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2210 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2211 sampler.min_img_filter = filters[i];
2212 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2213 sampler.mag_img_filter = filters[i];
2214 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
2215 sampler.compare_func = PIPE_FUNC_ALWAYS;
2216 sampler.normalized_coords = 1;
2217 /*sampler.prefilter = ;*/
2218 /*sampler.shadow_ambient = ;*/
2219 /*sampler.lod_bias = ;*/
2220 sampler.min_lod = 0;
2221 /*sampler.max_lod = ;*/
2222 /*sampler.border_color[i] = ;*/
2223 /*sampler.max_anisotropy = ;*/
2224 mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler);
2225 }
2226
2227 memset(&template, 0, sizeof(struct pipe_texture));
2228 template.target = PIPE_TEXTURE_2D;
2229 template.format = PIPE_FORMAT_R16_SNORM;
2230 template.last_level = 0;
2231 template.width[0] = vlRoundUpPOT(mc->picture_width);
2232 template.height[0] = vlRoundUpPOT(mc->picture_height);
2233 template.depth[0] = 1;
2234 template.compressed = 0;
2235 pf_get_block(template.format, &template.block);
2236
2237 for (i = 0; i < NUM_BUF_SETS; ++i)
2238 mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template);
2239
2240 if (mc->picture_format == vlFormatYCbCr420)
2241 {
2242 template.width[0] = vlRoundUpPOT(mc->picture_width / 2);
2243 template.height[0] = vlRoundUpPOT(mc->picture_height / 2);
2244 }
2245 else if (mc->picture_format == vlFormatYCbCr422)
2246 template.height[0] = vlRoundUpPOT(mc->picture_height / 2);
2247
2248 for (i = 0; i < NUM_BUF_SETS; ++i)
2249 {
2250 mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template);
2251 mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template);
2252 }
2253
2254 /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
2255
2256 vlCreateVertexShaderIMB(mc);
2257 vlCreateFragmentShaderIMB(mc);
2258 vlCreateVertexShaderFramePMB(mc);
2259 vlCreateVertexShaderFieldPMB(mc);
2260 vlCreateFragmentShaderFramePMB(mc);
2261 vlCreateFragmentShaderFieldPMB(mc);
2262 vlCreateVertexShaderFrameBMB(mc);
2263 vlCreateVertexShaderFieldBMB(mc);
2264 vlCreateFragmentShaderFrameBMB(mc);
2265 vlCreateFragmentShaderFieldBMB(mc);
2266 vlCreateDataBufs(mc);
2267
2268 return 0;
2269 }
2270
2271 int vlCreateR16SNormBufferedMC
2272 (
2273 struct pipe_context *pipe,
2274 unsigned int picture_width,
2275 unsigned int picture_height,
2276 enum vlFormat picture_format,
2277 struct vlRender **render
2278 )
2279 {
2280 struct vlR16SnormBufferedMC *mc;
2281
2282 assert(pipe);
2283 assert(render);
2284
2285 mc = calloc(1, sizeof(struct vlR16SnormBufferedMC));
2286
2287 mc->base.vlBegin = &vlBegin;
2288 mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered;
2289 mc->base.vlEnd = &vlEnd;
2290 mc->base.vlFlush = &vlFlush;
2291 mc->base.vlDestroy = &vlDestroy;
2292 mc->pipe = pipe;
2293 mc->picture_width = picture_width;
2294 mc->picture_height = picture_height;
2295
2296 mc->cur_buf = 0;
2297 mc->buffered_surface = NULL;
2298 mc->past_surface = NULL;
2299 mc->future_surface = NULL;
2300 mc->num_macroblocks = 0;
2301
2302 vlInit(mc);
2303
2304 *render = &mc->base;
2305
2306 return 0;
2307 }