Merge branch 'gallium-0.2' of git+ssh://marcheu@git.freedesktop.org/git/nouveau/mesa...
[mesa.git] / src / gallium / state_trackers / g3dvl / vl_r16snorm_mc_buf.c
1 #define VL_INTERNAL
2 #include "vl_r16snorm_mc_buf.h"
3 #include <assert.h>
4 #include <stdlib.h>
5 #include <pipe/p_context.h>
6 #include <pipe/p_winsys.h>
7 #include <pipe/p_screen.h>
8 #include <pipe/p_state.h>
9 #include <pipe/p_inlines.h>
10 #include <tgsi/tgsi_parse.h>
11 #include <tgsi/tgsi_build.h>
12 #include <util/u_math.h>
13 #include "vl_render.h"
14 #include "vl_shader_build.h"
15 #include "vl_surface.h"
16 #include "vl_util.h"
17 #include "vl_types.h"
18 #include "vl_defs.h"
19
/*
 * TODO: Dynamically determine the number of buffer sets to use, based on
 * video size and available memory, since we can easily run out of memory
 * for high-res videos.
 * Note: Destroying the previous frame's buffers and creating new ones
 * doesn't work, since the buffers are not actually destroyed until their
 * fence is signalled, and if we render fast enough we will create buffers
 * faster than we destroy them.
 */
29 #define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */
30
/*
 * Extended macroblock type: the base MPEG2 prediction type
 * (intra/fwd/bkwd/bi) further split by motion type (frame vs field),
 * since each variant is drawn with its own shader pair (see vlFlush).
 */
enum vlMacroBlockTypeEx
{
	vlMacroBlockExTypeIntra,
	vlMacroBlockExTypeFwdPredictedFrame,
	vlMacroBlockExTypeFwdPredictedField,
	vlMacroBlockExTypeBkwdPredictedFrame,
	vlMacroBlockExTypeBkwdPredictedField,
	vlMacroBlockExTypeBiPredictedFrame,
	vlMacroBlockExTypeBiPredictedField,

	vlNumMacroBlockExTypes	/* Count of extended types, used to size per-type arrays */
};
43
/* Constant buffer layout consumed by the vertex shaders. */
struct vlVertexShaderConsts
{
	struct vlVertex4f denorm;	/* Target surface width/height, set in vlFlush to de-normalize coords */
};
48
/* Constant buffer layout consumed by the fragment shaders (see fs_consts). */
struct vlFragmentShaderConsts
{
	struct vlVertex4f multiplier;	/* Rescales 16-bit block samples (see fs_consts comment) */
	struct vlVertex4f div;		/* Divider constants used for top/bottom field selection */
};
54
/*
 * Private state of the buffered R16-snorm motion-compensation renderer.
 * 'base' must remain the first member: the public struct vlRender pointer
 * is cast directly to this type throughout the file.
 */
struct vlR16SnormBufferedMC
{
	struct vlRender base;

	unsigned int picture_width, picture_height;	/* Picture size in pixels */
	enum vlFormat picture_format;

	unsigned int cur_buf;				/* Rotating buffer-set counter; indexed mod NUM_BUF_SETS */
	struct vlSurface *buffered_surface;		/* Surface macroblocks are being accumulated for, or NULL */
	struct vlSurface *past_surface, *future_surface;	/* Reference surfaces for predicted macroblocks */
	struct vlVertex2f surface_tex_inv_size;		/* 1 / surface texture dimensions, for normalized texcoords */
	struct vlVertex2f zero_block[3];		/* Per-channel texcoords of a cached all-zero block; x < 0 means none yet */
	unsigned int num_macroblocks;			/* Number of macroblocks buffered so far */
	struct vlMpeg2MacroBlock *macroblocks;		/* Buffered copies of submitted macroblocks */

	struct pipe_context *pipe;
	struct pipe_viewport_state viewport;
	struct pipe_framebuffer_state render_target;
	struct pipe_sampler_state *samplers[5];		/* Luma, Cb, Cr, past ref, future ref */
	struct pipe_texture *textures[NUM_BUF_SETS][5];	/* Per buffer set; [3]/[4] aliased to ref surfaces in vlFlush */
	void *i_vs, *p_vs[2], *b_vs[2];			/* Vertex shaders: intra, pred {frame,field}, bi {frame,field} */
	void *i_fs, *p_fs[2], *b_fs[2];			/* Fragment shaders, same layout as the vertex shaders */
	struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][3];	/* Streams: 0 pos+texcoords, 1/2 motion vectors */
	struct pipe_vertex_element vertex_elems[8];
	struct pipe_constant_buffer vs_const_buf, fs_const_buf;
};
81
/* Begin a rendering batch; this renderer needs no per-batch setup. */
static int vlBegin(struct vlRender *render)
{
	assert(render);
	return 0;
}
91
92 static inline int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch)
93 {
94 unsigned int y;
95
96 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
97 memcpy
98 (
99 dst + y * dst_pitch,
100 src + y * VL_BLOCK_WIDTH,
101 VL_BLOCK_WIDTH * 2
102 );
103
104 return 0;
105 }
106
107 static inline int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch)
108 {
109 unsigned int y;
110
111 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
112 memcpy
113 (
114 dst + y * dst_pitch * 2,
115 src + y * VL_BLOCK_WIDTH,
116 VL_BLOCK_WIDTH * 2
117 );
118
119 return 0;
120 }
121
122 static inline int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
123 {
124 unsigned int y;
125
126 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
127 memset
128 (
129 dst + y * dst_pitch,
130 0,
131 VL_BLOCK_WIDTH * 2
132 );
133
134 return 0;
135 }
136
/*
 * Upload the coded DCT blocks of one macroblock into the current buffer
 * set's luma/Cb/Cr textures at the macroblock's position.  Blocks whose
 * coded-block-pattern bit is clear are skipped; the first such skip per
 * channel is used to write an all-zero block whose texcoords are cached
 * in mc->zero_block so later uncoded blocks can reference it instead.
 */
static inline int vlGrabBlocks
(
	struct vlR16SnormBufferedMC *mc,
	unsigned int mbx,
	unsigned int mby,
	enum vlDCTType dct_type,
	unsigned int coded_block_pattern,
	short *blocks
)
{
	struct pipe_surface *tex_surface;
	short *texels;
	unsigned int tex_pitch;
	unsigned int x, y, tb = 0, sb = 0;	/* tb: texture block index, sb: index into packed src blocks */
	unsigned int mbpx = mbx * VL_MACROBLOCK_WIDTH, mbpy = mby * VL_MACROBLOCK_HEIGHT;	/* Macroblock origin in pixels */

	assert(mc);
	assert(blocks);

	/* Map the luma texture of the current buffer set for CPU writes */
	tex_surface = mc->pipe->screen->get_tex_surface
	(
		mc->pipe->screen,
		mc->textures[mc->cur_buf % NUM_BUF_SETS][0],
		0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
	);

	texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
	tex_pitch = tex_surface->stride / tex_surface->block.size;	/* Pitch in texels, not bytes */

	texels += mbpy * tex_pitch + mbpx;

	/* Four luma blocks in a 2x2 layout; cbp bits 5..2 select them in order */
	for (y = 0; y < 2; ++y)
	{
		for (x = 0; x < 2; ++x, ++tb)
		{
			if ((coded_block_pattern >> (5 - tb)) & 1)
			{
				/* Coded blocks are packed back-to-back in 'blocks' */
				short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

				if (dct_type == vlDCTTypeFrameCoded)
				{
					vlGrabFrameCodedBlock
					(
						cur_block,
						texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH,
						tex_pitch
					);
				}
				else
				{
					/* Field coded: the two vertical blocks start one row apart
					   and interleave rows in the destination */
					vlGrabFieldCodedBlock
					(
						cur_block,
						texels + y * tex_pitch + x * VL_BLOCK_WIDTH,
						tex_pitch
					);
				}

				++sb;
			}
			else if (mc->zero_block[0].x < 0.0f)
			{
				/* No zero block cached for luma yet; write one here and
				   remember its normalized texcoords */
				vlGrabNoBlock(texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, tex_pitch);

				mc->zero_block[0].x = (mbpx + x * 8) * mc->surface_tex_inv_size.x;
				mc->zero_block[0].y = (mbpy + y * 8) * mc->surface_tex_inv_size.y;
			}
		}
	}

	pipe_surface_unmap(tex_surface);

	/* TODO: Implement 422, 444 */
	/* 4:2:0 chroma planes are half-size in each dimension */
	mbpx >>= 1;
	mbpy >>= 1;

	/* One Cb and one Cr block; cbp bits 1..0 select them */
	for (tb = 0; tb < 2; ++tb)
	{
		tex_surface = mc->pipe->screen->get_tex_surface
		(
			mc->pipe->screen,
			mc->textures[mc->cur_buf % NUM_BUF_SETS][tb + 1],
			0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
		);

		texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
		tex_pitch = tex_surface->stride / tex_surface->block.size;

		texels += mbpy * tex_pitch + mbpx;

		if ((coded_block_pattern >> (1 - tb)) & 1)
		{
			short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

			/* Chroma blocks are always frame coded */
			vlGrabFrameCodedBlock
			(
				cur_block,
				texels,
				tex_pitch
			);

			++sb;
		}
		else if (mc->zero_block[tb + 1].x < 0.0f)
		{
			vlGrabNoBlock(texels, tex_pitch);

			/* Cached coords are scaled back up (<< 1) to the luma coordinate space */
			mc->zero_block[tb + 1].x = (mbpx << 1) * mc->surface_tex_inv_size.x;
			mc->zero_block[tb + 1].y = (mbpy << 1) * mc->surface_tex_inv_size.y;
		}

		pipe_surface_unmap(tex_surface);
	}

	return 0;
}
253
254 static inline enum vlMacroBlockTypeEx vlGetMacroBlockTypeEx(struct vlMpeg2MacroBlock *mb)
255 {
256 assert(mb);
257
258 switch (mb->mb_type)
259 {
260 case vlMacroBlockTypeIntra:
261 return vlMacroBlockExTypeIntra;
262 case vlMacroBlockTypeFwdPredicted:
263 return mb->mo_type == vlMotionTypeFrame ?
264 vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField;
265 case vlMacroBlockTypeBkwdPredicted:
266 return mb->mo_type == vlMotionTypeFrame ?
267 vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField;
268 case vlMacroBlockTypeBiPredicted:
269 return mb->mo_type == vlMotionTypeFrame ?
270 vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField;
271 default:
272 assert(0);
273 }
274
275 /* Unreachable */
276 return -1;
277 }
278
279 static inline int vlGrabMacroBlock
280 (
281 struct vlR16SnormBufferedMC *mc,
282 struct vlMpeg2MacroBlock *macroblock
283 )
284 {
285 assert(mc);
286 assert(macroblock);
287
288 mc->macroblocks[mc->num_macroblocks].mbx = macroblock->mbx;
289 mc->macroblocks[mc->num_macroblocks].mby = macroblock->mby;
290 mc->macroblocks[mc->num_macroblocks].mb_type = macroblock->mb_type;
291 mc->macroblocks[mc->num_macroblocks].mo_type = macroblock->mo_type;
292 mc->macroblocks[mc->num_macroblocks].dct_type = macroblock->dct_type;
293 mc->macroblocks[mc->num_macroblocks].PMV[0][0][0] = macroblock->PMV[0][0][0];
294 mc->macroblocks[mc->num_macroblocks].PMV[0][0][1] = macroblock->PMV[0][0][1];
295 mc->macroblocks[mc->num_macroblocks].PMV[0][1][0] = macroblock->PMV[0][1][0];
296 mc->macroblocks[mc->num_macroblocks].PMV[0][1][1] = macroblock->PMV[0][1][1];
297 mc->macroblocks[mc->num_macroblocks].PMV[1][0][0] = macroblock->PMV[1][0][0];
298 mc->macroblocks[mc->num_macroblocks].PMV[1][0][1] = macroblock->PMV[1][0][1];
299 mc->macroblocks[mc->num_macroblocks].PMV[1][1][0] = macroblock->PMV[1][1][0];
300 mc->macroblocks[mc->num_macroblocks].PMV[1][1][1] = macroblock->PMV[1][1][1];
301 mc->macroblocks[mc->num_macroblocks].cbp = macroblock->cbp;
302 mc->macroblocks[mc->num_macroblocks].blocks = macroblock->blocks;
303
304 vlGrabBlocks
305 (
306 mc,
307 macroblock->mbx,
308 macroblock->mby,
309 macroblock->dct_type,
310 macroblock->cbp,
311 macroblock->blocks
312 );
313
314 mc->num_macroblocks++;
315
316 return 0;
317 }
318
/*
 * Emit the 6 vertices (2 triangles) of one 8x8 block quad into vertex
 * stream 0 at (vb): positions always, plus luma/Cb/Cr texcoords.  For
 * each channel, if the corresponding coded-block-pattern bit (lm, cbm,
 * crm) is set in (cbp), the texcoords address the block itself;
 * otherwise they address the cached all-zero block (zb) so the shader
 * samples zeroes.  (unitx, unity) is one macroblock in texcoord units,
 * (ofsx, ofsy) selects the block within the macroblock, and (hx, hy) is
 * half a macroblock, i.e. one block.
 */
#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zb) \
(vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \
(vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \
(vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \
(vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \
(vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \
(vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \
\
if ((cbp) & (lm)) \
{ \
	(vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \
	(vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
	(vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \
	(vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \
	(vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
	(vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
} \
else \
{ \
	(vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \
	(vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \
	(vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \
	(vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \
	(vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \
	(vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \
} \
\
if ((cbp) & (cbm)) \
{ \
	(vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \
	(vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
	(vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \
	(vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \
	(vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
	(vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
} \
else \
{ \
	(vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \
	(vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \
	(vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \
	(vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \
	(vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \
	(vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \
} \
\
if ((cbp) & (crm)) \
{ \
	(vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \
	(vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
	(vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \
	(vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \
	(vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
	(vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
} \
else \
{ \
	(vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \
	(vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \
	(vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \
	(vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \
	(vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \
	(vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \
}
383
/*
 * Write one macroblock's vertex data into the current buffer set's
 * vertex buffers at slot 'pos' (each macroblock occupies 24 vertices:
 * 4 block quads of 6 vertices each).  Depending on prediction type, up
 * to three streams are filled: stream 2 (second-reference motion
 * vectors, bi-predicted only), stream 1 (first-reference motion
 * vectors, any predicted type) and stream 0 (positions and luma/Cb/Cr
 * texcoords, all types).  The case labels deliberately fall through
 * from the most to the least specific stream.
 */
static inline int vlGrabMacroBlockVB
(
	struct vlR16SnormBufferedMC *mc,
	struct vlMpeg2MacroBlock *macroblock,
	unsigned int pos
)
{
	struct vlVertex2f mo_vec[2];	/* [0]: first field/frame vector, [1]: second field vector */
	unsigned int i;

	assert(mc);
	assert(macroblock);

	switch (macroblock->mb_type)
	{
		case vlMacroBlockTypeBiPredicted:
		{
			struct vlVertex2f *vb;

			/* Stream 2: backward (second reference) motion vectors.
			   Half-pel PMV units are scaled to normalized texcoords. */
			vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
			(
				mc->pipe->winsys,
				mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE
			) + pos * 2 * 24;	/* 2 vectors per vertex, 24 vertices per macroblock */

			mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
			mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

			if (macroblock->mo_type == vlMotionTypeFrame)
			{
				/* Frame motion: same vector for both per-vertex slots */
				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
				}
			}
			else
			{
				/* Field motion: second field gets its own vector */
				mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
				mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;

				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
					vb[i + 1].x = mo_vec[1].x;
					vb[i + 1].y = mo_vec[1].y;
				}
			}

			mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer);

			/* fall-through */
		}
		case vlMacroBlockTypeFwdPredicted:
		case vlMacroBlockTypeBkwdPredicted:
		{
			struct vlVertex2f *vb;

			/* Stream 1: first-reference motion vectors.  For bkwd-only
			   prediction the backward PMVs are used, otherwise the
			   forward PMVs. */
			vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
			(
				mc->pipe->winsys,
				mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE
			) + pos * 2 * 24;

			if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted)
			{
				mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
				mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

				if (macroblock->mo_type == vlMotionTypeField)
				{
					mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
					mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;
				}
			}
			else
			{
				mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x;
				mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y;

				if (macroblock->mo_type == vlMotionTypeField)
				{
					mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x;
					mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y;
				}
			}

			if (macroblock->mo_type == vlMotionTypeFrame)
			{
				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
				}
			}
			else
			{
				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
					vb[i + 1].x = mo_vec[1].x;
					vb[i + 1].y = mo_vec[1].y;
				}
			}

			mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer);

			/* fall-through */
		}
		case vlMacroBlockTypeIntra:
		{
			/* One macroblock / one block in normalized texcoord units */
			const struct vlVertex2f unit =
			{
				mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH,
				mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT
			};
			const struct vlVertex2f half =
			{
				mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2),
				mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2)
			};

			/* Stream 0 layout: position + three texcoord pairs */
			struct vlMacroBlockVertexStream0
			{
				struct vlVertex2f pos;
				struct vlVertex2f luma_tc;
				struct vlVertex2f cb_tc;
				struct vlVertex2f cr_tc;
			} *vb;

			vb = (struct vlMacroBlockVertexStream0*)mc->pipe->winsys->buffer_map
			(
				mc->pipe->winsys,
				mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE
			) + pos * 24;

			/* Four block quads: top-left, top-right, bottom-left,
			   bottom-right; luma cbp masks 32/16/8/4, chroma 2/1 */
			SET_BLOCK
			(
				vb,
				macroblock->cbp, macroblock->mbx, macroblock->mby,
				unit.x, unit.y, 0, 0, half.x, half.y,
				32, 2, 1, mc->zero_block
			);

			SET_BLOCK
			(
				vb + 6,
				macroblock->cbp, macroblock->mbx, macroblock->mby,
				unit.x, unit.y, half.x, 0, half.x, half.y,
				16, 2, 1, mc->zero_block
			);

			SET_BLOCK
			(
				vb + 12,
				macroblock->cbp, macroblock->mbx, macroblock->mby,
				unit.x, unit.y, 0, half.y, half.x, half.y,
				8, 2, 1, mc->zero_block
			);

			SET_BLOCK
			(
				vb + 18,
				macroblock->cbp, macroblock->mbx, macroblock->mby,
				unit.x, unit.y, half.x, half.y, half.x, half.y,
				4, 2, 1, mc->zero_block
			);

			mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer);

			break;
		}
		default:
			assert(0);
	}

	return 0;
}
567
/*
 * Submit the buffered frame for rendering.  Does nothing until a full
 * frame's worth of macroblocks has been accumulated; then sorts the
 * macroblocks into buckets by extended type, fills the vertex buffers,
 * and issues one draw call per non-empty bucket with the matching
 * shader pair and reference textures.  Finally resets the buffering
 * state and advances to the next rotating buffer set.
 */
static int vlFlush
(
	struct vlRender *render
)
{
	struct vlR16SnormBufferedMC *mc;
	struct pipe_context *pipe;
	struct vlVertexShaderConsts *vs_consts;
	unsigned int num_macroblocks[vlNumMacroBlockExTypes] = {0};
	unsigned int offset[vlNumMacroBlockExTypes];
	unsigned int vb_start = 0;	/* Running first-vertex index into the shared vertex buffers */
	unsigned int mbw;
	unsigned int mbh;
	unsigned int num_mb_per_frame;
	unsigned int i;

	assert(render);

	mc = (struct vlR16SnormBufferedMC*)render;

	if (!mc->buffered_surface)
		return 0;

	mbw = align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH;
	mbh = align(mc->picture_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT;
	num_mb_per_frame = mbw * mbh;

	/* Keep buffering until the frame is complete */
	if (mc->num_macroblocks < num_mb_per_frame)
		return 0;

	pipe = mc->pipe;

	/* Count macroblocks of each extended type */
	for (i = 0; i < mc->num_macroblocks; ++i)
	{
		enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]);

		num_macroblocks[mb_type_ex]++;
	}

	/* Prefix sums give each type's starting slot in the vertex buffers */
	offset[0] = 0;

	for (i = 1; i < vlNumMacroBlockExTypes; ++i)
		offset[i] = offset[i - 1] + num_macroblocks[i - 1];

	/* Write vertex data, grouping macroblocks by extended type */
	for (i = 0; i < mc->num_macroblocks; ++i)
	{
		enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]);

		vlGrabMacroBlockVB(mc, &mc->macroblocks[i], offset[mb_type_ex]);

		offset[mb_type_ex]++;
	}

	/* Render into the buffered surface's texture */
	mc->render_target.cbufs[0] = pipe->screen->get_tex_surface
	(
		pipe->screen,
		mc->buffered_surface->texture,
		0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
	);

	pipe->set_framebuffer_state(pipe, &mc->render_target);
	pipe->set_viewport_state(pipe, &mc->viewport);
	vs_consts = pipe->winsys->buffer_map
	(
		pipe->winsys,
		mc->vs_const_buf.buffer,
		PIPE_BUFFER_USAGE_CPU_WRITE
	);

	/* De-normalization factors are the target surface dimensions */
	vs_consts->denorm.x = mc->buffered_surface->texture->width[0];
	vs_consts->denorm.y = mc->buffered_surface->texture->height[0];

	pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer);
	pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf);
	pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf);

	/* Intra: stream 0 only, 3 source textures, no references */
	if (num_macroblocks[vlMacroBlockExTypeIntra] > 0)
	{
		pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 4, mc->vertex_elems);
		pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->i_vs);
		pipe->bind_fs_state(pipe, mc->i_fs);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeIntra] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeIntra] * 24;
	}

	/* Forward predicted, frame motion: past surface as 4th texture */
	if (num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0)
	{
		pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
		mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
		pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->p_vs[0]);
		pipe->bind_fs_state(pipe, mc->p_fs[0]);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24;
	}

	/* Forward predicted, field motion */
	if (num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0)
	{
		pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
		mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
		pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->p_vs[1]);
		pipe->bind_fs_state(pipe, mc->p_fs[1]);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24;
	}

	/* Backward predicted, frame motion: future surface as 4th texture */
	if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0)
	{
		pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
		mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
		pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->p_vs[0]);
		pipe->bind_fs_state(pipe, mc->p_fs[0]);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24;
	}

	/* Backward predicted, field motion */
	if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0)
	{
		pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
		mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
		pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->p_vs[1]);
		pipe->bind_fs_state(pipe, mc->p_fs[1]);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24;
	}

	/* Bi-predicted, frame motion: both references as 4th/5th textures */
	if (num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0)
	{
		pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 8, mc->vertex_elems);
		mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
		mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
		pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->b_vs[0]);
		pipe->bind_fs_state(pipe, mc->b_fs[0]);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24;
	}

	/* Bi-predicted, field motion */
	if (num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0)
	{
		pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 8, mc->vertex_elems);
		mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
		mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
		pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->b_vs[1]);
		pipe->bind_fs_state(pipe, mc->b_fs[1]);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24;
	}

	/* Flush and attach the fence to the surface so consumers can wait on it */
	pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &mc->buffered_surface->render_fence);

	/* Invalidate the cached zero blocks for the next frame */
	for (i = 0; i < 3; ++i)
		mc->zero_block[i].x = -1.0f;

	mc->buffered_surface = NULL;
	mc->num_macroblocks = 0;
	mc->cur_buf++;	/* Rotate to the next buffer set */

	return 0;
}
754
755 static int vlRenderMacroBlocksMpeg2R16SnormBuffered
756 (
757 struct vlRender *render,
758 struct vlMpeg2MacroBlockBatch *batch,
759 struct vlSurface *surface
760 )
761 {
762 struct vlR16SnormBufferedMC *mc;
763 unsigned int i;
764
765 assert(render);
766
767 mc = (struct vlR16SnormBufferedMC*)render;
768
769 if (mc->buffered_surface)
770 {
771 if (mc->buffered_surface != surface)
772 {
773 vlFlush(&mc->base);
774 mc->buffered_surface = surface;
775 mc->past_surface = batch->past_surface;
776 mc->future_surface = batch->future_surface;
777 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
778 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
779 }
780 }
781 else
782 {
783 mc->buffered_surface = surface;
784 mc->past_surface = batch->past_surface;
785 mc->future_surface = batch->future_surface;
786 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
787 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
788 }
789
790 for (i = 0; i < batch->num_macroblocks; ++i)
791 vlGrabMacroBlock(mc, &batch->macroblocks[i]);
792
793 return 0;
794 }
795
/* End a rendering batch; this renderer needs no per-batch teardown. */
static int vlEnd(struct vlRender *render)
{
	assert(render);
	return 0;
}
805
/*
 * Destroy the renderer: release the samplers, the per-buffer-set vertex
 * buffers and textures, all shader states, the constant buffers and the
 * macroblock list, then free the renderer itself.
 */
static int vlDestroy
(
	struct vlRender *render
)
{
	struct vlR16SnormBufferedMC *mc;
	struct pipe_context *pipe;
	unsigned int h, i;

	assert(render);

	mc = (struct vlR16SnormBufferedMC*)render;
	pipe = mc->pipe;

	for (i = 0; i < 5; ++i)
		pipe->delete_sampler_state(pipe, mc->samplers[i]);

	for (h = 0; h < NUM_BUF_SETS; ++h)
		for (i = 0; i < 3; ++i)
			pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[h][i].buffer);

	/* Textures 3 & 4 are not created directly, no need to release them here */
	for (i = 0; i < NUM_BUF_SETS; ++i)
	{
		pipe_texture_release(&mc->textures[i][0]);
		pipe_texture_release(&mc->textures[i][1]);
		pipe_texture_release(&mc->textures[i][2]);
	}

	pipe->delete_vs_state(pipe, mc->i_vs);
	pipe->delete_fs_state(pipe, mc->i_fs);

	/* Predicted and bi-predicted shader pairs, frame and field variants */
	for (i = 0; i < 2; ++i)
	{
		pipe->delete_vs_state(pipe, mc->p_vs[i]);
		pipe->delete_fs_state(pipe, mc->p_fs[i]);
		pipe->delete_vs_state(pipe, mc->b_vs[i]);
		pipe->delete_fs_state(pipe, mc->b_fs[i]);
	}

	pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer);
	pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer);

	free(mc->macroblocks);
	free(mc);

	return 0;
}
854
/*
 * Multiplier renormalizes block samples from 16 bits to 12 bits.
 * Divider is used when calculating Y % 2 for choosing top or bottom
 * field for P or B macroblocks.
 * TODO: Use immediates.
 */
static const struct vlFragmentShaderConsts fs_consts =
{
	{32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
	{0.5f, 2.0f, 0.0f, 0.0f}
};
866
867 static int vlCreateVertexShaderIMB
868 (
869 struct vlR16SnormBufferedMC *mc
870 )
871 {
872 const unsigned int max_tokens = 50;
873
874 struct pipe_context *pipe;
875 struct pipe_shader_state vs;
876 struct tgsi_token *tokens;
877 struct tgsi_header *header;
878
879 struct tgsi_full_declaration decl;
880 struct tgsi_full_instruction inst;
881
882 unsigned int ti;
883 unsigned int i;
884
885 assert(mc);
886
887 pipe = mc->pipe;
888 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
889
890 /* Version */
891 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
892 /* Header */
893 header = (struct tgsi_header*)&tokens[1];
894 *header = tgsi_build_header();
895 /* Processor */
896 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
897
898 ti = 3;
899
900 /*
901 * decl i0 ; Vertex pos
902 * decl i1 ; Luma texcoords
903 * decl i2 ; Chroma Cb texcoords
904 * decl i3 ; Chroma Cr texcoords
905 */
906 for (i = 0; i < 4; i++)
907 {
908 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
909 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
910 }
911
912 /*
913 * decl o0 ; Vertex pos
914 * decl o1 ; Luma texcoords
915 * decl o2 ; Chroma Cb texcoords
916 * decl o3 ; Chroma Cr texcoords
917 */
918 for (i = 0; i < 4; i++)
919 {
920 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
921 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
922 }
923
924 /*
925 * mov o0, i0 ; Move input vertex pos to output
926 * mov o1, i1 ; Move input luma texcoords to output
927 * mov o2, i2 ; Move input chroma Cb texcoords to output
928 * mov o3, i3 ; Move input chroma Cr texcoords to output
929 */
930 for (i = 0; i < 4; ++i)
931 {
932 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
933 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
934 }
935
936 /* end */
937 inst = vl_end();
938 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
939
940 vs.tokens = tokens;
941 mc->i_vs = pipe->create_vs_state(pipe, &vs);
942 free(tokens);
943
944 return 0;
945 }
946
947 static int vlCreateFragmentShaderIMB
948 (
949 struct vlR16SnormBufferedMC *mc
950 )
951 {
952 const unsigned int max_tokens = 100;
953
954 struct pipe_context *pipe;
955 struct pipe_shader_state fs;
956 struct tgsi_token *tokens;
957 struct tgsi_header *header;
958
959 struct tgsi_full_declaration decl;
960 struct tgsi_full_instruction inst;
961
962 unsigned int ti;
963 unsigned int i;
964
965 assert(mc);
966
967 pipe = mc->pipe;
968 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
969
970 /* Version */
971 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
972 /* Header */
973 header = (struct tgsi_header*)&tokens[1];
974 *header = tgsi_build_header();
975 /* Processor */
976 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
977
978 ti = 3;
979
980 /*
981 * decl i0 ; Luma texcoords
982 * decl i1 ; Chroma Cb texcoords
983 * decl i2 ; Chroma Cr texcoords
984 */
985 for (i = 0; i < 3; ++i)
986 {
987 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
988 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
989 }
990
991 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
992 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
993 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
994
995 /* decl o0 ; Fragment color */
996 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
997 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
998
999 /* decl t0, t1 */
1000 decl = vl_decl_temps(0, 1);
1001 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1002
1003 /*
1004 * decl s0 ; Sampler for luma texture
1005 * decl s1 ; Sampler for chroma Cb texture
1006 * decl s2 ; Sampler for chroma Cr texture
1007 */
1008 for (i = 0; i < 3; ++i)
1009 {
1010 decl = vl_decl_samplers(i, i);
1011 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti);
1012 }
1013
1014 /*
1015 * tex2d t1, i0, s0 ; Read texel from luma texture
1016 * mov t0.x, t1.x ; Move luma sample into .x component
1017 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1018 * mov t0.y, t1.x ; Move Cb sample into .y component
1019 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1020 * mov t0.z, t1.x ; Move Cr sample into .z component
1021 */
1022 for (i = 0; i < 3; ++i)
1023 {
1024 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1025 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1026
1027 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1028 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1029 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1030 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1031 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1032 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1033 }
1034
1035 /* mul o0, t0, c0 ; Rescale texel to correct range */
1036 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1037 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1038
1039 /* end */
1040 inst = vl_end();
1041 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1042
1043 fs.tokens = tokens;
1044 mc->i_fs = pipe->create_fs_state(pipe, &fs);
1045 free(tokens);
1046
1047 return 0;
1048 }
1049
1050 static int vlCreateVertexShaderFramePMB
1051 (
1052 struct vlR16SnormBufferedMC *mc
1053 )
1054 {
1055 const unsigned int max_tokens = 100;
1056
1057 struct pipe_context *pipe;
1058 struct pipe_shader_state vs;
1059 struct tgsi_token *tokens;
1060 struct tgsi_header *header;
1061
1062 struct tgsi_full_declaration decl;
1063 struct tgsi_full_instruction inst;
1064
1065 unsigned int ti;
1066 unsigned int i;
1067
1068 assert(mc);
1069
1070 pipe = mc->pipe;
1071 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1072
1073 /* Version */
1074 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1075 /* Header */
1076 header = (struct tgsi_header*)&tokens[1];
1077 *header = tgsi_build_header();
1078 /* Processor */
1079 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1080
1081 ti = 3;
1082
1083 /*
1084 * decl i0 ; Vertex pos
1085 * decl i1 ; Luma texcoords
1086 * decl i2 ; Chroma Cb texcoords
1087 * decl i3 ; Chroma Cr texcoords
1088 * decl i4 ; Ref surface top field texcoords
1089 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
1090 */
1091 for (i = 0; i < 6; i++)
1092 {
1093 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1094 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1095 }
1096
1097 /*
1098 * decl o0 ; Vertex pos
1099 * decl o1 ; Luma texcoords
1100 * decl o2 ; Chroma Cb texcoords
1101 * decl o3 ; Chroma Cr texcoords
1102 * decl o4 ; Ref macroblock texcoords
1103 */
1104 for (i = 0; i < 5; i++)
1105 {
1106 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1107 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1108 }
1109
1110 /*
1111 * mov o0, i0 ; Move input vertex pos to output
1112 * mov o1, i1 ; Move input luma texcoords to output
1113 * mov o2, i2 ; Move input chroma Cb texcoords to output
1114 * mov o3, i3 ; Move input chroma Cr texcoords to output
1115 */
1116 for (i = 0; i < 4; ++i)
1117 {
1118 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1119 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1120 }
1121
1122 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
1123 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
1124 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1125
1126 /* end */
1127 inst = vl_end();
1128 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1129
1130 vs.tokens = tokens;
1131 mc->p_vs[0] = pipe->create_vs_state(pipe, &vs);
1132 free(tokens);
1133
1134 return 0;
1135 }
1136
1137 static int vlCreateVertexShaderFieldPMB
1138 (
1139 struct vlR16SnormBufferedMC *mc
1140 )
1141 {
1142 const unsigned int max_tokens = 100;
1143
1144 struct pipe_context *pipe;
1145 struct pipe_shader_state vs;
1146 struct tgsi_token *tokens;
1147 struct tgsi_header *header;
1148
1149 struct tgsi_full_declaration decl;
1150 struct tgsi_full_instruction inst;
1151
1152 unsigned int ti;
1153 unsigned int i;
1154
1155 assert(mc);
1156
1157 pipe = mc->pipe;
1158 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1159
1160 /* Version */
1161 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1162 /* Header */
1163 header = (struct tgsi_header*)&tokens[1];
1164 *header = tgsi_build_header();
1165 /* Processor */
1166 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1167
1168 ti = 3;
1169
1170 /*
1171 * decl i0 ; Vertex pos
1172 * decl i1 ; Luma texcoords
1173 * decl i2 ; Chroma Cb texcoords
1174 * decl i3 ; Chroma Cr texcoords
1175 * decl i4 ; Ref macroblock top field texcoords
1176 * decl i5 ; Ref macroblock bottom field texcoords
1177 */
1178 for (i = 0; i < 6; i++)
1179 {
1180 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1181 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1182 }
1183
1184 /* decl c0 ; Render target dimensions */
1185 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1186 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1187
1188 /*
1189 * decl o0 ; Vertex pos
1190 * decl o1 ; Luma texcoords
1191 * decl o2 ; Chroma Cb texcoords
1192 * decl o3 ; Chroma Cr texcoords
1193 * decl o4 ; Ref macroblock top field texcoords
1194 * decl o5 ; Ref macroblock bottom field texcoords
1195 * decl o6 ; Denormalized vertex pos
1196 */
1197 for (i = 0; i < 7; i++)
1198 {
1199 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1200 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1201 }
1202
1203 /*
1204 * mov o0, i0 ; Move input vertex pos to output
1205 * mov o1, i1 ; Move input luma texcoords to output
1206 * mov o2, i2 ; Move input chroma Cb texcoords to output
1207 * mov o3, i3 ; Move input chroma Cr texcoords to output
1208 */
1209 for (i = 0; i < 4; ++i)
1210 {
1211 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1212 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1213 }
1214
1215 /*
1216 * add o4, i0, i4 ; Translate vertex pos by motion vec to form top field macroblock texcoords
1217 * add o5, i0, i5 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords
1218 */
1219 for (i = 0; i < 2; ++i)
1220 {
1221 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
1222 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1223 }
1224
1225 /* mul o6, i0, c0 ; Denorm vertex pos */
1226 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1227 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1228
1229 /* end */
1230 inst = vl_end();
1231 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1232
1233 vs.tokens = tokens;
1234 mc->p_vs[1] = pipe->create_vs_state(pipe, &vs);
1235 free(tokens);
1236
1237 return 0;
1238 }
1239
1240 static int vlCreateFragmentShaderFramePMB
1241 (
1242 struct vlR16SnormBufferedMC *mc
1243 )
1244 {
1245 const unsigned int max_tokens = 100;
1246
1247 struct pipe_context *pipe;
1248 struct pipe_shader_state fs;
1249 struct tgsi_token *tokens;
1250 struct tgsi_header *header;
1251
1252 struct tgsi_full_declaration decl;
1253 struct tgsi_full_instruction inst;
1254
1255 unsigned int ti;
1256 unsigned int i;
1257
1258 assert(mc);
1259
1260 pipe = mc->pipe;
1261 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1262
1263 /* Version */
1264 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1265 /* Header */
1266 header = (struct tgsi_header*)&tokens[1];
1267 *header = tgsi_build_header();
1268 /* Processor */
1269 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1270
1271 ti = 3;
1272
1273 /*
1274 * decl i0 ; Luma texcoords
1275 * decl i1 ; Chroma Cb texcoords
1276 * decl i2 ; Chroma Cr texcoords
1277 * decl i3 ; Ref macroblock texcoords
1278 */
1279 for (i = 0; i < 4; ++i)
1280 {
1281 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1282 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1283 }
1284
1285 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
1286 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1287 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1288
1289 /* decl o0 ; Fragment color */
1290 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1291 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1292
1293 /* decl t0, t1 */
1294 decl = vl_decl_temps(0, 1);
1295 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1296
1297 /*
1298 * decl s0 ; Sampler for luma texture
1299 * decl s1 ; Sampler for chroma Cb texture
1300 * decl s2 ; Sampler for chroma Cr texture
1301 * decl s3 ; Sampler for ref surface texture
1302 */
1303 for (i = 0; i < 4; ++i)
1304 {
1305 decl = vl_decl_samplers(i, i);
1306 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1307 }
1308
1309 /*
1310 * tex2d t1, i0, s0 ; Read texel from luma texture
1311 * mov t0.x, t1.x ; Move luma sample into .x component
1312 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1313 * mov t0.y, t1.x ; Move Cb sample into .y component
1314 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1315 * mov t0.z, t1.x ; Move Cr sample into .z component
1316 */
1317 for (i = 0; i < 3; ++i)
1318 {
1319 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1320 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1321
1322 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1323 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1324 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1325 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1326 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1327 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1328 }
1329
1330 /* mul t0, t0, c0 ; Rescale texel to correct range */
1331 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1332 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1333
1334 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
1335 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
1336 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1337
1338 /* add o0, t0, t1 ; Add ref and differential to form final output */
1339 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1340 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1341
1342 /* end */
1343 inst = vl_end();
1344 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1345
1346 fs.tokens = tokens;
1347 mc->p_fs[0] = pipe->create_fs_state(pipe, &fs);
1348 free(tokens);
1349
1350 return 0;
1351 }
1352
1353 static int vlCreateFragmentShaderFieldPMB
1354 (
1355 struct vlR16SnormBufferedMC *mc
1356 )
1357 {
1358 const unsigned int max_tokens = 200;
1359
1360 struct pipe_context *pipe;
1361 struct pipe_shader_state fs;
1362 struct tgsi_token *tokens;
1363 struct tgsi_header *header;
1364
1365 struct tgsi_full_declaration decl;
1366 struct tgsi_full_instruction inst;
1367
1368 unsigned int ti;
1369 unsigned int i;
1370
1371 assert(mc);
1372
1373 pipe = mc->pipe;
1374 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1375
1376 /* Version */
1377 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1378 /* Header */
1379 header = (struct tgsi_header*)&tokens[1];
1380 *header = tgsi_build_header();
1381 /* Processor */
1382 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1383
1384 ti = 3;
1385
1386 /*
1387 * decl i0 ; Luma texcoords
1388 * decl i1 ; Chroma Cb texcoords
1389 * decl i2 ; Chroma Cr texcoords
1390 * decl i3 ; Ref macroblock top field texcoords
1391 * decl i4 ; Ref macroblock bottom field texcoords
1392 * decl i5 ; Denormalized vertex pos
1393 */
1394 for (i = 0; i < 6; ++i)
1395 {
1396 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1397 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1398 }
1399
1400 /*
1401 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1402 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
1403 */
1404 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1405 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1406
1407 /* decl o0 ; Fragment color */
1408 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1409 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1410
1411 /* decl t0-t4 */
1412 decl = vl_decl_temps(0, 4);
1413 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1414
1415 /*
1416 * decl s0 ; Sampler for luma texture
1417 * decl s1 ; Sampler for chroma Cb texture
1418 * decl s2 ; Sampler for chroma Cr texture
1419 * decl s3 ; Sampler for ref surface texture
1420 */
1421 for (i = 0; i < 4; ++i)
1422 {
1423 decl = vl_decl_samplers(i, i);
1424 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1425 }
1426
1427 /*
1428 * tex2d t1, i0, s0 ; Read texel from luma texture
1429 * mov t0.x, t1.x ; Move luma sample into .x component
1430 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1431 * mov t0.y, t1.x ; Move Cb sample into .y component
1432 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1433 * mov t0.z, t1.x ; Move Cr sample into .z component
1434 */
1435 for (i = 0; i < 3; ++i)
1436 {
1437 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1438 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1439
1440 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1441 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1442 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1443 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1444 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1445 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1446 }
1447
1448 /* mul t0, t0, c0 ; Rescale texel to correct range */
1449 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1450 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1451
1452 /*
1453 * tex2d t1, i3, s3 ; Read texel from ref macroblock top field
1454 * tex2d t2, i4, s3 ; Read texel from ref macroblock bottom field
1455 */
1456 for (i = 0; i < 2; ++i)
1457 {
1458 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3);
1459 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1460 }
1461
1462 /* XXX: Pos values off by 0.5? */
1463 /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */
1464 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1);
1465 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1466 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1467 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1468 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1469 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1470 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1471 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1472 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1473 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1474
1475 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1476 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
1477 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1478 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1479 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1480 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1481 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1482
1483 /* floor t3, t3 ; Get rid of fractional part */
1484 inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
1485 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1486
1487 /* mul t3, t3, c1.y ; Multiply by 2 */
1488 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
1489 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1490 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1491 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1492 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1493 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1494
1495 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1496 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
1497 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1498
1499 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1500 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
1501 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1502 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1503
1504 /* add o0, t0, t1 ; Add ref and differential to form final output */
1505 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1506 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1507
1508 /* end */
1509 inst = vl_end();
1510 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1511
1512 fs.tokens = tokens;
1513 mc->p_fs[1] = pipe->create_fs_state(pipe, &fs);
1514 free(tokens);
1515
1516 return 0;
1517 }
1518
1519 static int vlCreateVertexShaderFrameBMB
1520 (
1521 struct vlR16SnormBufferedMC *mc
1522 )
1523 {
1524 const unsigned int max_tokens = 100;
1525
1526 struct pipe_context *pipe;
1527 struct pipe_shader_state vs;
1528 struct tgsi_token *tokens;
1529 struct tgsi_header *header;
1530
1531 struct tgsi_full_declaration decl;
1532 struct tgsi_full_instruction inst;
1533
1534 unsigned int ti;
1535 unsigned int i;
1536
1537 assert(mc);
1538
1539 pipe = mc->pipe;
1540 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1541
1542 /* Version */
1543 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1544 /* Header */
1545 header = (struct tgsi_header*)&tokens[1];
1546 *header = tgsi_build_header();
1547 /* Processor */
1548 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1549
1550 ti = 3;
1551
1552 /*
1553 * decl i0 ; Vertex pos
1554 * decl i1 ; Luma texcoords
1555 * decl i2 ; Chroma Cb texcoords
1556 * decl i3 ; Chroma Cr texcoords
1557 * decl i4 ; First ref macroblock top field texcoords
1558 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
1559 * decl i6 ; Second ref macroblock top field texcoords
1560 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
1561 */
1562 for (i = 0; i < 8; i++)
1563 {
1564 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1565 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1566 }
1567
1568 /*
1569 * decl o0 ; Vertex pos
1570 * decl o1 ; Luma texcoords
1571 * decl o2 ; Chroma Cb texcoords
1572 * decl o3 ; Chroma Cr texcoords
1573 * decl o4 ; First ref macroblock texcoords
1574 * decl o5 ; Second ref macroblock texcoords
1575 */
1576 for (i = 0; i < 6; i++)
1577 {
1578 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1579 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1580 }
1581
1582 /*
1583 * mov o0, i0 ; Move input vertex pos to output
1584 * mov o1, i1 ; Move input luma texcoords to output
1585 * mov o2, i2 ; Move input chroma Cb texcoords to output
1586 * mov o3, i3 ; Move input chroma Cr texcoords to output
1587 */
1588 for (i = 0; i < 4; ++i)
1589 {
1590 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1591 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1592 }
1593
1594 /*
1595 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
1596 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
1597 */
1598 for (i = 0; i < 2; ++i)
1599 {
1600 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
1601 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1602 }
1603
1604 /* end */
1605 inst = vl_end();
1606 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1607
1608 vs.tokens = tokens;
1609 mc->b_vs[0] = pipe->create_vs_state(pipe, &vs);
1610 free(tokens);
1611
1612 return 0;
1613 }
1614
1615 static int vlCreateVertexShaderFieldBMB
1616 (
1617 struct vlR16SnormBufferedMC *mc
1618 )
1619 {
1620 const unsigned int max_tokens = 100;
1621
1622 struct pipe_context *pipe;
1623 struct pipe_shader_state vs;
1624 struct tgsi_token *tokens;
1625 struct tgsi_header *header;
1626
1627 struct tgsi_full_declaration decl;
1628 struct tgsi_full_instruction inst;
1629
1630 unsigned int ti;
1631 unsigned int i;
1632
1633 assert(mc);
1634
1635 pipe = mc->pipe;
1636 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1637
1638 /* Version */
1639 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1640 /* Header */
1641 header = (struct tgsi_header*)&tokens[1];
1642 *header = tgsi_build_header();
1643 /* Processor */
1644 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1645
1646 ti = 3;
1647
1648 /*
1649 * decl i0 ; Vertex pos
1650 * decl i1 ; Luma texcoords
1651 * decl i2 ; Chroma Cb texcoords
1652 * decl i3 ; Chroma Cr texcoords
1653 * decl i4 ; First ref macroblock top field texcoords
1654 * decl i5 ; First ref macroblock bottom field texcoords
1655 * decl i6 ; Second ref macroblock top field texcoords
1656 * decl i7 ; Second ref macroblock bottom field texcoords
1657 */
1658 for (i = 0; i < 8; i++)
1659 {
1660 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1661 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1662 }
1663
1664 /* decl c0 ; Render target dimensions */
1665 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1666 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1667
1668 /*
1669 * decl o0 ; Vertex pos
1670 * decl o1 ; Luma texcoords
1671 * decl o2 ; Chroma Cb texcoords
1672 * decl o3 ; Chroma Cr texcoords
1673 * decl o4 ; First ref macroblock top field texcoords
1674 * decl o5 ; First ref macroblock Bottom field texcoords
1675 * decl o6 ; Second ref macroblock top field texcoords
1676 * decl o7 ; Second ref macroblock Bottom field texcoords
1677 * decl o8 ; Denormalized vertex pos
1678 */
1679 for (i = 0; i < 9; i++)
1680 {
1681 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1682 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1683 }
1684
1685 /* decl t0, t1 */
1686 decl = vl_decl_temps(0, 1);
1687 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1688
1689 /*
1690 * mov o0, i0 ; Move input vertex pos to output
1691 * mov o1, i1 ; Move input luma texcoords to output
1692 * mov o2, i2 ; Move input chroma Cb texcoords to output
1693 * mov o3, i3 ; Move input chroma Cr texcoords to output
1694 */
1695 for (i = 0; i < 4; ++i)
1696 {
1697 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1698 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1699 }
1700
1701 /*
1702 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
1703 * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
1704 * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
1705 * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
1706 */
1707 for (i = 0; i < 4; ++i)
1708 {
1709 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
1710 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1711 }
1712
1713 /* mul o8, i0, c0 ; Denorm vertex pos */
1714 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 8, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1715 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1716
1717 /* end */
1718 inst = vl_end();
1719 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1720
1721 vs.tokens = tokens;
1722 mc->b_vs[1] = pipe->create_vs_state(pipe, &vs);
1723 free(tokens);
1724
1725 return 0;
1726 }
1727
1728 static int vlCreateFragmentShaderFrameBMB
1729 (
1730 struct vlR16SnormBufferedMC *mc
1731 )
1732 {
1733 const unsigned int max_tokens = 100;
1734
1735 struct pipe_context *pipe;
1736 struct pipe_shader_state fs;
1737 struct tgsi_token *tokens;
1738 struct tgsi_header *header;
1739
1740 struct tgsi_full_declaration decl;
1741 struct tgsi_full_instruction inst;
1742
1743 unsigned int ti;
1744 unsigned int i;
1745
1746 assert(mc);
1747
1748 pipe = mc->pipe;
1749 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1750
1751 /* Version */
1752 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1753 /* Header */
1754 header = (struct tgsi_header*)&tokens[1];
1755 *header = tgsi_build_header();
1756 /* Processor */
1757 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1758
1759 ti = 3;
1760
1761 /*
1762 * decl i0 ; Luma texcoords
1763 * decl i1 ; Chroma Cb texcoords
1764 * decl i2 ; Chroma Cr texcoords
1765 * decl i3 ; First ref macroblock texcoords
1766 * decl i4 ; Second ref macroblock texcoords
1767 */
1768 for (i = 0; i < 5; ++i)
1769 {
1770 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1771 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1772 }
1773
1774 /*
1775 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1776 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
1777 */
1778 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1779 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1780
1781 /* decl o0 ; Fragment color */
1782 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1783 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1784
1785 /* decl t0-t2 */
1786 decl = vl_decl_temps(0, 2);
1787 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1788
1789 /*
1790 * decl s0 ; Sampler for luma texture
1791 * decl s1 ; Sampler for chroma Cb texture
1792 * decl s2 ; Sampler for chroma Cr texture
1793 * decl s3 ; Sampler for first ref surface texture
1794 * decl s4 ; Sampler for second ref surface texture
1795 */
1796 for (i = 0; i < 5; ++i)
1797 {
1798 decl = vl_decl_samplers(i, i);
1799 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1800 }
1801
1802 /*
1803 * tex2d t1, i0, s0 ; Read texel from luma texture
1804 * mov t0.x, t1.x ; Move luma sample into .x component
1805 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1806 * mov t0.y, t1.x ; Move Cb sample into .y component
1807 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1808 * mov t0.z, t1.x ; Move Cr sample into .z component
1809 */
1810 for (i = 0; i < 3; ++i)
1811 {
1812 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1813 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1814
1815 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1816 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1817 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1818 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1819 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1820 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1821 }
1822
1823 /* mul t0, t0, c0 ; Rescale texel to correct range */
1824 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1825 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1826
1827 /*
1828 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
1829 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
1830 */
1831 for (i = 0; i < 2; ++i)
1832 {
1833 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
1834 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1835 }
1836
1837 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
1838 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1839 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1840 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1841 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1842 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1843 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1844
1845 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1846 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1847 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1848
1849 /* end */
1850 inst = vl_end();
1851 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1852
1853 fs.tokens = tokens;
1854 mc->b_fs[0] = pipe->create_fs_state(pipe, &fs);
1855 free(tokens);
1856
1857 return 0;
1858 }
1859
1860 static int vlCreateFragmentShaderFieldBMB
1861 (
1862 struct vlR16SnormBufferedMC *mc
1863 )
1864 {
1865 const unsigned int max_tokens = 200;
1866
1867 struct pipe_context *pipe;
1868 struct pipe_shader_state fs;
1869 struct tgsi_token *tokens;
1870 struct tgsi_header *header;
1871
1872 struct tgsi_full_declaration decl;
1873 struct tgsi_full_instruction inst;
1874
1875 unsigned int ti;
1876 unsigned int i;
1877
1878 assert(mc);
1879
1880 pipe = mc->pipe;
1881 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1882
1883 /* Version */
1884 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1885 /* Header */
1886 header = (struct tgsi_header*)&tokens[1];
1887 *header = tgsi_build_header();
1888 /* Processor */
1889 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1890
1891 ti = 3;
1892
1893 /*
1894 * decl i0 ; Luma texcoords
1895 * decl i1 ; Chroma Cb texcoords
1896 * decl i2 ; Chroma Cr texcoords
1897 * decl i3 ; First ref macroblock top field texcoords
1898 * decl i4 ; First ref macroblock bottom field texcoords
1899 * decl i5 ; Second ref macroblock top field texcoords
1900 * decl i6 ; Second ref macroblock bottom field texcoords
1901 * decl i7 ; Denormalized vertex pos
1902 */
1903 for (i = 0; i < 8; ++i)
1904 {
1905 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1906 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1907 }
1908
1909 /*
1910 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1911 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
1912 * ; and for Y-mod-2 top/bottom field selection
1913 */
1914 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1915 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1916
1917 /* decl o0 ; Fragment color */
1918 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1919 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1920
1921 /* decl t0-t5 */
1922 decl = vl_decl_temps(0, 5);
1923 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1924
1925 /*
1926 * decl s0 ; Sampler for luma texture
1927 * decl s1 ; Sampler for chroma Cb texture
1928 * decl s2 ; Sampler for chroma Cr texture
1929 * decl s3 ; Sampler for first ref surface texture
1930 * decl s4 ; Sampler for second ref surface texture
1931 */
1932 for (i = 0; i < 5; ++i)
1933 {
1934 decl = vl_decl_samplers(i, i);
1935 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1936 }
1937
1938 /*
1939 * tex2d t1, i0, s0 ; Read texel from luma texture
1940 * mov t0.x, t1.x ; Move luma sample into .x component
1941 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1942 * mov t0.y, t1.x ; Move Cb sample into .y component
1943 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1944 * mov t0.z, t1.x ; Move Cr sample into .z component
1945 */
1946 for (i = 0; i < 3; ++i)
1947 {
1948 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1949 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1950
1951 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1952 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1953 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1954 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1955 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1956 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1957 }
1958
1959 /* mul t0, t0, c0 ; Rescale texel to correct range */
1960 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1961 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1962
1963 /* XXX: Pos values off by 0.5? */
1964 /* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */
1965 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 7, TGSI_FILE_CONSTANT, 1);
1966 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1967 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1968 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1969 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1970 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1971 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1972 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1973 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1974 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1975
1976 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1977 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
1978 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1979 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1980 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1981 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1982 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1983
1984 /* floor t3, t3 ; Get rid of fractional part */
1985 inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
1986 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1987
1988 /* mul t3, t3, c1.y ; Multiply by 2 */
1989 inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
1990 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1991 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1992 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1993 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1994 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1995
1996 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1997 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
1998 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1999
2000 /*
2001 * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field
2002 * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field
2003 */
2004 for (i = 0; i < 2; ++i)
2005 {
2006 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3);
2007 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
2008 }
2009
2010 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
2011 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
2012 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
2013 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
2014
2015 /*
2016 * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field
2017 * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field
2018 */
2019 for (i = 0; i < 2; ++i)
2020 {
2021 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 5, TGSI_FILE_SAMPLER, 4);
2022 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
2023 }
2024
2025 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
2026 /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
2027 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
2028 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
2029
2030 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
2031 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
2032 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
2033 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
2034 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
2035 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
2036 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
2037
2038 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
2039 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
2040 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
2041
2042 /* end */
2043 inst = vl_end();
2044 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
2045
2046 fs.tokens = tokens;
2047 mc->b_fs[1] = pipe->create_fs_state(pipe, &fs);
2048 free(tokens);
2049
2050 return 0;
2051 }
2052
2053 static int vlCreateDataBufs
2054 (
2055 struct vlR16SnormBufferedMC *mc
2056 )
2057 {
2058 const unsigned int mbw = align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH;
2059 const unsigned int mbh = align(mc->picture_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT;
2060 const unsigned int num_mb_per_frame = mbw * mbh;
2061
2062 struct pipe_context *pipe;
2063 unsigned int h, i;
2064
2065 assert(mc);
2066
2067 pipe = mc->pipe;
2068
2069 /* Create our vertex buffers */
2070 for (h = 0; h < NUM_BUF_SETS; ++h)
2071 {
2072 mc->vertex_bufs[h][0].pitch = sizeof(struct vlVertex2f) * 4;
2073 mc->vertex_bufs[h][0].max_index = 24 * num_mb_per_frame - 1;
2074 mc->vertex_bufs[h][0].buffer_offset = 0;
2075 mc->vertex_bufs[h][0].buffer = pipe->winsys->buffer_create
2076 (
2077 pipe->winsys,
2078 1,
2079 PIPE_BUFFER_USAGE_VERTEX,
2080 sizeof(struct vlVertex2f) * 4 * 24 * num_mb_per_frame
2081 );
2082
2083 for (i = 1; i < 3; ++i)
2084 {
2085 mc->vertex_bufs[h][i].pitch = sizeof(struct vlVertex2f) * 2;
2086 mc->vertex_bufs[h][i].max_index = 24 * num_mb_per_frame - 1;
2087 mc->vertex_bufs[h][i].buffer_offset = 0;
2088 mc->vertex_bufs[h][i].buffer = pipe->winsys->buffer_create
2089 (
2090 pipe->winsys,
2091 1,
2092 PIPE_BUFFER_USAGE_VERTEX,
2093 sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame
2094 );
2095 }
2096 }
2097
2098 /* Position element */
2099 mc->vertex_elems[0].src_offset = 0;
2100 mc->vertex_elems[0].vertex_buffer_index = 0;
2101 mc->vertex_elems[0].nr_components = 2;
2102 mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
2103
2104 /* Luma, texcoord element */
2105 mc->vertex_elems[1].src_offset = sizeof(struct vlVertex2f);
2106 mc->vertex_elems[1].vertex_buffer_index = 0;
2107 mc->vertex_elems[1].nr_components = 2;
2108 mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
2109
2110 /* Chroma Cr texcoord element */
2111 mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f) * 2;
2112 mc->vertex_elems[2].vertex_buffer_index = 0;
2113 mc->vertex_elems[2].nr_components = 2;
2114 mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
2115
2116 /* Chroma Cb texcoord element */
2117 mc->vertex_elems[3].src_offset = sizeof(struct vlVertex2f) * 3;
2118 mc->vertex_elems[3].vertex_buffer_index = 0;
2119 mc->vertex_elems[3].nr_components = 2;
2120 mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
2121
2122 /* First ref surface top field texcoord element */
2123 mc->vertex_elems[4].src_offset = 0;
2124 mc->vertex_elems[4].vertex_buffer_index = 1;
2125 mc->vertex_elems[4].nr_components = 2;
2126 mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
2127
2128 /* First ref surface bottom field texcoord element */
2129 mc->vertex_elems[5].src_offset = sizeof(struct vlVertex2f);
2130 mc->vertex_elems[5].vertex_buffer_index = 1;
2131 mc->vertex_elems[5].nr_components = 2;
2132 mc->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
2133
2134 /* Second ref surface top field texcoord element */
2135 mc->vertex_elems[6].src_offset = 0;
2136 mc->vertex_elems[6].vertex_buffer_index = 2;
2137 mc->vertex_elems[6].nr_components = 2;
2138 mc->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
2139
2140 /* Second ref surface bottom field texcoord element */
2141 mc->vertex_elems[7].src_offset = sizeof(struct vlVertex2f);
2142 mc->vertex_elems[7].vertex_buffer_index = 2;
2143 mc->vertex_elems[7].nr_components = 2;
2144 mc->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
2145
2146 /* Create our constant buffer */
2147 mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts);
2148 mc->vs_const_buf.buffer = pipe->winsys->buffer_create
2149 (
2150 pipe->winsys,
2151 1,
2152 PIPE_BUFFER_USAGE_CONSTANT,
2153 mc->vs_const_buf.size
2154 );
2155
2156 mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts);
2157 mc->fs_const_buf.buffer = pipe->winsys->buffer_create
2158 (
2159 pipe->winsys,
2160 1,
2161 PIPE_BUFFER_USAGE_CONSTANT,
2162 mc->fs_const_buf.size
2163 );
2164
2165 memcpy
2166 (
2167 pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
2168 &fs_consts,
2169 sizeof(struct vlFragmentShaderConsts)
2170 );
2171
2172 pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer);
2173
2174 mc->macroblocks = malloc(sizeof(struct vlMpeg2MacroBlock) * num_mb_per_frame);
2175
2176 return 0;
2177 }
2178
2179 static int vlInit
2180 (
2181 struct vlR16SnormBufferedMC *mc
2182 )
2183 {
2184 struct pipe_context *pipe;
2185 struct pipe_sampler_state sampler;
2186 struct pipe_texture template;
2187 unsigned int filters[5];
2188 unsigned int i;
2189
2190 assert(mc);
2191
2192 pipe = mc->pipe;
2193
2194 /* For MC we render to textures, which are rounded up to nearest POT */
2195 mc->viewport.scale[0] = vlRoundUpPOT(mc->picture_width);
2196 mc->viewport.scale[1] = vlRoundUpPOT(mc->picture_height);
2197 mc->viewport.scale[2] = 1;
2198 mc->viewport.scale[3] = 1;
2199 mc->viewport.translate[0] = 0;
2200 mc->viewport.translate[1] = 0;
2201 mc->viewport.translate[2] = 0;
2202 mc->viewport.translate[3] = 0;
2203
2204 mc->render_target.width = vlRoundUpPOT(mc->picture_width);
2205 mc->render_target.height = vlRoundUpPOT(mc->picture_height);
2206 mc->render_target.num_cbufs = 1;
2207 /* FB for MC stage is a vlSurface created by the user, set at render time */
2208 mc->render_target.zsbuf = NULL;
2209
2210 filters[0] = PIPE_TEX_FILTER_NEAREST;
2211 /* FIXME: Linear causes discoloration around block edges */
2212 filters[1] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
2213 filters[2] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
2214 filters[3] = PIPE_TEX_FILTER_LINEAR;
2215 filters[4] = PIPE_TEX_FILTER_LINEAR;
2216
2217 for (i = 0; i < 5; ++i)
2218 {
2219 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2220 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2221 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2222 sampler.min_img_filter = filters[i];
2223 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2224 sampler.mag_img_filter = filters[i];
2225 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
2226 sampler.compare_func = PIPE_FUNC_ALWAYS;
2227 sampler.normalized_coords = 1;
2228 /*sampler.prefilter = ;*/
2229 /*sampler.shadow_ambient = ;*/
2230 /*sampler.lod_bias = ;*/
2231 sampler.min_lod = 0;
2232 /*sampler.max_lod = ;*/
2233 /*sampler.border_color[i] = ;*/
2234 /*sampler.max_anisotropy = ;*/
2235 mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler);
2236 }
2237
2238 memset(&template, 0, sizeof(struct pipe_texture));
2239 template.target = PIPE_TEXTURE_2D;
2240 template.format = PIPE_FORMAT_R16_SNORM;
2241 template.last_level = 0;
2242 template.width[0] = vlRoundUpPOT(mc->picture_width);
2243 template.height[0] = vlRoundUpPOT(mc->picture_height);
2244 template.depth[0] = 1;
2245 template.compressed = 0;
2246 pf_get_block(template.format, &template.block);
2247
2248 for (i = 0; i < NUM_BUF_SETS; ++i)
2249 mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template);
2250
2251 if (mc->picture_format == vlFormatYCbCr420)
2252 {
2253 template.width[0] = vlRoundUpPOT(mc->picture_width / 2);
2254 template.height[0] = vlRoundUpPOT(mc->picture_height / 2);
2255 }
2256 else if (mc->picture_format == vlFormatYCbCr422)
2257 template.height[0] = vlRoundUpPOT(mc->picture_height / 2);
2258
2259 for (i = 0; i < NUM_BUF_SETS; ++i)
2260 {
2261 mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template);
2262 mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template);
2263 }
2264
2265 /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
2266
2267 vlCreateVertexShaderIMB(mc);
2268 vlCreateFragmentShaderIMB(mc);
2269 vlCreateVertexShaderFramePMB(mc);
2270 vlCreateVertexShaderFieldPMB(mc);
2271 vlCreateFragmentShaderFramePMB(mc);
2272 vlCreateFragmentShaderFieldPMB(mc);
2273 vlCreateVertexShaderFrameBMB(mc);
2274 vlCreateVertexShaderFieldBMB(mc);
2275 vlCreateFragmentShaderFrameBMB(mc);
2276 vlCreateFragmentShaderFieldBMB(mc);
2277 vlCreateDataBufs(mc);
2278
2279 return 0;
2280 }
2281
2282 int vlCreateR16SNormBufferedMC
2283 (
2284 struct pipe_context *pipe,
2285 unsigned int picture_width,
2286 unsigned int picture_height,
2287 enum vlFormat picture_format,
2288 struct vlRender **render
2289 )
2290 {
2291 struct vlR16SnormBufferedMC *mc;
2292 unsigned int i;
2293
2294 assert(pipe);
2295 assert(render);
2296
2297 mc = calloc(1, sizeof(struct vlR16SnormBufferedMC));
2298
2299 mc->base.vlBegin = &vlBegin;
2300 mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered;
2301 mc->base.vlEnd = &vlEnd;
2302 mc->base.vlFlush = &vlFlush;
2303 mc->base.vlDestroy = &vlDestroy;
2304 mc->pipe = pipe;
2305 mc->picture_width = picture_width;
2306 mc->picture_height = picture_height;
2307
2308 mc->cur_buf = 0;
2309 mc->buffered_surface = NULL;
2310 mc->past_surface = NULL;
2311 mc->future_surface = NULL;
2312 for (i = 0; i < 3; ++i)
2313 mc->zero_block[i].x = -1.0f;
2314 mc->num_macroblocks = 0;
2315
2316 vlInit(mc);
2317
2318 *render = &mc->base;
2319
2320 return 0;
2321 }