g3dvl: Fix field coded block copy.
[mesa.git] / src / gallium / state_trackers / g3dvl / vl_r16snorm_mc_buf.c
1 #define VL_INTERNAL
2 #include "vl_r16snorm_mc_buf.h"
3 #include <assert.h>
4 #include <stdlib.h>
5 #include <pipe/p_context.h>
6 #include <pipe/p_winsys.h>
7 #include <pipe/p_screen.h>
8 #include <pipe/p_state.h>
9 #include <pipe/p_inlines.h>
10 #include <tgsi/tgsi_parse.h>
11 #include <tgsi/tgsi_build.h>
12 #include <util/u_math.h>
13 #include "vl_render.h"
14 #include "vl_shader_build.h"
15 #include "vl_surface.h"
16 #include "vl_util.h"
17 #include "vl_types.h"
18 #include "vl_defs.h"
19
/*
 * TODO: Dynamically determine the number of buffer sets to use, based on
 * video size and available memory, since we can easily run out of memory
 * for high-res videos.
 * Note: Destroying the previous frame's buffers and creating new ones
 * doesn't work, since the buffers are not actually destroyed until their
 * fence is signalled, and if we render fast enough we will create buffers
 * faster than we destroy them.
 */
29 #define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */
30
/*
 * Macroblock types expanded so frame- and field-motion variants each get
 * their own entry. vlFlush() groups macroblocks by this enum so that each
 * group can be drawn with one draw call using its own shader pair.
 */
enum vlMacroBlockTypeEx
{
   vlMacroBlockExTypeIntra,
   vlMacroBlockExTypeFwdPredictedFrame,
   vlMacroBlockExTypeFwdPredictedField,
   vlMacroBlockExTypeBkwdPredictedFrame,
   vlMacroBlockExTypeBkwdPredictedField,
   vlMacroBlockExTypeBiPredictedFrame,
   vlMacroBlockExTypeBiPredictedField,

   vlNumMacroBlockExTypes
};
43
/* Constant buffer layout for the vertex shaders. */
struct vlVertexShaderConsts
{
   /* Surface width/height, used to denormalize coordinates (written in vlFlush()). */
   struct vlVertex4f denorm;
};
48
/* Constant buffer layout for the fragment shaders (see fs_consts below). */
struct vlFragmentShaderConsts
{
   /* Rescales 16-bit snorm block samples back to their true range. */
   struct vlVertex4f multiplier;
   /* Used when computing Y % 2 to pick top/bottom field for P/B blocks. */
   struct vlVertex4f div;
};
54
/* Buffered motion-compensation renderer state; "derives" from vlRender. */
struct vlR16SnormBufferedMC
{
   struct vlRender base;   /* Must be first so the struct can be cast to/from vlRender. */

   unsigned int picture_width, picture_height;
   enum vlFormat picture_format;

   unsigned int cur_buf;                            /* Monotonic counter; cur_buf % NUM_BUF_SETS picks the active buffer set. */
   struct vlSurface *buffered_surface;              /* Surface macroblocks are currently batched for; NULL if none. */
   struct vlSurface *past_surface, *future_surface; /* Reference surfaces for forward/backward prediction. */
   struct vlVertex2f surface_tex_inv_size;          /* 1 / texture width and height, for normalizing texcoords. */
   struct vlVertex2f zero_block[3];                 /* Per-component texcoords of a known all-zero block; x < 0.0f means not yet found. */
   unsigned int num_macroblocks;                    /* Number of entries batched in macroblocks[]. */
   struct vlMpeg2MacroBlock *macroblocks;           /* Batched macroblocks, consumed by vlFlush(). */

   struct pipe_context *pipe;
   struct pipe_viewport_state viewport;
   struct pipe_framebuffer_state render_target;
   struct pipe_sampler_state *samplers[5];                 /* Y, Cb, Cr, past ref, future ref. */
   struct pipe_texture *textures[NUM_BUF_SETS][5];         /* [3] and [4] alias reference surface textures at draw time. */
   void *i_vs, *p_vs[2], *b_vs[2];                         /* Shaders: intra, P frame/field, B frame/field. */
   void *i_fs, *p_fs[2], *b_fs[2];
   struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][3]; /* Stream 0: pos+texcoords, 1: first MV, 2: second MV. */
   struct pipe_vertex_element vertex_elems[8];
   struct pipe_constant_buffer vs_const_buf, fs_const_buf;
};
81
/* Start a rendering batch; this renderer needs no per-batch setup. */
static int vlBegin(struct vlRender *render)
{
   assert(render);

   return 0;
}
91
92 static inline int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch)
93 {
94 unsigned int y;
95
96 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
97 memcpy
98 (
99 dst + y * dst_pitch,
100 src + y * VL_BLOCK_WIDTH,
101 VL_BLOCK_WIDTH * 2
102 );
103
104 return 0;
105 }
106
107 static inline int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch)
108 {
109 unsigned int y;
110
111 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
112 memcpy
113 (
114 dst + y * dst_pitch * 2,
115 src + y * VL_BLOCK_WIDTH,
116 VL_BLOCK_WIDTH * 2
117 );
118
119 return 0;
120 }
121
122 static inline int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
123 {
124 unsigned int y;
125
126 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
127 memset
128 (
129 dst + y * dst_pitch,
130 0,
131 VL_BLOCK_WIDTH * 2
132 );
133
134 return 0;
135 }
136
/*
 * Upload one macroblock's coded (residual) blocks into the current buffer
 * set's Y/Cb/Cr source textures at the macroblock's position. Blocks not
 * present in coded_block_pattern have no data in 'blocks'; the first such
 * empty position per component is zero-filled once and its texcoords are
 * cached in mc->zero_block[] so later uncoded blocks can just point there.
 */
static inline int vlGrabBlocks
(
   struct vlR16SnormBufferedMC *mc,
   unsigned int mbx,
   unsigned int mby,
   enum vlDCTType dct_type,
   unsigned int coded_block_pattern,
   short *blocks
)
{
   struct pipe_surface *tex_surface;
   short *texels;
   unsigned int tex_pitch;
   unsigned int x, y, tb = 0, sb = 0;
   unsigned int mbpx = mbx * VL_MACROBLOCK_WIDTH, mbpy = mby * VL_MACROBLOCK_HEIGHT;

   assert(mc);
   assert(blocks);

   /* Luma texture of the current rotating buffer set. */
   tex_surface = mc->pipe->screen->get_tex_surface
   (
      mc->pipe->screen,
      mc->textures[mc->cur_buf % NUM_BUF_SETS][0],
      0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
   );

   texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
   /* Convert byte stride to a pitch in 16-bit texels. */
   tex_pitch = tex_surface->stride / tex_surface->block.size;

   texels += mbpy * tex_pitch + mbpx;

   /* Four 8x8 luma blocks in 2x2 raster order; tb indexes the CBP bit,
      sb counts only the blocks actually present in the source stream. */
   for (y = 0; y < 2; ++y)
   {
      for (x = 0; x < 2; ++x, ++tb)
      {
         if ((coded_block_pattern >> (5 - tb)) & 1)
         {
            short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

            if (dct_type == vlDCTTypeFrameCoded)
            {
               vlGrabFrameCodedBlock
               (
                  cur_block,
                  texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH,
                  tex_pitch
               );
            }
            else
            {
               /* Field coded: block row y selects the starting line, rows
                  are then interleaved onto every other line by the helper. */
               vlGrabFieldCodedBlock
               (
                  cur_block,
                  texels + y * tex_pitch + x * VL_BLOCK_WIDTH,
                  tex_pitch
               );
            }

            ++sb;
         }
         else if (mc->zero_block[0].x < 0.0f)
         {
            /* First uncoded luma block seen: clear it and remember its
               texcoords so all later uncoded blocks can reuse it. */
            vlGrabNoBlock(texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, tex_pitch);

            mc->zero_block[0].x = (mbpx + x * 8) * mc->surface_tex_inv_size.x;
            mc->zero_block[0].y = (mbpy + y * 8) * mc->surface_tex_inv_size.y;
         }
      }
   }

   pipe_surface_unmap(tex_surface);

   /* TODO: Implement 422, 444 */
   /* 4:2:0 chroma planes are half size in each dimension. */
   mbpx >>= 1;
   mbpy >>= 1;

   /* One 8x8 block each for Cb (tb == 0) and Cr (tb == 1). */
   for (tb = 0; tb < 2; ++tb)
   {
      tex_surface = mc->pipe->screen->get_tex_surface
      (
         mc->pipe->screen,
         mc->textures[mc->cur_buf % NUM_BUF_SETS][tb + 1],
         0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
      );

      texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
      tex_pitch = tex_surface->stride / tex_surface->block.size;

      texels += mbpy * tex_pitch + mbpx;

      if ((coded_block_pattern >> (1 - tb)) & 1)
      {
         short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

         /* Chroma blocks are always frame coded here. */
         vlGrabFrameCodedBlock
         (
            cur_block,
            texels,
            tex_pitch
         );

         ++sb;
      }
      else if (mc->zero_block[tb + 1].x < 0.0f)
      {
         vlGrabNoBlock(texels, tex_pitch);

         /* Shift back up: zero_block texcoords are in luma-scale units. */
         mc->zero_block[tb + 1].x = (mbpx << 1) * mc->surface_tex_inv_size.x;
         mc->zero_block[tb + 1].y = (mbpy << 1) * mc->surface_tex_inv_size.y;
      }

      pipe_surface_unmap(tex_surface);
   }

   return 0;
}
253
254 static inline enum vlMacroBlockTypeEx vlGetMacroBlockTypeEx(struct vlMpeg2MacroBlock *mb)
255 {
256 assert(mb);
257
258 switch (mb->mb_type)
259 {
260 case vlMacroBlockTypeIntra:
261 return vlMacroBlockExTypeIntra;
262 case vlMacroBlockTypeFwdPredicted:
263 return mb->mo_type == vlMotionTypeFrame ?
264 vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField;
265 case vlMacroBlockTypeBkwdPredicted:
266 return mb->mo_type == vlMotionTypeFrame ?
267 vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField;
268 case vlMacroBlockTypeBiPredicted:
269 return mb->mo_type == vlMotionTypeFrame ?
270 vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField;
271 default:
272 assert(0);
273 }
274
275 /* Unreachable */
276 return -1;
277 }
278
279 static inline int vlGrabMacroBlock
280 (
281 struct vlR16SnormBufferedMC *mc,
282 struct vlMpeg2MacroBlock *macroblock
283 )
284 {
285 assert(mc);
286 assert(macroblock);
287
288 mc->macroblocks[mc->num_macroblocks].mbx = macroblock->mbx;
289 mc->macroblocks[mc->num_macroblocks].mby = macroblock->mby;
290 mc->macroblocks[mc->num_macroblocks].mb_type = macroblock->mb_type;
291 mc->macroblocks[mc->num_macroblocks].mo_type = macroblock->mo_type;
292 mc->macroblocks[mc->num_macroblocks].dct_type = macroblock->dct_type;
293 mc->macroblocks[mc->num_macroblocks].PMV[0][0][0] = macroblock->PMV[0][0][0];
294 mc->macroblocks[mc->num_macroblocks].PMV[0][0][1] = macroblock->PMV[0][0][1];
295 mc->macroblocks[mc->num_macroblocks].PMV[0][1][0] = macroblock->PMV[0][1][0];
296 mc->macroblocks[mc->num_macroblocks].PMV[0][1][1] = macroblock->PMV[0][1][1];
297 mc->macroblocks[mc->num_macroblocks].PMV[1][0][0] = macroblock->PMV[1][0][0];
298 mc->macroblocks[mc->num_macroblocks].PMV[1][0][1] = macroblock->PMV[1][0][1];
299 mc->macroblocks[mc->num_macroblocks].PMV[1][1][0] = macroblock->PMV[1][1][0];
300 mc->macroblocks[mc->num_macroblocks].PMV[1][1][1] = macroblock->PMV[1][1][1];
301 mc->macroblocks[mc->num_macroblocks].cbp = macroblock->cbp;
302 mc->macroblocks[mc->num_macroblocks].blocks = macroblock->blocks;
303
304 vlGrabBlocks
305 (
306 mc,
307 macroblock->mbx,
308 macroblock->mby,
309 macroblock->dct_type,
310 macroblock->cbp,
311 macroblock->blocks
312 );
313
314 mc->num_macroblocks++;
315
316 return 0;
317 }
318
/*
 * Emit one quad (2 triangles = 6 vertices) of stream-0 vertex data for an
 * 8x8 region of a macroblock: positions always, and per-component
 * texcoords that either point at the block's own position (when the CBP
 * bit lm/cbm/crm is set) or at the cached all-zero block zb[component].
 * Wrapped in do { } while (0) so the multi-statement/if-else expansion is
 * a single statement and is safe inside unbraced if/else at call sites.
 */
#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zb)			\
   do {													\
   (vb)[0].pos.x = (mbx) * (unitx) + (ofsx);		(vb)[0].pos.y = (mby) * (unity) + (ofsy);		\
   (vb)[1].pos.x = (mbx) * (unitx) + (ofsx);		(vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy);	\
   (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[2].pos.y = (mby) * (unity) + (ofsy);		\
   (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[3].pos.y = (mby) * (unity) + (ofsy);		\
   (vb)[4].pos.x = (mbx) * (unitx) + (ofsx);		(vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy);	\
   (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy);	\
													\
   if ((cbp) & (lm))											\
   {													\
      (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[0].luma_tc.y = (mby) * (unity) + (ofsy);		\
      (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
      (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[2].luma_tc.y = (mby) * (unity) + (ofsy);		\
      (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[3].luma_tc.y = (mby) * (unity) + (ofsy);		\
      (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
      (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
   }													\
   else													\
   {													\
      (vb)[0].luma_tc.x = (zb)[0].x;		(vb)[0].luma_tc.y = (zb)[0].y;		\
      (vb)[1].luma_tc.x = (zb)[0].x;		(vb)[1].luma_tc.y = (zb)[0].y + (hy);	\
      (vb)[2].luma_tc.x = (zb)[0].x + (hx);	(vb)[2].luma_tc.y = (zb)[0].y;		\
      (vb)[3].luma_tc.x = (zb)[0].x + (hx);	(vb)[3].luma_tc.y = (zb)[0].y;		\
      (vb)[4].luma_tc.x = (zb)[0].x;		(vb)[4].luma_tc.y = (zb)[0].y + (hy);	\
      (vb)[5].luma_tc.x = (zb)[0].x + (hx);	(vb)[5].luma_tc.y = (zb)[0].y + (hy);	\
   }													\
													\
   if ((cbp) & (cbm))											\
   {													\
      (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[0].cb_tc.y = (mby) * (unity) + (ofsy);		\
      (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
      (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[2].cb_tc.y = (mby) * (unity) + (ofsy);		\
      (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[3].cb_tc.y = (mby) * (unity) + (ofsy);		\
      (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
      (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
   }													\
   else													\
   {													\
      (vb)[0].cb_tc.x = (zb)[1].x;		(vb)[0].cb_tc.y = (zb)[1].y;		\
      (vb)[1].cb_tc.x = (zb)[1].x;		(vb)[1].cb_tc.y = (zb)[1].y + (hy);	\
      (vb)[2].cb_tc.x = (zb)[1].x + (hx);	(vb)[2].cb_tc.y = (zb)[1].y;		\
      (vb)[3].cb_tc.x = (zb)[1].x + (hx);	(vb)[3].cb_tc.y = (zb)[1].y;		\
      (vb)[4].cb_tc.x = (zb)[1].x;		(vb)[4].cb_tc.y = (zb)[1].y + (hy);	\
      (vb)[5].cb_tc.x = (zb)[1].x + (hx);	(vb)[5].cb_tc.y = (zb)[1].y + (hy);	\
   }													\
													\
   if ((cbp) & (crm))											\
   {													\
      (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[0].cr_tc.y = (mby) * (unity) + (ofsy);		\
      (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
      (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[2].cr_tc.y = (mby) * (unity) + (ofsy);		\
      (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[3].cr_tc.y = (mby) * (unity) + (ofsy);		\
      (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
      (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
   }													\
   else													\
   {													\
      (vb)[0].cr_tc.x = (zb)[2].x;		(vb)[0].cr_tc.y = (zb)[2].y;		\
      (vb)[1].cr_tc.x = (zb)[2].x;		(vb)[1].cr_tc.y = (zb)[2].y + (hy);	\
      (vb)[2].cr_tc.x = (zb)[2].x + (hx);	(vb)[2].cr_tc.y = (zb)[2].y;		\
      (vb)[3].cr_tc.x = (zb)[2].x + (hx);	(vb)[3].cr_tc.y = (zb)[2].y;		\
      (vb)[4].cr_tc.x = (zb)[2].x;		(vb)[4].cr_tc.y = (zb)[2].y + (hy);	\
      (vb)[5].cr_tc.x = (zb)[2].x + (hx);	(vb)[5].cr_tc.y = (zb)[2].y + (hy);	\
   }													\
   } while (0)
383
/*
 * Write this macroblock's vertex data into the current buffer set's
 * vertex buffers at macroblock slot 'pos'. Each macroblock is 24 vertices
 * (4 quads of 2 triangles, one quad per 8x8 region). Stream 0 carries
 * positions and texcoords, stream 1 the first (forward/backward) motion
 * vector pair, stream 2 the second motion vector pair for bi-prediction.
 * The switch cases intentionally fall through: bi-predicted blocks write
 * streams 2, 1 and 0; single-predicted blocks write streams 1 and 0;
 * intra blocks write only stream 0.
 */
static inline int vlGrabMacroBlockVB
(
   struct vlR16SnormBufferedMC *mc,
   struct vlMpeg2MacroBlock *macroblock,
   unsigned int pos
)
{
   struct vlVertex2f mo_vec[2];
   unsigned int i;

   assert(mc);
   assert(macroblock);

   switch (macroblock->mb_type)
   {
      case vlMacroBlockTypeBiPredicted:
      {
         struct vlVertex2f *vb;

         /* Stream 2: second motion vector, 2 vectors per vertex (one per field). */
         vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
         (
            mc->pipe->winsys,
            mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer,
            PIPE_BUFFER_USAGE_CPU_WRITE
         ) + pos * 2 * 24;

         /* Half-pel motion vector, normalized into texcoord space. */
         mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
         mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

         if (macroblock->mo_type == vlMotionTypeFrame)
         {
            /* Frame motion: same vector for every vertex; odd slots unused. */
            for (i = 0; i < 24 * 2; i += 2)
            {
               vb[i].x = mo_vec[0].x;
               vb[i].y = mo_vec[0].y;
            }
         }
         else
         {
            /* Field motion: even slots get the first field's vector,
               odd slots the second field's. */
            mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
            mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;

            for (i = 0; i < 24 * 2; i += 2)
            {
               vb[i].x = mo_vec[0].x;
               vb[i].y = mo_vec[0].y;
               vb[i + 1].x = mo_vec[1].x;
               vb[i + 1].y = mo_vec[1].y;
            }
         }

         mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer);

         /* fall-through */
      }
      case vlMacroBlockTypeFwdPredicted:
      case vlMacroBlockTypeBkwdPredicted:
      {
         struct vlVertex2f *vb;

         /* Stream 1: first motion vector, same layout as stream 2. */
         vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
         (
            mc->pipe->winsys,
            mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer,
            PIPE_BUFFER_USAGE_CPU_WRITE
         ) + pos * 2 * 24;

         /* Backward-predicted blocks use PMV[*][1], others PMV[*][0]. */
         if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted)
         {
            mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
            mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

            if (macroblock->mo_type == vlMotionTypeField)
            {
               mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
               mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;
            }
         }
         else
         {
            mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x;
            mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y;

            if (macroblock->mo_type == vlMotionTypeField)
            {
               mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x;
               mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y;
            }
         }

         if (macroblock->mo_type == vlMotionTypeFrame)
         {
            for (i = 0; i < 24 * 2; i += 2)
            {
               vb[i].x = mo_vec[0].x;
               vb[i].y = mo_vec[0].y;
            }
         }
         else
         {
            for (i = 0; i < 24 * 2; i += 2)
            {
               vb[i].x = mo_vec[0].x;
               vb[i].y = mo_vec[0].y;
               vb[i + 1].x = mo_vec[1].x;
               vb[i + 1].y = mo_vec[1].y;
            }
         }

         mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer);

         /* fall-through */
      }
      case vlMacroBlockTypeIntra:
      {
         /* Macroblock size in normalized texcoord units. */
         const struct vlVertex2f unit =
         {
            mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH,
            mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT
         };
         /* Half a macroblock, i.e. one 8x8 block. */
         const struct vlVertex2f half =
         {
            mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2),
            mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2)
         };

         /* Stream 0 layout: position plus Y/Cb/Cr texcoords per vertex. */
         struct vlMacroBlockVertexStream0
         {
            struct vlVertex2f pos;
            struct vlVertex2f luma_tc;
            struct vlVertex2f cb_tc;
            struct vlVertex2f cr_tc;
         } *vb;

         vb = (struct vlMacroBlockVertexStream0*)mc->pipe->winsys->buffer_map
         (
            mc->pipe->winsys,
            mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer,
            PIPE_BUFFER_USAGE_CPU_WRITE
         ) + pos * 24;

         /* One quad per 8x8 region; luma CBP masks are 32/16/8/4,
            chroma Cb/Cr masks are 2/1 for the whole macroblock. */
         SET_BLOCK
         (
            vb,
            macroblock->cbp, macroblock->mbx, macroblock->mby,
            unit.x, unit.y, 0, 0, half.x, half.y,
            32, 2, 1, mc->zero_block
         );

         SET_BLOCK
         (
            vb + 6,
            macroblock->cbp, macroblock->mbx, macroblock->mby,
            unit.x, unit.y, half.x, 0, half.x, half.y,
            16, 2, 1, mc->zero_block
         );

         SET_BLOCK
         (
            vb + 12,
            macroblock->cbp, macroblock->mbx, macroblock->mby,
            unit.x, unit.y, 0, half.y, half.x, half.y,
            8, 2, 1, mc->zero_block
         );

         SET_BLOCK
         (
            vb + 18,
            macroblock->cbp, macroblock->mbx, macroblock->mby,
            unit.x, unit.y, half.x, half.y, half.x, half.y,
            4, 2, 1, mc->zero_block
         );

         mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer);

         break;
      }
      default:
         assert(0);
   }

   return 0;
}
567
/*
 * Render all batched macroblocks into the buffered target surface.
 * Macroblocks are first counted per expanded type and their vertex data
 * is written into contiguous per-type ranges, so each type can be drawn
 * with a single draw call using its own shader pair and texture set.
 * Afterwards the batch is reset and the next rotating buffer set becomes
 * current.
 */
static int vlFlush
(
   struct vlRender *render
)
{
   struct vlR16SnormBufferedMC *mc;
   struct pipe_context *pipe;
   struct vlVertexShaderConsts *vs_consts;
   unsigned int num_macroblocks[vlNumMacroBlockExTypes] = {0};
   unsigned int offset[vlNumMacroBlockExTypes];
   unsigned int vb_start = 0;
   unsigned int i;

   assert(render);

   mc = (struct vlR16SnormBufferedMC*)render;

   /* Nothing batched since the last flush. */
   if (!mc->buffered_surface)
      return 0;

   pipe = mc->pipe;

   /* Count macroblocks of each expanded type... */
   for (i = 0; i < mc->num_macroblocks; ++i)
   {
      enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]);

      num_macroblocks[mb_type_ex]++;
   }

   /* ...derive each type's starting slot via prefix sums... */
   offset[0] = 0;

   for (i = 1; i < vlNumMacroBlockExTypes; ++i)
      offset[i] = offset[i - 1] + num_macroblocks[i - 1];

   /* ...and write every macroblock's vertices into its type's range. */
   for (i = 0; i < mc->num_macroblocks; ++i)
   {
      enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]);

      vlGrabMacroBlockVB(mc, &mc->macroblocks[i], offset[mb_type_ex]);

      offset[mb_type_ex]++;
   }

   /* Render into the buffered surface's texture. */
   mc->render_target.cbufs[0] = pipe->screen->get_tex_surface
   (
      pipe->screen,
      mc->buffered_surface->texture,
      0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
   );

   pipe->set_framebuffer_state(pipe, &mc->render_target);
   pipe->set_viewport_state(pipe, &mc->viewport);
   vs_consts = pipe->winsys->buffer_map
   (
      pipe->winsys,
      mc->vs_const_buf.buffer,
      PIPE_BUFFER_USAGE_CPU_WRITE
   );

   /* Surface dimensions, used by the VS to denormalize coordinates. */
   vs_consts->denorm.x = mc->buffered_surface->texture->width[0];
   vs_consts->denorm.y = mc->buffered_surface->texture->height[0];

   pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer);
   pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf);
   pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf);

   /* Intra: stream 0 only, 3 source textures, no reference frames. */
   if (num_macroblocks[vlMacroBlockExTypeIntra] > 0)
   {
      pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
      pipe->set_vertex_elements(pipe, 4, mc->vertex_elems);
      pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
      pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers);
      pipe->bind_vs_state(pipe, mc->i_vs);
      pipe->bind_fs_state(pipe, mc->i_fs);

      pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeIntra] * 24);
      vb_start += num_macroblocks[vlMacroBlockExTypeIntra] * 24;
   }

   /* Fwd predicted, frame motion: streams 0-1, past surface as texture 3. */
   if (num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0)
   {
      pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
      pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
      mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
      pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
      pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
      pipe->bind_vs_state(pipe, mc->p_vs[0]);
      pipe->bind_fs_state(pipe, mc->p_fs[0]);

      pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24);
      vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24;
   }

   /* Fwd predicted, field motion: as above with the field shader pair. */
   if (num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0)
   {
      pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
      pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
      mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
      pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
      pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
      pipe->bind_vs_state(pipe, mc->p_vs[1]);
      pipe->bind_fs_state(pipe, mc->p_fs[1]);

      pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24);
      vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24;
   }

   /* Bkwd predicted, frame motion: future surface as texture 3. */
   if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0)
   {
      pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
      pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
      mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
      pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
      pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
      pipe->bind_vs_state(pipe, mc->p_vs[0]);
      pipe->bind_fs_state(pipe, mc->p_fs[0]);

      pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24);
      vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24;
   }

   /* Bkwd predicted, field motion. */
   if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0)
   {
      pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
      pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
      mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
      pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
      pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
      pipe->bind_vs_state(pipe, mc->p_vs[1]);
      pipe->bind_fs_state(pipe, mc->p_fs[1]);

      pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24);
      vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24;
   }

   /* Bi predicted, frame motion: streams 0-2, both references bound. */
   if (num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0)
   {
      pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
      pipe->set_vertex_elements(pipe, 8, mc->vertex_elems);
      mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
      mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
      pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
      pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
      pipe->bind_vs_state(pipe, mc->b_vs[0]);
      pipe->bind_fs_state(pipe, mc->b_fs[0]);

      pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24);
      vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24;
   }

   /* Bi predicted, field motion. */
   if (num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0)
   {
      pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
      pipe->set_vertex_elements(pipe, 8, mc->vertex_elems);
      mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
      mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
      pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
      pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
      pipe->bind_vs_state(pipe, mc->b_vs[1]);
      pipe->bind_fs_state(pipe, mc->b_fs[1]);

      pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24);
      vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24;
   }

   /* Fence the surface so consumers can wait for rendering to finish. */
   pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &mc->buffered_surface->render_fence);

   /* Reset zero-block cache (x < 0 marks "not found yet"). */
   for (i = 0; i < 3; ++i)
      mc->zero_block[i].x = -1.0f;

   /* Reset the batch and rotate to the next buffer set. */
   mc->buffered_surface = NULL;
   mc->num_macroblocks = 0;
   mc->cur_buf++;

   return 0;
}
744
745 static int vlRenderMacroBlocksMpeg2R16SnormBuffered
746 (
747 struct vlRender *render,
748 struct vlMpeg2MacroBlockBatch *batch,
749 struct vlSurface *surface
750 )
751 {
752 struct vlR16SnormBufferedMC *mc;
753 unsigned int i;
754
755 assert(render);
756
757 mc = (struct vlR16SnormBufferedMC*)render;
758
759 if (mc->buffered_surface)
760 {
761 if (mc->buffered_surface != surface)
762 {
763 vlFlush(&mc->base);
764 mc->buffered_surface = surface;
765 mc->past_surface = batch->past_surface;
766 mc->future_surface = batch->future_surface;
767 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
768 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
769 }
770 }
771 else
772 {
773 mc->buffered_surface = surface;
774 mc->past_surface = batch->past_surface;
775 mc->future_surface = batch->future_surface;
776 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
777 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
778 }
779
780 for (i = 0; i < batch->num_macroblocks; ++i)
781 vlGrabMacroBlock(mc, &batch->macroblocks[i]);
782
783 return 0;
784 }
785
/* End a rendering batch; no per-batch teardown is needed here. */
static int vlEnd(struct vlRender *render)
{
   assert(render);

   return 0;
}
795
796 static int vlDestroy
797 (
798 struct vlRender *render
799 )
800 {
801 struct vlR16SnormBufferedMC *mc;
802 struct pipe_context *pipe;
803 unsigned int h, i;
804
805 assert(render);
806
807 mc = (struct vlR16SnormBufferedMC*)render;
808 pipe = mc->pipe;
809
810 for (i = 0; i < 5; ++i)
811 pipe->delete_sampler_state(pipe, mc->samplers[i]);
812
813 for (h = 0; h < NUM_BUF_SETS; ++h)
814 for (i = 0; i < 3; ++i)
815 pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[h][i].buffer);
816
817 /* Textures 3 & 4 are not created directly, no need to release them here */
818 for (i = 0; i < NUM_BUF_SETS; ++i)
819 {
820 pipe_texture_release(&mc->textures[i][0]);
821 pipe_texture_release(&mc->textures[i][1]);
822 pipe_texture_release(&mc->textures[i][2]);
823 }
824
825 pipe->delete_vs_state(pipe, mc->i_vs);
826 pipe->delete_fs_state(pipe, mc->i_fs);
827
828 for (i = 0; i < 2; ++i)
829 {
830 pipe->delete_vs_state(pipe, mc->p_vs[i]);
831 pipe->delete_fs_state(pipe, mc->p_fs[i]);
832 pipe->delete_vs_state(pipe, mc->b_vs[i]);
833 pipe->delete_fs_state(pipe, mc->b_fs[i]);
834 }
835
836 pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer);
837 pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer);
838
839 free(mc->macroblocks);
840 free(mc);
841
842 return 0;
843 }
844
/*
 * Multiplier renormalizes block samples from 16 bits to 12 bits.
 * Divider is used when calculating Y % 2 for choosing top or bottom
 * field for P or B macroblocks.
 * TODO: Use immediates.
 */
static const struct vlFragmentShaderConsts fs_consts =
{
   {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
   {0.5f, 2.0f, 0.0f, 0.0f}
};
856
/*
 * Build the vertex shader for intra-coded macroblocks: a plain
 * pass-through that copies the vertex position and the three texcoord
 * sets (Y, Cb, Cr) straight to the outputs. The compiled state object is
 * stored in mc->i_vs.
 * NOTE(review): malloc and create_vs_state results are unchecked — OOM
 * would crash; this matches the other shader builders in this file.
 */
static int vlCreateVertexShaderIMB
(
   struct vlR16SnormBufferedMC *mc
)
{
   const unsigned int max_tokens = 50;

   struct pipe_context *pipe;
   struct pipe_shader_state vs;
   struct tgsi_token *tokens;
   struct tgsi_header *header;

   struct tgsi_full_declaration decl;
   struct tgsi_full_instruction inst;

   unsigned int ti;
   unsigned int i;

   assert(mc);

   pipe = mc->pipe;
   tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

   /* Version */
   *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
   /* Header */
   header = (struct tgsi_header*)&tokens[1];
   *header = tgsi_build_header();
   /* Processor */
   *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);

   /* ti tracks the next free token slot as declarations/instructions are appended. */
   ti = 3;

   /*
    * decl i0             ; Vertex pos
    * decl i1             ; Luma texcoords
    * decl i2             ; Chroma Cb texcoords
    * decl i3             ; Chroma Cr texcoords
    */
   for (i = 0; i < 4; i++)
   {
      decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
   }

   /*
    * decl o0             ; Vertex pos
    * decl o1             ; Luma texcoords
    * decl o2             ; Chroma Cb texcoords
    * decl o3             ; Chroma Cr texcoords
    */
   for (i = 0; i < 4; i++)
   {
      decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
   }

   /*
    * mov o0, i0          ; Move input vertex pos to output
    * mov o1, i1          ; Move input luma texcoords to output
    * mov o2, i2          ; Move input chroma Cb texcoords to output
    * mov o3, i3          ; Move input chroma Cr texcoords to output
    */
   for (i = 0; i < 4; ++i)
   {
      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
   }

   /* end */
   inst = vl_end();
   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

   vs.tokens = tokens;
   mc->i_vs = pipe->create_vs_state(pipe, &vs);
   free(tokens);

   return 0;
}
936
/*
 * Build the fragment shader for intra-coded macroblocks: sample the Y, Cb
 * and Cr source textures, pack the three samples into one vector, and
 * rescale by constant c0 (see fs_consts). The compiled state object is
 * stored in mc->i_fs.
 * NOTE(review): malloc and create_fs_state results are unchecked — OOM
 * would crash; this matches the other shader builders in this file.
 */
static int vlCreateFragmentShaderIMB
(
   struct vlR16SnormBufferedMC *mc
)
{
   const unsigned int max_tokens = 100;

   struct pipe_context *pipe;
   struct pipe_shader_state fs;
   struct tgsi_token *tokens;
   struct tgsi_header *header;

   struct tgsi_full_declaration decl;
   struct tgsi_full_instruction inst;

   unsigned int ti;
   unsigned int i;

   assert(mc);

   pipe = mc->pipe;
   tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

   /* Version */
   *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
   /* Header */
   header = (struct tgsi_header*)&tokens[1];
   *header = tgsi_build_header();
   /* Processor */
   *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

   /* ti tracks the next free token slot as declarations/instructions are appended. */
   ti = 3;

   /*
    * decl i0             ; Luma texcoords
    * decl i1             ; Chroma Cb texcoords
    * decl i2             ; Chroma Cr texcoords
    */
   for (i = 0; i < 3; ++i)
   {
      decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, i, i, TGSI_INTERPOLATE_LINEAR);
      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
   }

   /* decl c0             ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
   decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

   /* decl o0             ; Fragment color */
   decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

   /* decl t0, t1 */
   decl = vl_decl_temps(0, 1);
   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

   /*
    * decl s0             ; Sampler for luma texture
    * decl s1             ; Sampler for chroma Cb texture
    * decl s2             ; Sampler for chroma Cr texture
    */
   for (i = 0; i < 3; ++i)
   {
      decl = vl_decl_samplers(i, i);
      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti);
   }

   /*
    * tex2d t1, i0, s0    ; Read texel from luma texture
    * mov t0.x, t1.x      ; Move luma sample into .x component
    * tex2d t1, i1, s1    ; Read texel from chroma Cb texture
    * mov t0.y, t1.x      ; Move Cb sample into .y component
    * tex2d t1, i2, s2    ; Read texel from chroma Cr texture
    * mov t0.z, t1.x      ; Move Cr sample into .z component
    */
   for (i = 0; i < 3; ++i)
   {
      inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

      /* Broadcast the sampled .x, then write-mask to place it in lane i of t0. */
      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
      inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
      inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
      inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
      inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
   }

   /* mul o0, t0, c0      ; Rescale texel to correct range */
   inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

   /* end */
   inst = vl_end();
   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

   fs.tokens = tokens;
   mc->i_fs = pipe->create_fs_state(pipe, &fs);
   free(tokens);

   return 0;
}
1039
1040 static int vlCreateVertexShaderFramePMB
1041 (
1042 struct vlR16SnormBufferedMC *mc
1043 )
1044 {
1045 const unsigned int max_tokens = 100;
1046
1047 struct pipe_context *pipe;
1048 struct pipe_shader_state vs;
1049 struct tgsi_token *tokens;
1050 struct tgsi_header *header;
1051
1052 struct tgsi_full_declaration decl;
1053 struct tgsi_full_instruction inst;
1054
1055 unsigned int ti;
1056 unsigned int i;
1057
1058 assert(mc);
1059
1060 pipe = mc->pipe;
1061 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1062
1063 /* Version */
1064 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1065 /* Header */
1066 header = (struct tgsi_header*)&tokens[1];
1067 *header = tgsi_build_header();
1068 /* Processor */
1069 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1070
1071 ti = 3;
1072
1073 /*
1074 * decl i0 ; Vertex pos
1075 * decl i1 ; Luma texcoords
1076 * decl i2 ; Chroma Cb texcoords
1077 * decl i3 ; Chroma Cr texcoords
1078 * decl i4 ; Ref surface top field texcoords
1079 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
1080 */
1081 for (i = 0; i < 6; i++)
1082 {
1083 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1084 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1085 }
1086
1087 /*
1088 * decl o0 ; Vertex pos
1089 * decl o1 ; Luma texcoords
1090 * decl o2 ; Chroma Cb texcoords
1091 * decl o3 ; Chroma Cr texcoords
1092 * decl o4 ; Ref macroblock texcoords
1093 */
1094 for (i = 0; i < 5; i++)
1095 {
1096 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1097 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1098 }
1099
1100 /*
1101 * mov o0, i0 ; Move input vertex pos to output
1102 * mov o1, i1 ; Move input luma texcoords to output
1103 * mov o2, i2 ; Move input chroma Cb texcoords to output
1104 * mov o3, i3 ; Move input chroma Cr texcoords to output
1105 */
1106 for (i = 0; i < 4; ++i)
1107 {
1108 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1109 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1110 }
1111
1112 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
1113 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
1114 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1115
1116 /* end */
1117 inst = vl_end();
1118 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1119
1120 vs.tokens = tokens;
1121 mc->p_vs[0] = pipe->create_vs_state(pipe, &vs);
1122 free(tokens);
1123
1124 return 0;
1125 }
1126
1127 static int vlCreateVertexShaderFieldPMB
1128 (
1129 struct vlR16SnormBufferedMC *mc
1130 )
1131 {
1132 const unsigned int max_tokens = 100;
1133
1134 struct pipe_context *pipe;
1135 struct pipe_shader_state vs;
1136 struct tgsi_token *tokens;
1137 struct tgsi_header *header;
1138
1139 struct tgsi_full_declaration decl;
1140 struct tgsi_full_instruction inst;
1141
1142 unsigned int ti;
1143 unsigned int i;
1144
1145 assert(mc);
1146
1147 pipe = mc->pipe;
1148 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1149
1150 /* Version */
1151 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1152 /* Header */
1153 header = (struct tgsi_header*)&tokens[1];
1154 *header = tgsi_build_header();
1155 /* Processor */
1156 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1157
1158 ti = 3;
1159
1160 /*
1161 * decl i0 ; Vertex pos
1162 * decl i1 ; Luma texcoords
1163 * decl i2 ; Chroma Cb texcoords
1164 * decl i3 ; Chroma Cr texcoords
1165 * decl i4 ; Ref macroblock top field texcoords
1166 * decl i5 ; Ref macroblock bottom field texcoords
1167 */
1168 for (i = 0; i < 6; i++)
1169 {
1170 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1171 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1172 }
1173
1174 /* decl c0 ; Render target dimensions */
1175 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1176 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1177
1178 /*
1179 * decl o0 ; Vertex pos
1180 * decl o1 ; Luma texcoords
1181 * decl o2 ; Chroma Cb texcoords
1182 * decl o3 ; Chroma Cr texcoords
1183 * decl o4 ; Ref macroblock top field texcoords
1184 * decl o5 ; Ref macroblock bottom field texcoords
1185 * decl o6 ; Denormalized vertex pos
1186 */
1187 for (i = 0; i < 7; i++)
1188 {
1189 decl = vl_decl_output((i == 0 || i == 6) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1190 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1191 }
1192
1193 /*
1194 * mov o0, i0 ; Move input vertex pos to output
1195 * mov o1, i1 ; Move input luma texcoords to output
1196 * mov o2, i2 ; Move input chroma Cb texcoords to output
1197 * mov o3, i3 ; Move input chroma Cr texcoords to output
1198 */
1199 for (i = 0; i < 4; ++i)
1200 {
1201 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1202 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1203 }
1204
1205 /*
1206 * add o4, i0, i4 ; Translate vertex pos by motion vec to form top field macroblock texcoords
1207 * add o5, i0, i5 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords
1208 */
1209 for (i = 0; i < 2; ++i)
1210 {
1211 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
1212 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1213 }
1214
1215 /* mul o6, i0, c0 ; Denorm vertex pos */
1216 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1217 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1218
1219 /* end */
1220 inst = vl_end();
1221 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1222
1223 vs.tokens = tokens;
1224 mc->p_vs[1] = pipe->create_vs_state(pipe, &vs);
1225 free(tokens);
1226
1227 return 0;
1228 }
1229
1230 static int vlCreateFragmentShaderFramePMB
1231 (
1232 struct vlR16SnormBufferedMC *mc
1233 )
1234 {
1235 const unsigned int max_tokens = 100;
1236
1237 struct pipe_context *pipe;
1238 struct pipe_shader_state fs;
1239 struct tgsi_token *tokens;
1240 struct tgsi_header *header;
1241
1242 struct tgsi_full_declaration decl;
1243 struct tgsi_full_instruction inst;
1244
1245 unsigned int ti;
1246 unsigned int i;
1247
1248 assert(mc);
1249
1250 pipe = mc->pipe;
1251 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1252
1253 /* Version */
1254 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1255 /* Header */
1256 header = (struct tgsi_header*)&tokens[1];
1257 *header = tgsi_build_header();
1258 /* Processor */
1259 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1260
1261 ti = 3;
1262
1263 /*
1264 * decl i0 ; Luma texcoords
1265 * decl i1 ; Chroma Cb texcoords
1266 * decl i2 ; Chroma Cr texcoords
1267 * decl i3 ; Ref macroblock texcoords
1268 */
1269 for (i = 0; i < 4; ++i)
1270 {
1271 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1272 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1273 }
1274
1275 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
1276 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1277 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1278
1279 /* decl o0 ; Fragment color */
1280 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1281 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1282
1283 /* decl t0, t1 */
1284 decl = vl_decl_temps(0, 1);
1285 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1286
1287 /*
1288 * decl s0 ; Sampler for luma texture
1289 * decl s1 ; Sampler for chroma Cb texture
1290 * decl s2 ; Sampler for chroma Cr texture
1291 * decl s3 ; Sampler for ref surface texture
1292 */
1293 for (i = 0; i < 4; ++i)
1294 {
1295 decl = vl_decl_samplers(i, i);
1296 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1297 }
1298
1299 /*
1300 * tex2d t1, i0, s0 ; Read texel from luma texture
1301 * mov t0.x, t1.x ; Move luma sample into .x component
1302 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1303 * mov t0.y, t1.x ; Move Cb sample into .y component
1304 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1305 * mov t0.z, t1.x ; Move Cr sample into .z component
1306 */
1307 for (i = 0; i < 3; ++i)
1308 {
1309 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1310 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1311
1312 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1313 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1314 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1315 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1316 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1317 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1318 }
1319
1320 /* mul t0, t0, c0 ; Rescale texel to correct range */
1321 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1322 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1323
1324 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
1325 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
1326 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1327
1328 /* add o0, t0, t1 ; Add ref and differential to form final output */
1329 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1330 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1331
1332 /* end */
1333 inst = vl_end();
1334 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1335
1336 fs.tokens = tokens;
1337 mc->p_fs[0] = pipe->create_fs_state(pipe, &fs);
1338 free(tokens);
1339
1340 return 0;
1341 }
1342
1343 static int vlCreateFragmentShaderFieldPMB
1344 (
1345 struct vlR16SnormBufferedMC *mc
1346 )
1347 {
1348 const unsigned int max_tokens = 200;
1349
1350 struct pipe_context *pipe;
1351 struct pipe_shader_state fs;
1352 struct tgsi_token *tokens;
1353 struct tgsi_header *header;
1354
1355 struct tgsi_full_declaration decl;
1356 struct tgsi_full_instruction inst;
1357
1358 unsigned int ti;
1359 unsigned int i;
1360
1361 assert(mc);
1362
1363 pipe = mc->pipe;
1364 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1365
1366 /* Version */
1367 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1368 /* Header */
1369 header = (struct tgsi_header*)&tokens[1];
1370 *header = tgsi_build_header();
1371 /* Processor */
1372 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1373
1374 ti = 3;
1375
1376 /*
1377 * decl i0 ; Luma texcoords
1378 * decl i1 ; Chroma Cb texcoords
1379 * decl i2 ; Chroma Cr texcoords
1380 * decl i3 ; Ref macroblock top field texcoords
1381 * decl i4 ; Ref macroblock bottom field texcoords
1382 * decl i5 ; Denormalized vertex pos
1383 */
1384 for (i = 0; i < 6; ++i)
1385 {
1386 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1387 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1388 }
1389
1390 /*
1391 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1392 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
1393 */
1394 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1395 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1396
1397 /* decl o0 ; Fragment color */
1398 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1399 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1400
1401 /* decl t0-t4 */
1402 decl = vl_decl_temps(0, 4);
1403 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1404
1405 /*
1406 * decl s0 ; Sampler for luma texture
1407 * decl s1 ; Sampler for chroma Cb texture
1408 * decl s2 ; Sampler for chroma Cr texture
1409 * decl s3 ; Sampler for ref surface texture
1410 */
1411 for (i = 0; i < 4; ++i)
1412 {
1413 decl = vl_decl_samplers(i, i);
1414 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1415 }
1416
1417 /*
1418 * tex2d t1, i0, s0 ; Read texel from luma texture
1419 * mov t0.x, t1.x ; Move luma sample into .x component
1420 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1421 * mov t0.y, t1.x ; Move Cb sample into .y component
1422 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1423 * mov t0.z, t1.x ; Move Cr sample into .z component
1424 */
1425 for (i = 0; i < 3; ++i)
1426 {
1427 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1428 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1429
1430 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1431 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1432 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1433 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1434 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1435 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1436 }
1437
1438 /* mul t0, t0, c0 ; Rescale texel to correct range */
1439 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1440 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1441
1442 /*
1443 * tex2d t1, i3, s3 ; Read texel from ref macroblock top field
1444 * tex2d t2, i4, s3 ; Read texel from ref macroblock bottom field
1445 */
1446 for (i = 0; i < 2; ++i)
1447 {
1448 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3);
1449 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1450 }
1451
1452 /* XXX: Pos values off by 0.5? */
1453 /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */
1454 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1);
1455 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1456 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1457 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1458 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1459 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1460 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1461 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1462 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1463 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1464
1465 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1466 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
1467 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1468 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1469 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1470 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1471 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1472
1473 /* floor t3, t3 ; Get rid of fractional part */
1474 inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
1475 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1476
1477 /* mul t3, t3, c1.y ; Multiply by 2 */
1478 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
1479 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1480 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1481 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1482 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1483 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1484
1485 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1486 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
1487 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1488
1489 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1490 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
1491 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1492 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1493
1494 /* add o0, t0, t1 ; Add ref and differential to form final output */
1495 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1496 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1497
1498 /* end */
1499 inst = vl_end();
1500 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1501
1502 fs.tokens = tokens;
1503 mc->p_fs[1] = pipe->create_fs_state(pipe, &fs);
1504 free(tokens);
1505
1506 return 0;
1507 }
1508
1509 static int vlCreateVertexShaderFrameBMB
1510 (
1511 struct vlR16SnormBufferedMC *mc
1512 )
1513 {
1514 const unsigned int max_tokens = 100;
1515
1516 struct pipe_context *pipe;
1517 struct pipe_shader_state vs;
1518 struct tgsi_token *tokens;
1519 struct tgsi_header *header;
1520
1521 struct tgsi_full_declaration decl;
1522 struct tgsi_full_instruction inst;
1523
1524 unsigned int ti;
1525 unsigned int i;
1526
1527 assert(mc);
1528
1529 pipe = mc->pipe;
1530 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1531
1532 /* Version */
1533 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1534 /* Header */
1535 header = (struct tgsi_header*)&tokens[1];
1536 *header = tgsi_build_header();
1537 /* Processor */
1538 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1539
1540 ti = 3;
1541
1542 /*
1543 * decl i0 ; Vertex pos
1544 * decl i1 ; Luma texcoords
1545 * decl i2 ; Chroma Cb texcoords
1546 * decl i3 ; Chroma Cr texcoords
1547 * decl i4 ; First ref macroblock top field texcoords
1548 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
1549 * decl i6 ; Second ref macroblock top field texcoords
1550 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
1551 */
1552 for (i = 0; i < 8; i++)
1553 {
1554 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1555 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1556 }
1557
1558 /*
1559 * decl o0 ; Vertex pos
1560 * decl o1 ; Luma texcoords
1561 * decl o2 ; Chroma Cb texcoords
1562 * decl o3 ; Chroma Cr texcoords
1563 * decl o4 ; First ref macroblock texcoords
1564 * decl o5 ; Second ref macroblock texcoords
1565 */
1566 for (i = 0; i < 6; i++)
1567 {
1568 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1569 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1570 }
1571
1572 /*
1573 * mov o0, i0 ; Move input vertex pos to output
1574 * mov o1, i1 ; Move input luma texcoords to output
1575 * mov o2, i2 ; Move input chroma Cb texcoords to output
1576 * mov o3, i3 ; Move input chroma Cr texcoords to output
1577 */
1578 for (i = 0; i < 4; ++i)
1579 {
1580 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1581 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1582 }
1583
1584 /*
1585 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
1586 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
1587 */
1588 for (i = 0; i < 2; ++i)
1589 {
1590 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
1591 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1592 }
1593
1594 /* end */
1595 inst = vl_end();
1596 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1597
1598 vs.tokens = tokens;
1599 mc->b_vs[0] = pipe->create_vs_state(pipe, &vs);
1600 free(tokens);
1601
1602 return 0;
1603 }
1604
1605 static int vlCreateVertexShaderFieldBMB
1606 (
1607 struct vlR16SnormBufferedMC *mc
1608 )
1609 {
1610 const unsigned int max_tokens = 100;
1611
1612 struct pipe_context *pipe;
1613 struct pipe_shader_state vs;
1614 struct tgsi_token *tokens;
1615 struct tgsi_header *header;
1616
1617 struct tgsi_full_declaration decl;
1618 struct tgsi_full_instruction inst;
1619
1620 unsigned int ti;
1621 unsigned int i;
1622
1623 assert(mc);
1624
1625 pipe = mc->pipe;
1626 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1627
1628 /* Version */
1629 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1630 /* Header */
1631 header = (struct tgsi_header*)&tokens[1];
1632 *header = tgsi_build_header();
1633 /* Processor */
1634 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1635
1636 ti = 3;
1637
1638 /*
1639 * decl i0 ; Vertex pos
1640 * decl i1 ; Luma texcoords
1641 * decl i2 ; Chroma Cb texcoords
1642 * decl i3 ; Chroma Cr texcoords
1643 * decl i4 ; First ref macroblock top field texcoords
1644 * decl i5 ; First ref macroblock bottom field texcoords
1645 * decl i6 ; Second ref macroblock top field texcoords
1646 * decl i7 ; Second ref macroblock bottom field texcoords
1647 */
1648 for (i = 0; i < 8; i++)
1649 {
1650 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1651 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1652 }
1653
1654 /* decl c0 ; Render target dimensions */
1655 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1656 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1657
1658 /*
1659 * decl o0 ; Vertex pos
1660 * decl o1 ; Luma texcoords
1661 * decl o2 ; Chroma Cb texcoords
1662 * decl o3 ; Chroma Cr texcoords
1663 * decl o4 ; First ref macroblock top field texcoords
1664 * decl o5 ; First ref macroblock Bottom field texcoords
1665 * decl o6 ; Second ref macroblock top field texcoords
1666 * decl o7 ; Second ref macroblock Bottom field texcoords
1667 * decl o8 ; Denormalized vertex pos
1668 */
1669 for (i = 0; i < 9; i++)
1670 {
1671 decl = vl_decl_output((i == 0 || i == 8) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1672 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1673 }
1674
1675 /* decl t0, t1 */
1676 decl = vl_decl_temps(0, 1);
1677 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1678
1679 /*
1680 * mov o0, i0 ; Move input vertex pos to output
1681 * mov o1, i1 ; Move input luma texcoords to output
1682 * mov o2, i2 ; Move input chroma Cb texcoords to output
1683 * mov o3, i3 ; Move input chroma Cr texcoords to output
1684 */
1685 for (i = 0; i < 4; ++i)
1686 {
1687 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1688 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1689 }
1690
1691 /*
1692 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
1693 * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
1694 * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
1695 * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
1696 */
1697 for (i = 0; i < 4; ++i)
1698 {
1699 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
1700 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1701 }
1702
1703 /* mul o8, i0, c0 ; Denorm vertex pos */
1704 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 8, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1705 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1706
1707 /* end */
1708 inst = vl_end();
1709 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1710
1711 vs.tokens = tokens;
1712 mc->b_vs[1] = pipe->create_vs_state(pipe, &vs);
1713 free(tokens);
1714
1715 return 0;
1716 }
1717
1718 static int vlCreateFragmentShaderFrameBMB
1719 (
1720 struct vlR16SnormBufferedMC *mc
1721 )
1722 {
1723 const unsigned int max_tokens = 100;
1724
1725 struct pipe_context *pipe;
1726 struct pipe_shader_state fs;
1727 struct tgsi_token *tokens;
1728 struct tgsi_header *header;
1729
1730 struct tgsi_full_declaration decl;
1731 struct tgsi_full_instruction inst;
1732
1733 unsigned int ti;
1734 unsigned int i;
1735
1736 assert(mc);
1737
1738 pipe = mc->pipe;
1739 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1740
1741 /* Version */
1742 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1743 /* Header */
1744 header = (struct tgsi_header*)&tokens[1];
1745 *header = tgsi_build_header();
1746 /* Processor */
1747 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1748
1749 ti = 3;
1750
1751 /*
1752 * decl i0 ; Luma texcoords
1753 * decl i1 ; Chroma Cb texcoords
1754 * decl i2 ; Chroma Cr texcoords
1755 * decl i3 ; First ref macroblock texcoords
1756 * decl i4 ; Second ref macroblock texcoords
1757 */
1758 for (i = 0; i < 5; ++i)
1759 {
1760 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1761 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1762 }
1763
1764 /*
1765 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1766 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
1767 */
1768 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1769 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1770
1771 /* decl o0 ; Fragment color */
1772 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1773 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1774
1775 /* decl t0-t2 */
1776 decl = vl_decl_temps(0, 2);
1777 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1778
1779 /*
1780 * decl s0 ; Sampler for luma texture
1781 * decl s1 ; Sampler for chroma Cb texture
1782 * decl s2 ; Sampler for chroma Cr texture
1783 * decl s3 ; Sampler for first ref surface texture
1784 * decl s4 ; Sampler for second ref surface texture
1785 */
1786 for (i = 0; i < 5; ++i)
1787 {
1788 decl = vl_decl_samplers(i, i);
1789 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1790 }
1791
1792 /*
1793 * tex2d t1, i0, s0 ; Read texel from luma texture
1794 * mov t0.x, t1.x ; Move luma sample into .x component
1795 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1796 * mov t0.y, t1.x ; Move Cb sample into .y component
1797 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1798 * mov t0.z, t1.x ; Move Cr sample into .z component
1799 */
1800 for (i = 0; i < 3; ++i)
1801 {
1802 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1803 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1804
1805 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1806 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1807 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1808 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1809 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1810 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1811 }
1812
1813 /* mul t0, t0, c0 ; Rescale texel to correct range */
1814 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1815 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1816
1817 /*
1818 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
1819 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
1820 */
1821 for (i = 0; i < 2; ++i)
1822 {
1823 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
1824 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1825 }
1826
1827 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
1828 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1829 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1830 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1831 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1832 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1833 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1834
1835 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1836 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1837 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1838
1839 /* end */
1840 inst = vl_end();
1841 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1842
1843 fs.tokens = tokens;
1844 mc->b_fs[0] = pipe->create_fs_state(pipe, &fs);
1845 free(tokens);
1846
1847 return 0;
1848 }
1849
/*
 * Builds and creates the TGSI fragment shader used for field-coded
 * bi-predicted (B) macroblocks, and stores it in mc->b_fs[1].
 *
 * The shader samples the differential from the Y/Cb/Cr textures, rescales
 * it, selects the top or bottom field texel of each reference surface based
 * on the fragment's Y coordinate modulo 2, blends past and future reference
 * texels 50/50, and adds the result to the differential.
 *
 * Returns 0 on success.
 *
 * NOTE(review): max_tokens (200) is an unchecked budget for the token
 * buffer; the tgsi_build_* calls are assumed to fit — confirm if the
 * shader grows. The malloc result is also not checked before use.
 */
static int vlCreateFragmentShaderFieldBMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 200;

	struct pipe_context *pipe;
	struct pipe_shader_state fs;
	struct tgsi_token *tokens;
	struct tgsi_header *header;

	struct tgsi_full_declaration decl;
	struct tgsi_full_instruction inst;

	unsigned int ti;	/* Write cursor into tokens[], advanced by each build call */
	unsigned int i;

	assert(mc);

	pipe = mc->pipe;
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

	ti = 3;

	/*
	 * decl i0 ; Luma texcoords
	 * decl i1 ; Chroma Cb texcoords
	 * decl i2 ; Chroma Cr texcoords
	 * decl i3 ; First ref macroblock top field texcoords
	 * decl i4 ; First ref macroblock bottom field texcoords
	 * decl i5 ; Second ref macroblock top field texcoords
	 * decl i6 ; Second ref macroblock bottom field texcoords
	 * decl i7 ; Denormalized vertex pos
	 */
	for (i = 0; i < 8; ++i)
	{
		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
	 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
	 *         ; and for Y-mod-2 top/bottom field selection
	 */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl o0 ; Fragment color */
	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl t0-t5 */
	decl = vl_decl_temps(0, 5);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl s0 ; Sampler for luma texture
	 * decl s1 ; Sampler for chroma Cb texture
	 * decl s2 ; Sampler for chroma Cr texture
	 * decl s3 ; Sampler for first ref surface texture
	 * decl s4 ; Sampler for second ref surface texture
	 */
	for (i = 0; i < 5; ++i)
	{
		decl = vl_decl_samplers(i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * Gather the Y/Cb/Cr differential samples into t0.x/.y/.z via t1.
	 *
	 * tex2d t1, i0, s0 ; Read texel from luma texture
	 * mov t0.x, t1.x   ; Move luma sample into .x component
	 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
	 * mov t0.y, t1.x   ; Move Cb sample into .y component
	 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
	 * mov t0.z, t1.x   ; Move Cr sample into .z component
	 */
	for (i = 0; i < 3; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* mul t0, t0, c0 ; Rescale texel to correct range */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* Compute t3 = Y % 2 to pick top (0) or bottom (1) field below */

	/* XXX: Pos values off by 0.5? */
	/* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 7, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* floor t3, t3 ; Get rid of fractional part */
	inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t3, c1.y ; Multiply by 2 */
	inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field
	 * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
	/* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field
	 * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 5, TGSI_FILE_SAMPLER, 4);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
	/* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* lerp t1, c1.x, t1, t2 ; Blend past and future texels (weight 1/2 in c1.x) */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* add o0, t0, t1 ; Add past/future ref and differential to form final output */
	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	fs.tokens = tokens;
	/* b_fs[1] is the field-coded variant; b_fs[0] is the frame-coded one */
	mc->b_fs[1] = pipe->create_fs_state(pipe, &fs);
	free(tokens);

	return 0;
}
2042
2043 static int vlCreateDataBufs
2044 (
2045 struct vlR16SnormBufferedMC *mc
2046 )
2047 {
2048 const unsigned int mbw = align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH;
2049 const unsigned int mbh = align(mc->picture_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT;
2050 const unsigned int num_mb_per_frame = mbw * mbh;
2051
2052 struct pipe_context *pipe;
2053 unsigned int h, i;
2054
2055 assert(mc);
2056
2057 pipe = mc->pipe;
2058
2059 /* Create our vertex buffers */
2060 for (h = 0; h < NUM_BUF_SETS; ++h)
2061 {
2062 mc->vertex_bufs[h][0].pitch = sizeof(struct vlVertex2f) * 4;
2063 mc->vertex_bufs[h][0].max_index = 24 * num_mb_per_frame - 1;
2064 mc->vertex_bufs[h][0].buffer_offset = 0;
2065 mc->vertex_bufs[h][0].buffer = pipe->winsys->buffer_create
2066 (
2067 pipe->winsys,
2068 1,
2069 PIPE_BUFFER_USAGE_VERTEX,
2070 sizeof(struct vlVertex2f) * 4 * 24 * num_mb_per_frame
2071 );
2072
2073 for (i = 1; i < 3; ++i)
2074 {
2075 mc->vertex_bufs[h][i].pitch = sizeof(struct vlVertex2f) * 2;
2076 mc->vertex_bufs[h][i].max_index = 24 * num_mb_per_frame - 1;
2077 mc->vertex_bufs[h][i].buffer_offset = 0;
2078 mc->vertex_bufs[h][i].buffer = pipe->winsys->buffer_create
2079 (
2080 pipe->winsys,
2081 1,
2082 PIPE_BUFFER_USAGE_VERTEX,
2083 sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame
2084 );
2085 }
2086 }
2087
2088 /* Position element */
2089 mc->vertex_elems[0].src_offset = 0;
2090 mc->vertex_elems[0].vertex_buffer_index = 0;
2091 mc->vertex_elems[0].nr_components = 2;
2092 mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
2093
2094 /* Luma, texcoord element */
2095 mc->vertex_elems[1].src_offset = sizeof(struct vlVertex2f);
2096 mc->vertex_elems[1].vertex_buffer_index = 0;
2097 mc->vertex_elems[1].nr_components = 2;
2098 mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
2099
2100 /* Chroma Cr texcoord element */
2101 mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f) * 2;
2102 mc->vertex_elems[2].vertex_buffer_index = 0;
2103 mc->vertex_elems[2].nr_components = 2;
2104 mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
2105
2106 /* Chroma Cb texcoord element */
2107 mc->vertex_elems[3].src_offset = sizeof(struct vlVertex2f) * 3;
2108 mc->vertex_elems[3].vertex_buffer_index = 0;
2109 mc->vertex_elems[3].nr_components = 2;
2110 mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
2111
2112 /* First ref surface top field texcoord element */
2113 mc->vertex_elems[4].src_offset = 0;
2114 mc->vertex_elems[4].vertex_buffer_index = 1;
2115 mc->vertex_elems[4].nr_components = 2;
2116 mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
2117
2118 /* First ref surface bottom field texcoord element */
2119 mc->vertex_elems[5].src_offset = sizeof(struct vlVertex2f);
2120 mc->vertex_elems[5].vertex_buffer_index = 1;
2121 mc->vertex_elems[5].nr_components = 2;
2122 mc->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
2123
2124 /* Second ref surface top field texcoord element */
2125 mc->vertex_elems[6].src_offset = 0;
2126 mc->vertex_elems[6].vertex_buffer_index = 2;
2127 mc->vertex_elems[6].nr_components = 2;
2128 mc->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
2129
2130 /* Second ref surface bottom field texcoord element */
2131 mc->vertex_elems[7].src_offset = sizeof(struct vlVertex2f);
2132 mc->vertex_elems[7].vertex_buffer_index = 2;
2133 mc->vertex_elems[7].nr_components = 2;
2134 mc->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
2135
2136 /* Create our constant buffer */
2137 mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts);
2138 mc->vs_const_buf.buffer = pipe->winsys->buffer_create
2139 (
2140 pipe->winsys,
2141 1,
2142 PIPE_BUFFER_USAGE_CONSTANT,
2143 mc->vs_const_buf.size
2144 );
2145
2146 mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts);
2147 mc->fs_const_buf.buffer = pipe->winsys->buffer_create
2148 (
2149 pipe->winsys,
2150 1,
2151 PIPE_BUFFER_USAGE_CONSTANT,
2152 mc->fs_const_buf.size
2153 );
2154
2155 memcpy
2156 (
2157 pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
2158 &fs_consts,
2159 sizeof(struct vlFragmentShaderConsts)
2160 );
2161
2162 pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer);
2163
2164 mc->macroblocks = malloc(sizeof(struct vlMpeg2MacroBlock) * num_mb_per_frame);
2165
2166 return 0;
2167 }
2168
2169 static int vlInit
2170 (
2171 struct vlR16SnormBufferedMC *mc
2172 )
2173 {
2174 struct pipe_context *pipe;
2175 struct pipe_sampler_state sampler;
2176 struct pipe_texture template;
2177 unsigned int filters[5];
2178 unsigned int i;
2179
2180 assert(mc);
2181
2182 pipe = mc->pipe;
2183
2184 /* For MC we render to textures, which are rounded up to nearest POT */
2185 mc->viewport.scale[0] = vlRoundUpPOT(mc->picture_width);
2186 mc->viewport.scale[1] = vlRoundUpPOT(mc->picture_height);
2187 mc->viewport.scale[2] = 1;
2188 mc->viewport.scale[3] = 1;
2189 mc->viewport.translate[0] = 0;
2190 mc->viewport.translate[1] = 0;
2191 mc->viewport.translate[2] = 0;
2192 mc->viewport.translate[3] = 0;
2193
2194 mc->render_target.width = vlRoundUpPOT(mc->picture_width);
2195 mc->render_target.height = vlRoundUpPOT(mc->picture_height);
2196 mc->render_target.num_cbufs = 1;
2197 /* FB for MC stage is a vlSurface created by the user, set at render time */
2198 mc->render_target.zsbuf = NULL;
2199
2200 filters[0] = PIPE_TEX_FILTER_NEAREST;
2201 /* FIXME: Linear causes discoloration around block edges */
2202 filters[1] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
2203 filters[2] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
2204 filters[3] = PIPE_TEX_FILTER_LINEAR;
2205 filters[4] = PIPE_TEX_FILTER_LINEAR;
2206
2207 for (i = 0; i < 5; ++i)
2208 {
2209 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2210 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2211 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2212 sampler.min_img_filter = filters[i];
2213 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2214 sampler.mag_img_filter = filters[i];
2215 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
2216 sampler.compare_func = PIPE_FUNC_ALWAYS;
2217 sampler.normalized_coords = 1;
2218 /*sampler.prefilter = ;*/
2219 /*sampler.shadow_ambient = ;*/
2220 /*sampler.lod_bias = ;*/
2221 sampler.min_lod = 0;
2222 /*sampler.max_lod = ;*/
2223 /*sampler.border_color[i] = ;*/
2224 /*sampler.max_anisotropy = ;*/
2225 mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler);
2226 }
2227
2228 memset(&template, 0, sizeof(struct pipe_texture));
2229 template.target = PIPE_TEXTURE_2D;
2230 template.format = PIPE_FORMAT_R16_SNORM;
2231 template.last_level = 0;
2232 template.width[0] = vlRoundUpPOT(mc->picture_width);
2233 template.height[0] = vlRoundUpPOT(mc->picture_height);
2234 template.depth[0] = 1;
2235 template.compressed = 0;
2236 pf_get_block(template.format, &template.block);
2237
2238 for (i = 0; i < NUM_BUF_SETS; ++i)
2239 mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template);
2240
2241 if (mc->picture_format == vlFormatYCbCr420)
2242 {
2243 template.width[0] = vlRoundUpPOT(mc->picture_width / 2);
2244 template.height[0] = vlRoundUpPOT(mc->picture_height / 2);
2245 }
2246 else if (mc->picture_format == vlFormatYCbCr422)
2247 template.height[0] = vlRoundUpPOT(mc->picture_height / 2);
2248
2249 for (i = 0; i < NUM_BUF_SETS; ++i)
2250 {
2251 mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template);
2252 mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template);
2253 }
2254
2255 /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
2256
2257 vlCreateVertexShaderIMB(mc);
2258 vlCreateFragmentShaderIMB(mc);
2259 vlCreateVertexShaderFramePMB(mc);
2260 vlCreateVertexShaderFieldPMB(mc);
2261 vlCreateFragmentShaderFramePMB(mc);
2262 vlCreateFragmentShaderFieldPMB(mc);
2263 vlCreateVertexShaderFrameBMB(mc);
2264 vlCreateVertexShaderFieldBMB(mc);
2265 vlCreateFragmentShaderFrameBMB(mc);
2266 vlCreateFragmentShaderFieldBMB(mc);
2267 vlCreateDataBufs(mc);
2268
2269 return 0;
2270 }
2271
2272 int vlCreateR16SNormBufferedMC
2273 (
2274 struct pipe_context *pipe,
2275 unsigned int picture_width,
2276 unsigned int picture_height,
2277 enum vlFormat picture_format,
2278 struct vlRender **render
2279 )
2280 {
2281 struct vlR16SnormBufferedMC *mc;
2282 unsigned int i;
2283
2284 assert(pipe);
2285 assert(render);
2286
2287 mc = calloc(1, sizeof(struct vlR16SnormBufferedMC));
2288
2289 mc->base.vlBegin = &vlBegin;
2290 mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered;
2291 mc->base.vlEnd = &vlEnd;
2292 mc->base.vlFlush = &vlFlush;
2293 mc->base.vlDestroy = &vlDestroy;
2294 mc->pipe = pipe;
2295 mc->picture_width = picture_width;
2296 mc->picture_height = picture_height;
2297
2298 mc->cur_buf = 0;
2299 mc->buffered_surface = NULL;
2300 mc->past_surface = NULL;
2301 mc->future_surface = NULL;
2302 for (i = 0; i < 3; ++i)
2303 mc->zero_block[i].x = -1.0f;
2304 mc->num_macroblocks = 0;
2305
2306 vlInit(mc);
2307
2308 *render = &mc->base;
2309
2310 return 0;
2311 }