Merge remote branch 'upstream/gallium-0.2' into nouveau-gallium-0.2
[mesa.git] / src / gallium / state_trackers / g3dvl / vl_r16snorm_mc_buf.c
1 #define VL_INTERNAL
2 #include "vl_r16snorm_mc_buf.h"
3 #include <assert.h>
4 #include <stdlib.h>
5 #include <pipe/p_context.h>
6 #include <pipe/p_winsys.h>
7 #include <pipe/p_screen.h>
8 #include <pipe/p_state.h>
9 #include <pipe/p_util.h>
10 #include <pipe/p_inlines.h>
11 #include <tgsi/tgsi_parse.h>
12 #include <tgsi/tgsi_build.h>
13 #include "vl_render.h"
14 #include "vl_shader_build.h"
15 #include "vl_surface.h"
16 #include "vl_util.h"
17 #include "vl_types.h"
18 #include "vl_defs.h"
19
20 /*
21 * TODO: Dynamically determine number of buf sets to use, based on
22 * video size and available mem, since we can easily run out of memory
23 * for high res videos.
24 * Note: Destroying previous frame's buffers and creating new ones
25 * doesn't work, since the buffer are not actually destroyed until their
26 * fence is signalled, and if we render fast enough we will create faster
27 * than we destroy.
28 */
29 #define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */
30
/*
 * Macroblock types expanded to distinguish frame- from field-motion variants,
 * so that batched macroblocks can be grouped per shader pair (see vlFlush,
 * which binds p_vs/p_fs[0] for frame and [1] for field motion).
 */
enum vlMacroBlockTypeEx
{
	vlMacroBlockExTypeIntra,
	vlMacroBlockExTypeFwdPredictedFrame,
	vlMacroBlockExTypeFwdPredictedField,
	vlMacroBlockExTypeBkwdPredictedFrame,
	vlMacroBlockExTypeBkwdPredictedField,
	vlMacroBlockExTypeBiPredictedFrame,
	vlMacroBlockExTypeBiPredictedField,

	vlNumMacroBlockExTypes	/* Count of the types above, used for bucket arrays */
};
43
/* Vertex shader constant buffer layout. */
struct vlVertexShaderConsts
{
	struct vlVertex4f denorm;	/* Texture width/height, written each flush to denormalize coords */
};
48
/* Fragment shader constant buffer layout; see fs_consts for the values. */
struct vlFragmentShaderConsts
{
	struct vlVertex4f multiplier;	/* Sample renormalization factors */
	struct vlVertex4f div;		/* Constants for field (Y % 2) selection */
};
54
/* Buffered motion-compensation renderer state. */
struct vlR16SnormBufferedMC
{
	struct vlRender base;	/* Must be first: callers cast vlRender* to this type */

	unsigned int picture_width, picture_height;
	enum vlFormat picture_format;

	unsigned int cur_buf;	/* Monotonic counter; cur_buf % NUM_BUF_SETS picks the active set */
	struct vlSurface *buffered_surface;	/* Surface the current batch renders to */
	struct vlSurface *past_surface, *future_surface;	/* Reference surfaces for prediction */
	struct vlVertex2f surface_tex_inv_size;	/* 1 / surface texture dimensions */
	struct vlVertex2f zero_block[3];	/* Per-plane texcoords of an all-zero block; x < 0 = not yet located */
	unsigned int num_macroblocks;	/* Entries currently batched in macroblocks[] */
	struct vlMpeg2MacroBlock *macroblocks;

	struct pipe_context *pipe;
	struct pipe_viewport_state viewport;
	struct pipe_framebuffer_state render_target;
	struct pipe_sampler_state *samplers[5];	/* Y, Cb, Cr, past ref, future ref */
	struct pipe_texture *textures[NUM_BUF_SETS][5];	/* [3]/[4] are aliased to reference surface textures at draw time */
	void *i_vs, *p_vs[2], *b_vs[2];	/* Intra/P/B vertex shaders; [0] frame, [1] field motion */
	void *i_fs, *p_fs[2], *b_fs[2];	/* Matching fragment shaders */
	struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][3];	/* Stream 0: pos+texcoords; 1, 2: motion vectors */
	struct pipe_vertex_element vertex_elems[8];
	struct pipe_constant_buffer vs_const_buf, fs_const_buf;
};
81
/* Begin a rendering batch; this renderer needs no per-batch setup. */
static int vlBegin(struct vlRender *render)
{
	assert(render);
	return 0;
}
91
92 static inline int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch)
93 {
94 unsigned int y;
95
96 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
97 memcpy
98 (
99 dst + y * dst_pitch,
100 src + y * VL_BLOCK_WIDTH,
101 VL_BLOCK_WIDTH * 2
102 );
103
104 return 0;
105 }
106
/*
 * Copy one field-coded 8x8 block of 16-bit samples into the texture at a
 * vertical stride of two rows, interleaving it with the other field's lines.
 */
static inline int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch)
{
	unsigned int y;

	/* First half of the block: destination rows 0, 2, 4, ... */
	for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y)
		memcpy
		(
			dst + y * dst_pitch * 2,
			src + y * VL_BLOCK_WIDTH,
			VL_BLOCK_WIDTH * 2
		);

	/*
	 * NOTE(review): y continues from VL_BLOCK_HEIGHT/2 below, so together
	 * with this advance the second half lands 2*VL_BLOCK_HEIGHT rows below
	 * the block origin instead of continuing the stride-2 interleave at
	 * row VL_BLOCK_HEIGHT — confirm this matches the field layout the
	 * field-motion shaders sample from.
	 */
	dst += VL_BLOCK_HEIGHT * dst_pitch;

	for (; y < VL_BLOCK_HEIGHT; ++y)
		memcpy
		(
			dst + y * dst_pitch * 2,
			src + y * VL_BLOCK_WIDTH,
			VL_BLOCK_WIDTH * 2
		);

	return 0;
}
131
132 static inline int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
133 {
134 unsigned int y;
135
136 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
137 memset
138 (
139 dst + y * dst_pitch,
140 0,
141 VL_BLOCK_WIDTH * 2
142 );
143
144 return 0;
145 }
146
/*
 * Upload the coded DCT blocks of one macroblock into the current buffer set's
 * source textures (texture 0 = luma, 1 = Cb, 2 = Cr). Blocks absent from the
 * coded block pattern are not present in the blocks array; the first such
 * block per plane is zero-filled in the texture and its texcoords recorded in
 * mc->zero_block so later uncoded blocks can sample it (reset in vlFlush).
 */
static inline int vlGrabBlocks
(
	struct vlR16SnormBufferedMC *mc,
	unsigned int mbx,
	unsigned int mby,
	enum vlDCTType dct_type,
	unsigned int coded_block_pattern,
	short *blocks
)
{
	struct pipe_surface *tex_surface;
	short *texels;
	unsigned int tex_pitch;
	/* tb indexes coded_block_pattern bits, sb indexes the packed blocks array */
	unsigned int x, y, tb = 0, sb = 0;
	/* Macroblock origin in pixels */
	unsigned int mbpx = mbx * VL_MACROBLOCK_WIDTH, mbpy = mby * VL_MACROBLOCK_HEIGHT;

	assert(mc);
	assert(blocks);

	/* Luma plane: texture 0 of the current rotating buffer set */
	tex_surface = mc->pipe->screen->get_tex_surface
	(
		mc->pipe->screen,
		mc->textures[mc->cur_buf % NUM_BUF_SETS][0],
		0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
	);

	texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
	/* Pitch in texels, not bytes */
	tex_pitch = tex_surface->stride / tex_surface->block.size;

	texels += mbpy * tex_pitch + mbpx;

	/* Four luma blocks in a 2x2 arrangement */
	for (y = 0; y < 2; ++y)
	{
		for (x = 0; x < 2; ++x, ++tb)
		{
			if ((coded_block_pattern >> (5 - tb)) & 1)
			{
				/* Only coded blocks are packed into the blocks array */
				short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

				if (dct_type == vlDCTTypeFrameCoded)
				{
					vlGrabFrameCodedBlock
					(
						cur_block,
						texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH,
						tex_pitch
					);
				}
				else
				{
					/* Field coded: block y selects even (0) or odd (1) lines */
					vlGrabFieldCodedBlock
					(
						cur_block,
						texels + y * tex_pitch + x * VL_BLOCK_WIDTH,
						tex_pitch
					);
				}

				++sb;
			}
			else if (mc->zero_block[0].x < 0.0f)
			{
				/* First uncoded luma block since the last flush: zero it and
				   remember its normalized texcoords for reuse */
				vlGrabNoBlock(texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, tex_pitch);

				mc->zero_block[0].x = (mbpx + x * 8) * mc->surface_tex_inv_size.x;
				mc->zero_block[0].y = (mbpy + y * 8) * mc->surface_tex_inv_size.y;
			}
		}
	}

	pipe_surface_unmap(tex_surface);

	/* TODO: Implement 422, 444 */
	/* 4:2:0 chroma planes are at half resolution */
	mbpx >>= 1;
	mbpy >>= 1;

	/* One Cb and one Cr block, in textures 1 and 2 */
	for (tb = 0; tb < 2; ++tb)
	{
		tex_surface = mc->pipe->screen->get_tex_surface
		(
			mc->pipe->screen,
			mc->textures[mc->cur_buf % NUM_BUF_SETS][tb + 1],
			0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
		);

		texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
		tex_pitch = tex_surface->stride / tex_surface->block.size;

		texels += mbpy * tex_pitch + mbpx;

		if ((coded_block_pattern >> (1 - tb)) & 1)
		{
			short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

			vlGrabFrameCodedBlock
			(
				cur_block,
				texels,
				tex_pitch
			);

			++sb;
		}
		else if (mc->zero_block[tb + 1].x < 0.0f)
		{
			vlGrabNoBlock(texels, tex_pitch);

			/* Shift back up: zero-block coords are kept in full-resolution space */
			mc->zero_block[tb + 1].x = (mbpx << 1) * mc->surface_tex_inv_size.x;
			mc->zero_block[tb + 1].y = (mbpy << 1) * mc->surface_tex_inv_size.y;
		}

		pipe_surface_unmap(tex_surface);
	}

	return 0;
}
263
264 static inline enum vlMacroBlockTypeEx vlGetMacroBlockTypeEx(struct vlMpeg2MacroBlock *mb)
265 {
266 assert(mb);
267
268 switch (mb->mb_type)
269 {
270 case vlMacroBlockTypeIntra:
271 return vlMacroBlockExTypeIntra;
272 case vlMacroBlockTypeFwdPredicted:
273 return mb->mo_type == vlMotionTypeFrame ?
274 vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField;
275 case vlMacroBlockTypeBkwdPredicted:
276 return mb->mo_type == vlMotionTypeFrame ?
277 vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField;
278 case vlMacroBlockTypeBiPredicted:
279 return mb->mo_type == vlMotionTypeFrame ?
280 vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField;
281 default:
282 assert(0);
283 }
284
285 /* Unreachable */
286 return -1;
287 }
288
289 static inline int vlGrabMacroBlock
290 (
291 struct vlR16SnormBufferedMC *mc,
292 struct vlMpeg2MacroBlock *macroblock
293 )
294 {
295 assert(mc);
296 assert(macroblock);
297
298 mc->macroblocks[mc->num_macroblocks].mbx = macroblock->mbx;
299 mc->macroblocks[mc->num_macroblocks].mby = macroblock->mby;
300 mc->macroblocks[mc->num_macroblocks].mb_type = macroblock->mb_type;
301 mc->macroblocks[mc->num_macroblocks].mo_type = macroblock->mo_type;
302 mc->macroblocks[mc->num_macroblocks].dct_type = macroblock->dct_type;
303 mc->macroblocks[mc->num_macroblocks].PMV[0][0][0] = macroblock->PMV[0][0][0];
304 mc->macroblocks[mc->num_macroblocks].PMV[0][0][1] = macroblock->PMV[0][0][1];
305 mc->macroblocks[mc->num_macroblocks].PMV[0][1][0] = macroblock->PMV[0][1][0];
306 mc->macroblocks[mc->num_macroblocks].PMV[0][1][1] = macroblock->PMV[0][1][1];
307 mc->macroblocks[mc->num_macroblocks].PMV[1][0][0] = macroblock->PMV[1][0][0];
308 mc->macroblocks[mc->num_macroblocks].PMV[1][0][1] = macroblock->PMV[1][0][1];
309 mc->macroblocks[mc->num_macroblocks].PMV[1][1][0] = macroblock->PMV[1][1][0];
310 mc->macroblocks[mc->num_macroblocks].PMV[1][1][1] = macroblock->PMV[1][1][1];
311 mc->macroblocks[mc->num_macroblocks].cbp = macroblock->cbp;
312 mc->macroblocks[mc->num_macroblocks].blocks = macroblock->blocks;
313
314 vlGrabBlocks
315 (
316 mc,
317 macroblock->mbx,
318 macroblock->mby,
319 macroblock->dct_type,
320 macroblock->cbp,
321 macroblock->blocks
322 );
323
324 mc->num_macroblocks++;
325
326 return 0;
327 }
328
/*
 * Emit the 6 vertices (two triangles) for one 8x8 quarter of a macroblock
 * into vertex stream 0: position plus luma/Cb/Cr texcoords.
 * (mbx, mby) * (unitx, unity) + (ofsx, ofsy) is the quarter's origin in
 * normalized coordinates, and (hx, hy) its extent. lm/cbm/crm are the
 * coded-block-pattern bit masks for each plane; when a plane's bit is clear,
 * its texcoords are pointed at the shared zero block (zb) instead of the
 * block's own texels.
 */
#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zb) \
	(vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \
	(vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \
	(vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \
	(vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \
	(vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \
	(vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \
	\
	if ((cbp) & (lm)) \
	{ \
		(vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \
		(vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
		(vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \
		(vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \
		(vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
		(vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \
	} \
	else \
	{ \
		(vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \
		(vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \
		(vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \
		(vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \
		(vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \
		(vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \
	} \
	\
	if ((cbp) & (cbm)) \
	{ \
		(vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \
		(vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
		(vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \
		(vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \
		(vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
		(vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \
	} \
	else \
	{ \
		(vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \
		(vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \
		(vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \
		(vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \
		(vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \
		(vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \
	} \
	\
	if ((cbp) & (crm)) \
	{ \
		(vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \
		(vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
		(vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \
		(vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \
		(vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
		(vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \
	} \
	else \
	{ \
		(vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \
		(vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \
		(vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \
		(vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \
		(vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \
		(vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \
	}
393
/*
 * Write one macroblock's vertex data into the current buffer set's vertex
 * buffers at slot pos (24 vertices per macroblock).
 * The switch cases deliberately fall through: bi-predicted macroblocks fill
 * stream 2 (second reference's motion vectors), then predicted ones fill
 * stream 1 (first reference's motion vectors), and every type finally fills
 * stream 0 (positions and per-plane texcoords).
 */
static inline int vlGrabMacroBlockVB
(
	struct vlR16SnormBufferedMC *mc,
	struct vlMpeg2MacroBlock *macroblock,
	unsigned int pos
)
{
	struct vlVertex2f mo_vec[2];
	unsigned int i;

	assert(mc);
	assert(macroblock);

	switch (macroblock->mb_type)
	{
		case vlMacroBlockTypeBiPredicted:
		{
			struct vlVertex2f *vb;

			/* Stream 2: two vec2 motion vectors per vertex */
			vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
			(
				mc->pipe->winsys,
				mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE
			) + pos * 2 * 24;

			/* PMV is scaled by 0.5 and normalized to texcoord space */
			mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
			mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

			if (macroblock->mo_type == vlMotionTypeFrame)
			{
				/* Frame motion: only the even slots are meaningful */
				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
				}
			}
			else
			{
				/* Field motion: second field's vector goes in the odd slots */
				mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
				mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;

				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
					vb[i + 1].x = mo_vec[1].x;
					vb[i + 1].y = mo_vec[1].y;
				}
			}

			mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer);

			/* fall-through */
		}
		case vlMacroBlockTypeFwdPredicted:
		case vlMacroBlockTypeBkwdPredicted:
		{
			struct vlVertex2f *vb;

			/* Stream 1: motion vectors for the primary reference */
			vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
			(
				mc->pipe->winsys,
				mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE
			) + pos * 2 * 24;

			/* Backward-only prediction uses PMV[.][1], everything else PMV[.][0] */
			if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted)
			{
				mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
				mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

				if (macroblock->mo_type == vlMotionTypeField)
				{
					mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
					mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;
				}
			}
			else
			{
				mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x;
				mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y;

				if (macroblock->mo_type == vlMotionTypeField)
				{
					mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x;
					mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y;
				}
			}

			if (macroblock->mo_type == vlMotionTypeFrame)
			{
				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
				}
			}
			else
			{
				for (i = 0; i < 24 * 2; i += 2)
				{
					vb[i].x = mo_vec[0].x;
					vb[i].y = mo_vec[0].y;
					vb[i + 1].x = mo_vec[1].x;
					vb[i + 1].y = mo_vec[1].y;
				}
			}

			mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer);

			/* fall-through */
		}
		case vlMacroBlockTypeIntra:
		{
			/* Macroblock size and half-size in normalized texcoord units */
			const struct vlVertex2f unit =
			{
				mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH,
				mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT
			};
			const struct vlVertex2f half =
			{
				mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2),
				mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2)
			};

			/* Stream 0 vertex layout: position plus one texcoord per plane */
			struct vlMacroBlockVertexStream0
			{
				struct vlVertex2f pos;
				struct vlVertex2f luma_tc;
				struct vlVertex2f cb_tc;
				struct vlVertex2f cr_tc;
			} *vb;

			vb = (struct vlMacroBlockVertexStream0*)mc->pipe->winsys->buffer_map
			(
				mc->pipe->winsys,
				mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer,
				PIPE_BUFFER_USAGE_CPU_WRITE
			) + pos * 24;

			/* Four 8x8 quarters, 6 vertices each; 32/16/8/4 select the luma CBP
			   bit for each quarter, 2/1 the Cb/Cr bits */
			SET_BLOCK
			(
				vb,
				macroblock->cbp, macroblock->mbx, macroblock->mby,
				unit.x, unit.y, 0, 0, half.x, half.y,
				32, 2, 1, mc->zero_block
			);

			SET_BLOCK
			(
				vb + 6,
				macroblock->cbp, macroblock->mbx, macroblock->mby,
				unit.x, unit.y, half.x, 0, half.x, half.y,
				16, 2, 1, mc->zero_block
			);

			SET_BLOCK
			(
				vb + 12,
				macroblock->cbp, macroblock->mbx, macroblock->mby,
				unit.x, unit.y, 0, half.y, half.x, half.y,
				8, 2, 1, mc->zero_block
			);

			SET_BLOCK
			(
				vb + 18,
				macroblock->cbp, macroblock->mbx, macroblock->mby,
				unit.x, unit.y, half.x, half.y, half.x, half.y,
				4, 2, 1, mc->zero_block
			);

			mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer);

			break;
		}
		default:
			assert(0);
	}

	return 0;
}
577
/*
 * Render all batched macroblocks to the buffered surface and reset the batch.
 * Macroblocks are bucketed by extended type (counting pass + prefix sums +
 * placement pass) so each bucket occupies a contiguous vertex-buffer range
 * and can be drawn with one draw_arrays call using the matching shaders.
 */
static int vlFlush
(
	struct vlRender *render
)
{
	struct vlR16SnormBufferedMC *mc;
	struct pipe_context *pipe;
	struct vlVertexShaderConsts *vs_consts;
	unsigned int num_macroblocks[vlNumMacroBlockExTypes] = {0};
	unsigned int offset[vlNumMacroBlockExTypes];
	unsigned int vb_start = 0;
	unsigned int i;

	assert(render);

	mc = (struct vlR16SnormBufferedMC*)render;
	pipe = mc->pipe;

	/* Counting pass: how many macroblocks of each extended type */
	for (i = 0; i < mc->num_macroblocks; ++i)
	{
		enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]);

		num_macroblocks[mb_type_ex]++;
	}

	/* Prefix sums give each type its starting slot in the vertex buffers */
	offset[0] = 0;

	for (i = 1; i < vlNumMacroBlockExTypes; ++i)
		offset[i] = offset[i - 1] + num_macroblocks[i - 1];

	/* Placement pass: write each macroblock's vertices into its type's range */
	for (i = 0; i < mc->num_macroblocks; ++i)
	{
		enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]);

		vlGrabMacroBlockVB(mc, &mc->macroblocks[i], offset[mb_type_ex]);

		offset[mb_type_ex]++;
	}

	/* NOTE(review): any previously-stored cbufs[0] surface is overwritten
	   without an explicit release here — confirm get_tex_surface reference
	   ownership semantics. */
	mc->render_target.cbufs[0] = pipe->screen->get_tex_surface
	(
		pipe->screen,
		mc->buffered_surface->texture,
		0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
	);

	pipe->set_framebuffer_state(pipe, &mc->render_target);
	pipe->set_viewport_state(pipe, &mc->viewport);
	/* Upload the denormalization factors (target texture dimensions) */
	vs_consts = pipe->winsys->buffer_map
	(
		pipe->winsys,
		mc->vs_const_buf.buffer,
		PIPE_BUFFER_USAGE_CPU_WRITE
	);

	vs_consts->denorm.x = mc->buffered_surface->texture->width[0];
	vs_consts->denorm.y = mc->buffered_surface->texture->height[0];

	pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer);
	pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf);
	pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf);

	/* Intra: stream 0 only, 3 source textures (Y/Cb/Cr) */
	if (num_macroblocks[vlMacroBlockExTypeIntra] > 0)
	{
		pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 4, mc->vertex_elems);
		pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->i_vs);
		pipe->bind_fs_state(pipe, mc->i_fs);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeIntra] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeIntra] * 24;
	}

	/* Fwd predicted, frame motion: streams 0-1, past reference as texture 3 */
	if (num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0)
	{
		pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
		mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
		pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->p_vs[0]);
		pipe->bind_fs_state(pipe, mc->p_fs[0]);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24;
	}

	/* Fwd predicted, field motion */
	if (num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0)
	{
		pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
		mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
		pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->p_vs[1]);
		pipe->bind_fs_state(pipe, mc->p_fs[1]);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24;
	}

	/* Bkwd predicted, frame motion: future reference as texture 3 */
	if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0)
	{
		pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
		mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
		pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->p_vs[0]);
		pipe->bind_fs_state(pipe, mc->p_fs[0]);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24;
	}

	/* Bkwd predicted, field motion */
	if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0)
	{
		pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 6, mc->vertex_elems);
		mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
		pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->p_vs[1]);
		pipe->bind_fs_state(pipe, mc->p_fs[1]);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24;
	}

	/* Bi predicted, frame motion: streams 0-2, both references as textures 3/4 */
	if (num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0)
	{
		pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 8, mc->vertex_elems);
		mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
		mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
		pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->b_vs[0]);
		pipe->bind_fs_state(pipe, mc->b_fs[0]);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24;
	}

	/* Bi predicted, field motion */
	if (num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0)
	{
		pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]);
		pipe->set_vertex_elements(pipe, 8, mc->vertex_elems);
		mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
		mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
		pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
		pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
		pipe->bind_vs_state(pipe, mc->b_vs[1]);
		pipe->bind_fs_state(pipe, mc->b_fs[1]);

		pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24);
		vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24;
	}

	/* Reset the zero-block markers (x < 0 means "not yet located"), empty the
	   batch, and rotate to the next buffer set */
	for (i = 0; i < 3; ++i)
		mc->zero_block[i].x = -1.0f;
	mc->num_macroblocks = 0;
	mc->cur_buf++;

	return 0;
}
746
747 static int vlRenderMacroBlocksMpeg2R16SnormBuffered
748 (
749 struct vlRender *render,
750 struct vlMpeg2MacroBlockBatch *batch,
751 struct vlSurface *surface
752 )
753 {
754 struct vlR16SnormBufferedMC *mc;
755 unsigned int i;
756
757 assert(render);
758
759 mc = (struct vlR16SnormBufferedMC*)render;
760
761 if (mc->buffered_surface)
762 {
763 if
764 (
765 mc->buffered_surface != surface /*||
766 mc->past_surface != batch->past_surface ||
767 mc->future_surface != batch->future_surface*/
768 )
769 {
770 vlFlush(&mc->base);
771 mc->buffered_surface = surface;
772 mc->past_surface = batch->past_surface;
773 mc->future_surface = batch->future_surface;
774 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
775 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
776 }
777 }
778 else
779 {
780 mc->buffered_surface = surface;
781 mc->past_surface = batch->past_surface;
782 mc->future_surface = batch->future_surface;
783 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
784 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
785 }
786
787 for (i = 0; i < batch->num_macroblocks; ++i)
788 vlGrabMacroBlock(mc, &batch->macroblocks[i]);
789
790 return 0;
791 }
792
/* End a rendering batch; nothing to finalize for this renderer. */
static int vlEnd(struct vlRender *render)
{
	assert(render);
	return 0;
}
802
803 static int vlDestroy
804 (
805 struct vlRender *render
806 )
807 {
808 struct vlR16SnormBufferedMC *mc;
809 struct pipe_context *pipe;
810 unsigned int h, i;
811
812 assert(render);
813
814 mc = (struct vlR16SnormBufferedMC*)render;
815 pipe = mc->pipe;
816
817 for (i = 0; i < 5; ++i)
818 pipe->delete_sampler_state(pipe, mc->samplers[i]);
819
820 for (h = 0; h < NUM_BUF_SETS; ++h)
821 for (i = 0; i < 3; ++i)
822 pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[h][i].buffer);
823
824 /* Textures 3 & 4 are not created directly, no need to release them here */
825 for (i = 0; i < NUM_BUF_SETS; ++i)
826 {
827 pipe_texture_release(&mc->textures[i][0]);
828 pipe_texture_release(&mc->textures[i][1]);
829 pipe_texture_release(&mc->textures[i][2]);
830 }
831
832 pipe->delete_vs_state(pipe, mc->i_vs);
833 pipe->delete_fs_state(pipe, mc->i_fs);
834
835 for (i = 0; i < 2; ++i)
836 {
837 pipe->delete_vs_state(pipe, mc->p_vs[i]);
838 pipe->delete_fs_state(pipe, mc->p_fs[i]);
839 pipe->delete_vs_state(pipe, mc->b_vs[i]);
840 pipe->delete_fs_state(pipe, mc->b_fs[i]);
841 }
842
843 pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer);
844 pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer);
845
846 free(mc->macroblocks);
847 free(mc);
848
849 return 0;
850 }
851
852 /*
853 * Muliplier renormalizes block samples from 16 bits to 12 bits.
854 * Divider is used when calculating Y % 2 for choosing top or bottom
855 * field for P or B macroblocks.
856 * TODO: Use immediates.
857 */
static const struct vlFragmentShaderConsts fs_consts =
{
	{32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},	/* multiplier for Y, Cb, Cr samples */
	{0.5f, 2.0f, 0.0f, 0.0f}	/* div: constants used in the Y % 2 field-select computation */
};
863
864 static int vlCreateVertexShaderIMB
865 (
866 struct vlR16SnormBufferedMC *mc
867 )
868 {
869 const unsigned int max_tokens = 50;
870
871 struct pipe_context *pipe;
872 struct pipe_shader_state vs;
873 struct tgsi_token *tokens;
874 struct tgsi_header *header;
875
876 struct tgsi_full_declaration decl;
877 struct tgsi_full_instruction inst;
878
879 unsigned int ti;
880 unsigned int i;
881
882 assert(mc);
883
884 pipe = mc->pipe;
885 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
886
887 /* Version */
888 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
889 /* Header */
890 header = (struct tgsi_header*)&tokens[1];
891 *header = tgsi_build_header();
892 /* Processor */
893 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
894
895 ti = 3;
896
897 /*
898 * decl i0 ; Vertex pos
899 * decl i1 ; Luma texcoords
900 * decl i2 ; Chroma Cb texcoords
901 * decl i3 ; Chroma Cr texcoords
902 */
903 for (i = 0; i < 4; i++)
904 {
905 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
906 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
907 }
908
909 /*
910 * decl o0 ; Vertex pos
911 * decl o1 ; Luma texcoords
912 * decl o2 ; Chroma Cb texcoords
913 * decl o3 ; Chroma Cr texcoords
914 */
915 for (i = 0; i < 4; i++)
916 {
917 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
918 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
919 }
920
921 /*
922 * mov o0, i0 ; Move input vertex pos to output
923 * mov o1, i1 ; Move input luma texcoords to output
924 * mov o2, i2 ; Move input chroma Cb texcoords to output
925 * mov o3, i3 ; Move input chroma Cr texcoords to output
926 */
927 for (i = 0; i < 4; ++i)
928 {
929 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
930 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
931 }
932
933 /* end */
934 inst = vl_end();
935 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
936
937 vs.tokens = tokens;
938 mc->i_vs = pipe->create_vs_state(pipe, &vs);
939 free(tokens);
940
941 return 0;
942 }
943
944 static int vlCreateFragmentShaderIMB
945 (
946 struct vlR16SnormBufferedMC *mc
947 )
948 {
949 const unsigned int max_tokens = 100;
950
951 struct pipe_context *pipe;
952 struct pipe_shader_state fs;
953 struct tgsi_token *tokens;
954 struct tgsi_header *header;
955
956 struct tgsi_full_declaration decl;
957 struct tgsi_full_instruction inst;
958
959 unsigned int ti;
960 unsigned int i;
961
962 assert(mc);
963
964 pipe = mc->pipe;
965 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
966
967 /* Version */
968 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
969 /* Header */
970 header = (struct tgsi_header*)&tokens[1];
971 *header = tgsi_build_header();
972 /* Processor */
973 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
974
975 ti = 3;
976
977 /*
978 * decl i0 ; Luma texcoords
979 * decl i1 ; Chroma Cb texcoords
980 * decl i2 ; Chroma Cr texcoords
981 */
982 for (i = 0; i < 3; ++i)
983 {
984 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, i, i, TGSI_INTERPOLATE_LINEAR);
985 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
986 }
987
988 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
989 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
990 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
991
992 /* decl o0 ; Fragment color */
993 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
994 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
995
996 /* decl t0, t1 */
997 decl = vl_decl_temps(0, 1);
998 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
999
1000 /*
1001 * decl s0 ; Sampler for luma texture
1002 * decl s1 ; Sampler for chroma Cb texture
1003 * decl s2 ; Sampler for chroma Cr texture
1004 */
1005 for (i = 0; i < 3; ++i)
1006 {
1007 decl = vl_decl_samplers(i, i);
1008 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti);
1009 }
1010
1011 /*
1012 * tex2d t1, i0, s0 ; Read texel from luma texture
1013 * mov t0.x, t1.x ; Move luma sample into .x component
1014 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1015 * mov t0.y, t1.x ; Move Cb sample into .y component
1016 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1017 * mov t0.z, t1.x ; Move Cr sample into .z component
1018 */
1019 for (i = 0; i < 3; ++i)
1020 {
1021 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1022 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1023
1024 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1025 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1026 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1027 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1028 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1029 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1030
1031 }
1032
1033 /* mul o0, t0, c0 ; Rescale texel to correct range */
1034 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1035 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1036
1037 /* end */
1038 inst = vl_end();
1039 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1040
1041 fs.tokens = tokens;
1042 mc->i_fs = pipe->create_fs_state(pipe, &fs);
1043 free(tokens);
1044
1045 return 0;
1046 }
1047
1048 static int vlCreateVertexShaderFramePMB
1049 (
1050 struct vlR16SnormBufferedMC *mc
1051 )
1052 {
1053 const unsigned int max_tokens = 100;
1054
1055 struct pipe_context *pipe;
1056 struct pipe_shader_state vs;
1057 struct tgsi_token *tokens;
1058 struct tgsi_header *header;
1059
1060 struct tgsi_full_declaration decl;
1061 struct tgsi_full_instruction inst;
1062
1063 unsigned int ti;
1064 unsigned int i;
1065
1066 assert(mc);
1067
1068 pipe = mc->pipe;
1069 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1070
1071 /* Version */
1072 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1073 /* Header */
1074 header = (struct tgsi_header*)&tokens[1];
1075 *header = tgsi_build_header();
1076 /* Processor */
1077 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1078
1079 ti = 3;
1080
1081 /*
1082 * decl i0 ; Vertex pos
1083 * decl i1 ; Luma texcoords
1084 * decl i2 ; Chroma Cb texcoords
1085 * decl i3 ; Chroma Cr texcoords
1086 * decl i4 ; Ref surface top field texcoords
1087 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
1088 */
1089 for (i = 0; i < 6; i++)
1090 {
1091 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1092 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1093 }
1094
1095 /*
1096 * decl o0 ; Vertex pos
1097 * decl o1 ; Luma texcoords
1098 * decl o2 ; Chroma Cb texcoords
1099 * decl o3 ; Chroma Cr texcoords
1100 * decl o4 ; Ref macroblock texcoords
1101 */
1102 for (i = 0; i < 5; i++)
1103 {
1104 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1105 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1106 }
1107
1108 /*
1109 * mov o0, i0 ; Move input vertex pos to output
1110 * mov o1, i1 ; Move input luma texcoords to output
1111 * mov o2, i2 ; Move input chroma Cb texcoords to output
1112 * mov o3, i3 ; Move input chroma Cr texcoords to output
1113 */
1114 for (i = 0; i < 4; ++i)
1115 {
1116 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1117 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1118 }
1119
1120 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
1121 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
1122 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1123
1124 /* end */
1125 inst = vl_end();
1126 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1127
1128 vs.tokens = tokens;
1129 mc->p_vs[0] = pipe->create_vs_state(pipe, &vs);
1130 free(tokens);
1131
1132 return 0;
1133 }
1134
1135 static int vlCreateVertexShaderFieldPMB
1136 (
1137 struct vlR16SnormBufferedMC *mc
1138 )
1139 {
1140 const unsigned int max_tokens = 100;
1141
1142 struct pipe_context *pipe;
1143 struct pipe_shader_state vs;
1144 struct tgsi_token *tokens;
1145 struct tgsi_header *header;
1146
1147 struct tgsi_full_declaration decl;
1148 struct tgsi_full_instruction inst;
1149
1150 unsigned int ti;
1151 unsigned int i;
1152
1153 assert(mc);
1154
1155 pipe = mc->pipe;
1156 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1157
1158 /* Version */
1159 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1160 /* Header */
1161 header = (struct tgsi_header*)&tokens[1];
1162 *header = tgsi_build_header();
1163 /* Processor */
1164 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1165
1166 ti = 3;
1167
1168 /*
1169 * decl i0 ; Vertex pos
1170 * decl i1 ; Luma texcoords
1171 * decl i2 ; Chroma Cb texcoords
1172 * decl i3 ; Chroma Cr texcoords
1173 * decl i4 ; Ref macroblock top field texcoords
1174 * decl i5 ; Ref macroblock bottom field texcoords
1175 */
1176 for (i = 0; i < 6; i++)
1177 {
1178 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1179 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1180 }
1181
1182 /* decl c0 ; Texcoord denorm coefficients */
1183 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1184 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1185
1186 /*
1187 * decl o0 ; Vertex pos
1188 * decl o1 ; Luma texcoords
1189 * decl o2 ; Chroma Cb texcoords
1190 * decl o3 ; Chroma Cr texcoords
1191 * decl o4 ; Ref macroblock top field texcoords
1192 * decl o5 ; Ref macroblock bottom field texcoords
1193 * decl o6 ; Denormalized vertex pos
1194 */
1195 for (i = 0; i < 7; i++)
1196 {
1197 decl = vl_decl_output((i == 0 || i == 6) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1198 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1199 }
1200
1201 /*
1202 * mov o0, i0 ; Move input vertex pos to output
1203 * mov o1, i1 ; Move input luma texcoords to output
1204 * mov o2, i2 ; Move input chroma Cb texcoords to output
1205 * mov o3, i3 ; Move input chroma Cr texcoords to output
1206 */
1207 for (i = 0; i < 4; ++i)
1208 {
1209 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1210 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1211 }
1212
1213 /*
1214 * add o4, i0, i4 ; Translate vertex pos by motion vec to form top field macroblock texcoords
1215 * add o5, i0, i5 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords
1216 */
1217 for (i = 0; i < 2; ++i)
1218 {
1219 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
1220 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1221 }
1222
1223 /* mul o6, i0, c0 ; Denorm vertex pos */
1224 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1225 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1226
1227 /* end */
1228 inst = vl_end();
1229 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1230
1231 vs.tokens = tokens;
1232 mc->p_vs[1] = pipe->create_vs_state(pipe, &vs);
1233 free(tokens);
1234
1235 return 0;
1236 }
1237
1238 static int vlCreateFragmentShaderFramePMB
1239 (
1240 struct vlR16SnormBufferedMC *mc
1241 )
1242 {
1243 const unsigned int max_tokens = 100;
1244
1245 struct pipe_context *pipe;
1246 struct pipe_shader_state fs;
1247 struct tgsi_token *tokens;
1248 struct tgsi_header *header;
1249
1250 struct tgsi_full_declaration decl;
1251 struct tgsi_full_instruction inst;
1252
1253 unsigned int ti;
1254 unsigned int i;
1255
1256 assert(mc);
1257
1258 pipe = mc->pipe;
1259 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1260
1261 /* Version */
1262 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1263 /* Header */
1264 header = (struct tgsi_header*)&tokens[1];
1265 *header = tgsi_build_header();
1266 /* Processor */
1267 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1268
1269 ti = 3;
1270
1271 /*
1272 * decl i0 ; Luma texcoords
1273 * decl i1 ; Chroma Cb texcoords
1274 * decl i2 ; Chroma Cr texcoords
1275 * decl i3 ; Ref macroblock texcoords
1276 */
1277 for (i = 0; i < 4; ++i)
1278 {
1279 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1280 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1281 }
1282
1283 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
1284 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1285 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1286
1287 /* decl o0 ; Fragment color */
1288 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1289 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1290
1291 /* decl t0, t1 */
1292 decl = vl_decl_temps(0, 1);
1293 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1294
1295 /*
1296 * decl s0 ; Sampler for luma texture
1297 * decl s1 ; Sampler for chroma Cb texture
1298 * decl s2 ; Sampler for chroma Cr texture
1299 * decl s3 ; Sampler for ref surface texture
1300 */
1301 for (i = 0; i < 4; ++i)
1302 {
1303 decl = vl_decl_samplers(i, i);
1304 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1305 }
1306
1307 /*
1308 * tex2d t1, i0, s0 ; Read texel from luma texture
1309 * mov t0.x, t1.x ; Move luma sample into .x component
1310 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1311 * mov t0.y, t1.x ; Move Cb sample into .y component
1312 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1313 * mov t0.z, t1.x ; Move Cr sample into .z component
1314 */
1315 for (i = 0; i < 3; ++i)
1316 {
1317 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1318 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1319
1320 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1321 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1322 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1323 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1324 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1325 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1326
1327 }
1328
1329 /* mul t0, t0, c0 ; Rescale texel to correct range */
1330 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1331 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1332
1333 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
1334 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
1335 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1336
1337 /* add o0, t0, t1 ; Add ref and differential to form final output */
1338 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1339 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1340
1341 /* end */
1342 inst = vl_end();
1343 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1344
1345 fs.tokens = tokens;
1346 mc->p_fs[0] = pipe->create_fs_state(pipe, &fs);
1347 free(tokens);
1348
1349 return 0;
1350 }
1351
1352 static int vlCreateFragmentShaderFieldPMB
1353 (
1354 struct vlR16SnormBufferedMC *mc
1355 )
1356 {
1357 const unsigned int max_tokens = 200;
1358
1359 struct pipe_context *pipe;
1360 struct pipe_shader_state fs;
1361 struct tgsi_token *tokens;
1362 struct tgsi_header *header;
1363
1364 struct tgsi_full_declaration decl;
1365 struct tgsi_full_instruction inst;
1366
1367 unsigned int ti;
1368 unsigned int i;
1369
1370 assert(mc);
1371
1372 pipe = mc->pipe;
1373 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1374
1375 /* Version */
1376 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1377 /* Header */
1378 header = (struct tgsi_header*)&tokens[1];
1379 *header = tgsi_build_header();
1380 /* Processor */
1381 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1382
1383 ti = 3;
1384
1385 /*
1386 * decl i0 ; Luma texcoords
1387 * decl i1 ; Chroma Cb texcoords
1388 * decl i2 ; Chroma Cr texcoords
1389 * decl i3 ; Ref macroblock top field texcoords
1390 * decl i4 ; Ref macroblock bottom field texcoords
1391 * decl i5 ; Denormalized vertex pos
1392 */
1393 for (i = 0; i < 6; ++i)
1394 {
1395 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1396 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1397 }
1398
1399 /*
1400 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1401 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
1402 */
1403 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1404 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1405
1406 /* decl o0 ; Fragment color */
1407 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1408 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1409
1410 /* decl t0-t4 */
1411 decl = vl_decl_temps(0, 4);
1412 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1413
1414 /*
1415 * decl s0 ; Sampler for luma texture
1416 * decl s1 ; Sampler for chroma Cb texture
1417 * decl s2 ; Sampler for chroma Cr texture
1418 * decl s3 ; Sampler for ref surface texture
1419 */
1420 for (i = 0; i < 4; ++i)
1421 {
1422 decl = vl_decl_samplers(i, i);
1423 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1424 }
1425
1426 /*
1427 * tex2d t1, i0, s0 ; Read texel from luma texture
1428 * mov t0.x, t1.x ; Move luma sample into .x component
1429 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1430 * mov t0.y, t1.x ; Move Cb sample into .y component
1431 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1432 * mov t0.z, t1.x ; Move Cr sample into .z component
1433 */
1434 for (i = 0; i < 3; ++i)
1435 {
1436 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1437 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1438
1439 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1440 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1441 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1442 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1443 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1444 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1445
1446 }
1447
1448 /* mul t0, t0, c0 ; Rescale texel to correct range */
1449 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1450 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1451
1452 /*
1453 * tex2d t1, i3, s3 ; Read texel from ref macroblock top field
1454 * tex2d t2, i4, s3 ; Read texel from ref macroblock bottom field
1455 */
1456 for (i = 0; i < 2; ++i)
1457 {
1458 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3);
1459 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1460 }
1461
1462 /* XXX: Pos values off by 0.5? */
1463 /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */
1464 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1);
1465 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1466 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1467 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1468 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1469 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1470 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1471 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1472 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1473 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1474
1475 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1476 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
1477 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1478 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1479 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1480 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1481 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1482
1483 /* floor t3, t3 ; Get rid of fractional part */
1484 inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
1485 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1486
1487 /* mul t3, t3, c1.y ; Multiply by 2 */
1488 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
1489 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1490 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1491 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1492 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1493 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1494
1495 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1496 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
1497 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1498
1499 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1500 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
1501 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1502 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1503
1504 /* add o0, t0, t1 ; Add ref and differential to form final output */
1505 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1506 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1507
1508 /* end */
1509 inst = vl_end();
1510 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1511
1512 fs.tokens = tokens;
1513 mc->p_fs[1] = pipe->create_fs_state(pipe, &fs);
1514 free(tokens);
1515
1516 return 0;
1517 }
1518
1519 static int vlCreateVertexShaderFrameBMB
1520 (
1521 struct vlR16SnormBufferedMC *mc
1522 )
1523 {
1524 const unsigned int max_tokens = 100;
1525
1526 struct pipe_context *pipe;
1527 struct pipe_shader_state vs;
1528 struct tgsi_token *tokens;
1529 struct tgsi_header *header;
1530
1531 struct tgsi_full_declaration decl;
1532 struct tgsi_full_instruction inst;
1533
1534 unsigned int ti;
1535 unsigned int i;
1536
1537 assert(mc);
1538
1539 pipe = mc->pipe;
1540 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1541
1542 /* Version */
1543 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1544 /* Header */
1545 header = (struct tgsi_header*)&tokens[1];
1546 *header = tgsi_build_header();
1547 /* Processor */
1548 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1549
1550 ti = 3;
1551
1552 /*
1553 * decl i0 ; Vertex pos
1554 * decl i1 ; Luma texcoords
1555 * decl i2 ; Chroma Cb texcoords
1556 * decl i3 ; Chroma Cr texcoords
1557 * decl i4 ; First ref macroblock top field texcoords
1558 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
1559 * decl i6 ; Second ref macroblock top field texcoords
1560 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
1561 */
1562 for (i = 0; i < 8; i++)
1563 {
1564 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1565 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1566 }
1567
1568 /*
1569 * decl o0 ; Vertex pos
1570 * decl o1 ; Luma texcoords
1571 * decl o2 ; Chroma Cb texcoords
1572 * decl o3 ; Chroma Cr texcoords
1573 * decl o4 ; First ref macroblock texcoords
1574 * decl o5 ; Second ref macroblock texcoords
1575 */
1576 for (i = 0; i < 6; i++)
1577 {
1578 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1579 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1580 }
1581
1582 /*
1583 * mov o0, i0 ; Move input vertex pos to output
1584 * mov o1, i1 ; Move input luma texcoords to output
1585 * mov o2, i2 ; Move input chroma Cb texcoords to output
1586 * mov o3, i3 ; Move input chroma Cr texcoords to output
1587 */
1588 for (i = 0; i < 4; ++i)
1589 {
1590 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1591 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1592 }
1593
1594 /*
1595 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
1596 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
1597 */
1598 for (i = 0; i < 2; ++i)
1599 {
1600 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
1601 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1602 }
1603
1604 /* end */
1605 inst = vl_end();
1606 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1607
1608 vs.tokens = tokens;
1609 mc->b_vs[0] = pipe->create_vs_state(pipe, &vs);
1610 free(tokens);
1611
1612 return 0;
1613 }
1614
1615 static int vlCreateVertexShaderFieldBMB
1616 (
1617 struct vlR16SnormBufferedMC *mc
1618 )
1619 {
1620 const unsigned int max_tokens = 100;
1621
1622 struct pipe_context *pipe;
1623 struct pipe_shader_state vs;
1624 struct tgsi_token *tokens;
1625 struct tgsi_header *header;
1626
1627 struct tgsi_full_declaration decl;
1628 struct tgsi_full_instruction inst;
1629
1630 unsigned int ti;
1631 unsigned int i;
1632
1633 assert(mc);
1634
1635 pipe = mc->pipe;
1636 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1637
1638 /* Version */
1639 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1640 /* Header */
1641 header = (struct tgsi_header*)&tokens[1];
1642 *header = tgsi_build_header();
1643 /* Processor */
1644 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1645
1646 ti = 3;
1647
1648 /*
1649 * decl i0 ; Vertex pos
1650 * decl i1 ; Luma texcoords
1651 * decl i2 ; Chroma Cb texcoords
1652 * decl i3 ; Chroma Cr texcoords
1653 * decl i4 ; First ref macroblock top field texcoords
1654 * decl i5 ; First ref macroblock bottom field texcoords
1655 * decl i6 ; Second ref macroblock top field texcoords
1656 * decl i7 ; Second ref macroblock bottom field texcoords
1657 */
1658 for (i = 0; i < 8; i++)
1659 {
1660 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1661 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1662 }
1663
1664 /* decl c0 ; Denorm coefficients */
1665 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6);
1666 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1667
1668 /*
1669 * decl o0 ; Vertex pos
1670 * decl o1 ; Luma texcoords
1671 * decl o2 ; Chroma Cb texcoords
1672 * decl o3 ; Chroma Cr texcoords
1673 * decl o4 ; First ref macroblock top field texcoords
1674 * decl o5 ; First ref macroblock Bottom field texcoords
1675 * decl o6 ; Second ref macroblock top field texcoords
1676 * decl o7 ; Second ref macroblock Bottom field texcoords
1677 * decl o8 ; Denormalized vertex pos
1678 */
1679 for (i = 0; i < 9; i++)
1680 {
1681 decl = vl_decl_output((i == 0 || i == 8) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1682 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1683 }
1684
1685 /* decl t0, t1 */
1686 decl = vl_decl_temps(0, 1);
1687 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1688
1689 /*
1690 * mov o0, i0 ; Move input vertex pos to output
1691 * mov o1, i1 ; Move input luma texcoords to output
1692 * mov o2, i2 ; Move input chroma Cb texcoords to output
1693 * mov o3, i3 ; Move input chroma Cr texcoords to output
1694 */
1695 for (i = 0; i < 4; ++i)
1696 {
1697 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1698 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1699 }
1700
1701 /*
1702 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
1703 * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
1704 * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
1705 * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
1706 */
1707 for (i = 0; i < 4; ++i)
1708 {
1709 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
1710 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1711 }
1712
1713 /* mul o8, i0, c0 ; Denorm vertex pos */
1714 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 8, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1715 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1716
1717 /* end */
1718 inst = vl_end();
1719 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1720
1721 vs.tokens = tokens;
1722 mc->b_vs[1] = pipe->create_vs_state(pipe, &vs);
1723 free(tokens);
1724
1725 return 0;
1726 }
1727
1728 static int vlCreateFragmentShaderFrameBMB
1729 (
1730 struct vlR16SnormBufferedMC *mc
1731 )
1732 {
1733 const unsigned int max_tokens = 100;
1734
1735 struct pipe_context *pipe;
1736 struct pipe_shader_state fs;
1737 struct tgsi_token *tokens;
1738 struct tgsi_header *header;
1739
1740 struct tgsi_full_declaration decl;
1741 struct tgsi_full_instruction inst;
1742
1743 unsigned int ti;
1744 unsigned int i;
1745
1746 assert(mc);
1747
1748 pipe = mc->pipe;
1749 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1750
1751 /* Version */
1752 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1753 /* Header */
1754 header = (struct tgsi_header*)&tokens[1];
1755 *header = tgsi_build_header();
1756 /* Processor */
1757 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1758
1759 ti = 3;
1760
1761 /*
1762 * decl i0 ; Luma texcoords
1763 * decl i1 ; Chroma Cb texcoords
1764 * decl i2 ; Chroma Cr texcoords
1765 * decl i3 ; First ref macroblock texcoords
1766 * decl i4 ; Second ref macroblock texcoords
1767 */
1768 for (i = 0; i < 5; ++i)
1769 {
1770 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1771 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1772 }
1773
1774 /*
1775 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1776 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
1777 */
1778 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1779 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1780
1781 /* decl o0 ; Fragment color */
1782 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1783 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1784
1785 /* decl t0-t2 */
1786 decl = vl_decl_temps(0, 2);
1787 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1788
1789 /*
1790 * decl s0 ; Sampler for luma texture
1791 * decl s1 ; Sampler for chroma Cb texture
1792 * decl s2 ; Sampler for chroma Cr texture
1793 * decl s3 ; Sampler for first ref surface texture
1794 * decl s4 ; Sampler for second ref surface texture
1795 */
1796 for (i = 0; i < 5; ++i)
1797 {
1798 decl = vl_decl_samplers(i, i);
1799 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1800 }
1801
1802 /*
1803 * tex2d t1, i0, s0 ; Read texel from luma texture
1804 * mov t0.x, t1.x ; Move luma sample into .x component
1805 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1806 * mov t0.y, t1.x ; Move Cb sample into .y component
1807 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1808 * mov t0.z, t1.x ; Move Cr sample into .z component
1809 */
1810 for (i = 0; i < 3; ++i)
1811 {
1812 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1813 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1814
1815 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1816 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1817 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1818 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1819 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1820 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1821
1822 }
1823
1824 /* mul t0, t0, c0 ; Rescale texel to correct range */
1825 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1826 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1827
1828 /*
1829 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
1830 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
1831 */
1832 for (i = 0; i < 2; ++i)
1833 {
1834 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
1835 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1836 }
1837
1838 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
1839 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1840 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1841 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1842 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1843 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1844 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1845
1846 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1847 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1848 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1849
1850 /* end */
1851 inst = vl_end();
1852 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1853
1854 fs.tokens = tokens;
1855 mc->b_fs[0] = pipe->create_fs_state(pipe, &fs);
1856 free(tokens);
1857
1858 return 0;
1859 }
1860
/*
 * Build the fragment shader used for field-coded, bi-predicted (B)
 * macroblocks and store it in mc->b_fs[1].
 *
 * The shader: samples the luma/Cb/Cr difference textures and rescales
 * them from 16-bit to 9-bit snorm range (mul by c0), computes the
 * fragment's Y coordinate modulo 2 to select between top- and
 * bottom-field texels of each reference surface, blends the past and
 * future reference texels with the weight in c1.x, and adds the result
 * to the differential to form the output color.
 *
 * Returns 0 unconditionally; the malloc() and create_fs_state()
 * results are not checked (NOTE(review): OOM here would crash in
 * tgsi_build_version — consider checking).
 */
static int vlCreateFragmentShaderFieldBMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	/* Upper bound on the TGSI token array built below; sized generously
	   for the declarations + instructions emitted by this function. */
	const unsigned int max_tokens = 200;

	struct pipe_context *pipe;
	struct pipe_shader_state fs;
	struct tgsi_token *tokens;
	struct tgsi_header *header;

	struct tgsi_full_declaration decl;
	struct tgsi_full_instruction inst;

	unsigned int ti;	/* write index into tokens[] */
	unsigned int i;

	assert(mc);

	pipe = mc->pipe;
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

	ti = 3;

	/*
	 * decl i0 ; Luma texcoords
	 * decl i1 ; Chroma Cb texcoords
	 * decl i2 ; Chroma Cr texcoords
	 * decl i3 ; First ref macroblock top field texcoords
	 * decl i4 ; First ref macroblock bottom field texcoords
	 * decl i5 ; Second ref macroblock top field texcoords
	 * decl i6 ; Second ref macroblock bottom field texcoords
	 * decl i7 ; Denormalized vertex pos
	 */
	for (i = 0; i < 8; ++i)
	{
		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
	 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
	 * ; and for Y-mod-2 top/bottom field selection
	 */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl o0 ; Fragment color */
	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl t0-t5 */
	decl = vl_decl_temps(0, 5);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl s0 ; Sampler for luma texture
	 * decl s1 ; Sampler for chroma Cb texture
	 * decl s2 ; Sampler for chroma Cr texture
	 * decl s3 ; Sampler for first ref surface texture
	 * decl s4 ; Sampler for second ref surface texture
	 */
	for (i = 0; i < 5; ++i)
	{
		decl = vl_decl_samplers(i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * tex2d t1, i0, s0 ; Read texel from luma texture
	 * mov t0.x, t1.x ; Move luma sample into .x component
	 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
	 * mov t0.y, t1.x ; Move Cb sample into .y component
	 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
	 * mov t0.z, t1.x ; Move Cr sample into .z component
	 */
	for (i = 0; i < 3; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

		/* Broadcast the sample's .x channel, write only channel i of t0 */
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	}

	/* mul t0, t0, c0 ; Rescale texel to correct range */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* XXX: Pos values off by 0.5? */
	/* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 7, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* floor t3, t3 ; Get rid of fractional part */
	inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t3, c1.y ; Multiply by 2 */
	inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field
	 * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
	/* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field
	 * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 5, TGSI_FILE_SAMPLER, 4);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
	/* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* add o0, t0, t1 ; Add past/future ref and differential to form final output */
	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	fs.tokens = tokens;
	/* Slot [1] holds the field-coded B-macroblock fragment shader */
	mc->b_fs[1] = pipe->create_fs_state(pipe, &fs);
	free(tokens);

	return 0;
}
2054
2055 static int vlCreateDataBufs
2056 (
2057 struct vlR16SnormBufferedMC *mc
2058 )
2059 {
2060 const unsigned int mbw = align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH;
2061 const unsigned int mbh = align(mc->picture_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT;
2062 const unsigned int num_mb_per_frame = mbw * mbh;
2063
2064 struct pipe_context *pipe;
2065 unsigned int h, i;
2066
2067 assert(mc);
2068
2069 pipe = mc->pipe;
2070
2071 /* Create our vertex buffers */
2072 for (h = 0; h < NUM_BUF_SETS; ++h)
2073 {
2074 mc->vertex_bufs[h][0].pitch = sizeof(struct vlVertex2f) * 4;
2075 mc->vertex_bufs[h][0].max_index = 24 * num_mb_per_frame - 1;
2076 mc->vertex_bufs[h][0].buffer_offset = 0;
2077 mc->vertex_bufs[h][0].buffer = pipe->winsys->buffer_create
2078 (
2079 pipe->winsys,
2080 1,
2081 PIPE_BUFFER_USAGE_VERTEX,
2082 sizeof(struct vlVertex2f) * 4 * 24 * num_mb_per_frame
2083 );
2084
2085 for (i = 1; i < 3; ++i)
2086 {
2087 mc->vertex_bufs[h][i].pitch = sizeof(struct vlVertex2f) * 2;
2088 mc->vertex_bufs[h][i].max_index = 24 * num_mb_per_frame - 1;
2089 mc->vertex_bufs[h][i].buffer_offset = 0;
2090 mc->vertex_bufs[h][i].buffer = pipe->winsys->buffer_create
2091 (
2092 pipe->winsys,
2093 1,
2094 PIPE_BUFFER_USAGE_VERTEX,
2095 sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame
2096 );
2097 }
2098 }
2099
2100 /* Position element */
2101 mc->vertex_elems[0].src_offset = 0;
2102 mc->vertex_elems[0].vertex_buffer_index = 0;
2103 mc->vertex_elems[0].nr_components = 2;
2104 mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
2105
2106 /* Luma, texcoord element */
2107 mc->vertex_elems[1].src_offset = sizeof(struct vlVertex2f);
2108 mc->vertex_elems[1].vertex_buffer_index = 0;
2109 mc->vertex_elems[1].nr_components = 2;
2110 mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
2111
2112 /* Chroma Cr texcoord element */
2113 mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f) * 2;
2114 mc->vertex_elems[2].vertex_buffer_index = 0;
2115 mc->vertex_elems[2].nr_components = 2;
2116 mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
2117
2118 /* Chroma Cb texcoord element */
2119 mc->vertex_elems[3].src_offset = sizeof(struct vlVertex2f) * 3;
2120 mc->vertex_elems[3].vertex_buffer_index = 0;
2121 mc->vertex_elems[3].nr_components = 2;
2122 mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
2123
2124 /* First ref surface top field texcoord element */
2125 mc->vertex_elems[4].src_offset = 0;
2126 mc->vertex_elems[4].vertex_buffer_index = 1;
2127 mc->vertex_elems[4].nr_components = 2;
2128 mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
2129
2130 /* First ref surface bottom field texcoord element */
2131 mc->vertex_elems[5].src_offset = sizeof(struct vlVertex2f);
2132 mc->vertex_elems[5].vertex_buffer_index = 1;
2133 mc->vertex_elems[5].nr_components = 2;
2134 mc->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
2135
2136 /* Second ref surface top field texcoord element */
2137 mc->vertex_elems[6].src_offset = 0;
2138 mc->vertex_elems[6].vertex_buffer_index = 2;
2139 mc->vertex_elems[6].nr_components = 2;
2140 mc->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
2141
2142 /* Second ref surface bottom field texcoord element */
2143 mc->vertex_elems[7].src_offset = sizeof(struct vlVertex2f);
2144 mc->vertex_elems[7].vertex_buffer_index = 2;
2145 mc->vertex_elems[7].nr_components = 2;
2146 mc->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
2147
2148 /* Create our constant buffer */
2149 mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts);
2150 mc->vs_const_buf.buffer = pipe->winsys->buffer_create
2151 (
2152 pipe->winsys,
2153 1,
2154 PIPE_BUFFER_USAGE_CONSTANT,
2155 mc->vs_const_buf.size
2156 );
2157
2158 mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts);
2159 mc->fs_const_buf.buffer = pipe->winsys->buffer_create
2160 (
2161 pipe->winsys,
2162 1,
2163 PIPE_BUFFER_USAGE_CONSTANT,
2164 mc->fs_const_buf.size
2165 );
2166
2167 memcpy
2168 (
2169 pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
2170 &fs_consts,
2171 sizeof(struct vlFragmentShaderConsts)
2172 );
2173
2174 pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer);
2175
2176 mc->macroblocks = malloc(sizeof(struct vlMpeg2MacroBlock) * num_mb_per_frame);
2177
2178 return 0;
2179 }
2180
2181 static int vlInit
2182 (
2183 struct vlR16SnormBufferedMC *mc
2184 )
2185 {
2186 struct pipe_context *pipe;
2187 struct pipe_sampler_state sampler;
2188 struct pipe_texture template;
2189 unsigned int filters[5];
2190 unsigned int i;
2191
2192 assert(mc);
2193
2194 pipe = mc->pipe;
2195
2196 /* For MC we render to textures, which are rounded up to nearest POT */
2197 mc->viewport.scale[0] = vlRoundUpPOT(mc->picture_width);
2198 mc->viewport.scale[1] = vlRoundUpPOT(mc->picture_height);
2199 mc->viewport.scale[2] = 1;
2200 mc->viewport.scale[3] = 1;
2201 mc->viewport.translate[0] = 0;
2202 mc->viewport.translate[1] = 0;
2203 mc->viewport.translate[2] = 0;
2204 mc->viewport.translate[3] = 0;
2205
2206 mc->render_target.width = vlRoundUpPOT(mc->picture_width);
2207 mc->render_target.height = vlRoundUpPOT(mc->picture_height);
2208 mc->render_target.num_cbufs = 1;
2209 /* FB for MC stage is a vlSurface created by the user, set at render time */
2210 mc->render_target.zsbuf = NULL;
2211
2212 filters[0] = PIPE_TEX_FILTER_NEAREST;
2213 /* FIXME: Linear causes discoloration around block edges */
2214 filters[1] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
2215 filters[2] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
2216 filters[3] = PIPE_TEX_FILTER_LINEAR;
2217 filters[4] = PIPE_TEX_FILTER_LINEAR;
2218
2219 for (i = 0; i < 5; ++i)
2220 {
2221 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2222 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2223 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2224 sampler.min_img_filter = filters[i];
2225 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2226 sampler.mag_img_filter = filters[i];
2227 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
2228 sampler.compare_func = PIPE_FUNC_ALWAYS;
2229 sampler.normalized_coords = 1;
2230 /*sampler.prefilter = ;*/
2231 /*sampler.shadow_ambient = ;*/
2232 /*sampler.lod_bias = ;*/
2233 sampler.min_lod = 0;
2234 /*sampler.max_lod = ;*/
2235 /*sampler.border_color[i] = ;*/
2236 /*sampler.max_anisotropy = ;*/
2237 mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler);
2238 }
2239
2240 memset(&template, 0, sizeof(struct pipe_texture));
2241 template.target = PIPE_TEXTURE_2D;
2242 template.format = PIPE_FORMAT_R16_SNORM;
2243 template.last_level = 0;
2244 template.width[0] = vlRoundUpPOT(mc->picture_width);
2245 template.height[0] = vlRoundUpPOT(mc->picture_height);
2246 template.depth[0] = 1;
2247 template.compressed = 0;
2248 pf_get_block(template.format, &template.block);
2249
2250 for (i = 0; i < NUM_BUF_SETS; ++i)
2251 mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template);
2252
2253 if (mc->picture_format == vlFormatYCbCr420)
2254 {
2255 template.width[0] = vlRoundUpPOT(mc->picture_width / 2);
2256 template.height[0] = vlRoundUpPOT(mc->picture_height / 2);
2257 }
2258 else if (mc->picture_format == vlFormatYCbCr422)
2259 template.height[0] = vlRoundUpPOT(mc->picture_height / 2);
2260
2261 for (i = 0; i < NUM_BUF_SETS; ++i)
2262 {
2263 mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template);
2264 mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template);
2265 }
2266
2267 /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
2268
2269 vlCreateVertexShaderIMB(mc);
2270 vlCreateFragmentShaderIMB(mc);
2271 vlCreateVertexShaderFramePMB(mc);
2272 vlCreateVertexShaderFieldPMB(mc);
2273 vlCreateFragmentShaderFramePMB(mc);
2274 vlCreateFragmentShaderFieldPMB(mc);
2275 vlCreateVertexShaderFrameBMB(mc);
2276 vlCreateVertexShaderFieldBMB(mc);
2277 vlCreateFragmentShaderFrameBMB(mc);
2278 vlCreateFragmentShaderFieldBMB(mc);
2279 vlCreateDataBufs(mc);
2280
2281 return 0;
2282 }
2283
2284 int vlCreateR16SNormBufferedMC
2285 (
2286 struct pipe_context *pipe,
2287 unsigned int picture_width,
2288 unsigned int picture_height,
2289 enum vlFormat picture_format,
2290 struct vlRender **render
2291 )
2292 {
2293 struct vlR16SnormBufferedMC *mc;
2294 unsigned int i;
2295
2296 assert(pipe);
2297 assert(render);
2298
2299 mc = calloc(1, sizeof(struct vlR16SnormBufferedMC));
2300
2301 mc->base.vlBegin = &vlBegin;
2302 mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered;
2303 mc->base.vlEnd = &vlEnd;
2304 mc->base.vlFlush = &vlFlush;
2305 mc->base.vlDestroy = &vlDestroy;
2306 mc->pipe = pipe;
2307 mc->picture_width = picture_width;
2308 mc->picture_height = picture_height;
2309
2310 mc->cur_buf = 0;
2311 mc->buffered_surface = NULL;
2312 mc->past_surface = NULL;
2313 mc->future_surface = NULL;
2314 for (i = 0; i < 3; ++i)
2315 mc->zero_block[i].x = -1.0f;
2316 mc->num_macroblocks = 0;
2317
2318 vlInit(mc);
2319
2320 *render = &mc->base;
2321
2322 return 0;
2323 }