g3dvl: Re-enable buffer rotation, disable high quality 420->444 conversion.
[mesa.git] / src / gallium / state_trackers / g3dvl / vl_r16snorm_mc_buf.c
1 #define VL_INTERNAL
2 #include "vl_r16snorm_mc_buf.h"
3 #include <assert.h>
4 #include <stdlib.h>
5 #include <pipe/p_context.h>
6 #include <pipe/p_winsys.h>
7 #include <pipe/p_screen.h>
8 #include <pipe/p_state.h>
9 #include <pipe/p_util.h>
10 #include <pipe/p_inlines.h>
11 #include <tgsi/tgsi_parse.h>
12 #include <tgsi/tgsi_build.h>
13 #include "vl_render.h"
14 #include "vl_shader_build.h"
15 #include "vl_surface.h"
16 #include "vl_util.h"
17 #include "vl_types.h"
18 #include "vl_defs.h"
19
20 #define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */
21
/*
 * Internal expansion of the public macroblock type: each prediction
 * direction is split into frame- and field-motion variants, because every
 * combination is rendered with its own shader pair and vertex buffer set.
 */
enum vlMacroBlockTypeEx
{
   vlMacroBlockExTypeIntra,
   vlMacroBlockExTypeFwdPredictedFrame,
   vlMacroBlockExTypeFwdPredictedField,
   vlMacroBlockExTypeBkwdPredictedFrame,
   vlMacroBlockExTypeBkwdPredictedField,
   vlMacroBlockExTypeBiPredictedFrame,
   vlMacroBlockExTypeBiPredictedField,

   vlNumMacroBlockExTypes
};
34
/* Constant buffer layout for the vertex shaders. */
struct vlVertexShaderConsts
{
   struct vlVertex4f denorm;  /* Surface width/height used to denormalize vertex coords */
};
39
/* Constant buffer layout for the fragment shaders (see fs_consts below). */
struct vlFragmentShaderConsts
{
   struct vlVertex4f multiplier;  /* Rescales 16-bit snorm samples */
   struct vlVertex4f div;         /* Used for the Y % 2 field-select computation */
};
45
/*
 * Buffered motion-compensation renderer state.  Macroblocks are
 * accumulated per type into rotating buffer sets and drawn in batches
 * when vlFlush() runs.
 */
struct vlR16SnormBufferedMC
{
   struct vlRender base;  /* Must be first: callers downcast vlRender* to this */

   unsigned int video_width, video_height;
   enum vlFormat video_format;

   unsigned int cur_buf;                      /* Monotonic counter; cur_buf % NUM_BUF_SETS selects the active set */
   struct vlSurface *buffered_surface;        /* Render target the pending macroblocks belong to */
   struct vlSurface *past_surface, *future_surface;  /* Reference surfaces for P/B prediction */
   struct vlVertex2f surface_tex_inv_size;    /* 1 / texture dimensions, for normalizing coords */
   unsigned int num_macroblocks[vlNumMacroBlockExTypes];  /* Pending count per expanded type */
   unsigned int total_num_macroblocks;

   struct pipe_context *pipe;
   struct pipe_viewport_state viewport;
   struct pipe_framebuffer_state render_target;
   /* Samplers/textures: [0]=luma, [1]=Cb, [2]=Cr, [3]=past ref, [4]=future ref */
   struct pipe_sampler_state *samplers[5];
   struct pipe_texture *textures[NUM_BUF_SETS][5];
   /* Shaders: i_* intra, p_*[0/1] frame/field predicted, b_*[0/1] frame/field bi-predicted */
   void *i_vs, *p_vs[2], *b_vs[2];
   void *i_fs, *p_fs[2], *b_fs[2];
   /* Per set, per type: stream 0 = positions, 1 = first MV, 2 = second MV */
   struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][vlNumMacroBlockExTypes][3];
   struct pipe_vertex_element vertex_elems[5];
   struct pipe_constant_buffer vs_const_buf, fs_const_buf;
};
71
/*
 * Begin a rendering sequence.  Nothing to set up here: work is
 * accumulated by the render callback and emitted in vlFlush().
 * Always returns 0.
 */
static int vlBegin
(
   struct vlRender *render
)
{
   assert(render);

   return 0;
}
81
82 static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch)
83 {
84 unsigned int y;
85
86 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
87 memcpy
88 (
89 dst + y * dst_pitch,
90 src + y * VL_BLOCK_WIDTH,
91 VL_BLOCK_WIDTH * 2
92 );
93
94 return 0;
95 }
96
/*
 * Copies one interlaced (field-coded) 8x8 block of 16-bit samples into a
 * mapped texture, spreading rows onto every other destination line.
 * dst_pitch is measured in 16-bit texels.
 *
 * NOTE(review): in the second loop y keeps counting from
 * VL_BLOCK_HEIGHT / 2 while dst has already been advanced by
 * VL_BLOCK_HEIGHT * dst_pitch, so the bottom-field rows land at line
 * VL_BLOCK_HEIGHT + 2*y relative to the original dst -- confirm this
 * matches the layout expected by the caller (vlGrabBlocks passes a
 * single-line vertical offset for field-coded blocks).
 */
static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch)
{
   unsigned int y;

   /* Top-field rows: source rows 0..3 to destination lines 0,2,4,6 */
   for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y)
      memcpy
      (
         dst + y * dst_pitch * 2,
         src + y * VL_BLOCK_WIDTH,
         VL_BLOCK_WIDTH * 2
      );

   dst += VL_BLOCK_HEIGHT * dst_pitch;

   /* Bottom-field rows: remaining source rows, still on every other line */
   for (; y < VL_BLOCK_HEIGHT; ++y)
      memcpy
      (
         dst + y * dst_pitch * 2,
         src + y * VL_BLOCK_WIDTH,
         VL_BLOCK_WIDTH * 2
      );

   return 0;
}
121
122 static int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
123 {
124 unsigned int y;
125
126 for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
127 memset
128 (
129 dst + y * dst_pitch,
130 0,
131 VL_BLOCK_WIDTH * 2
132 );
133
134 return 0;
135 }
136
/*
 * Uploads one macroblock's worth of IDCT coefficient blocks into the
 * current buffer set's source textures ([0]=luma, [1]=Cb, [2]=Cr).
 *
 * coded_block_pattern carries 6 bits, MSB first: four luma blocks in
 * 2x2 raster order followed by Cb and Cr.  'blocks' holds only the
 * coded blocks, packed back to back; missing blocks are zero-filled in
 * the texture instead.  Always returns 0.
 */
static int vlGrabBlocks
(
   struct vlR16SnormBufferedMC *mc,
   unsigned int mbx,
   unsigned int mby,
   enum vlDCTType dct_type,
   unsigned int coded_block_pattern,
   short *blocks
)
{
   struct pipe_surface *tex_surface;
   short *texels;
   unsigned int tex_pitch;
   /* tb: target block slot within the macroblock, sb: index into the packed coded blocks */
   unsigned int x, y, tb = 0, sb = 0;
   unsigned int mbpx = mbx * VL_MACROBLOCK_WIDTH, mbpy = mby * VL_MACROBLOCK_HEIGHT;

   assert(mc);
   assert(blocks);

   /* Luma texture of the active rotating buffer set */
   tex_surface = mc->pipe->screen->get_tex_surface
   (
      mc->pipe->screen,
      mc->textures[mc->cur_buf % NUM_BUF_SETS][0],
      0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
   );

   texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
   /* stride is in bytes; convert to a pitch in 16-bit texels */
   tex_pitch = tex_surface->stride / tex_surface->block.size;

   texels += mbpy * tex_pitch + mbpx;

   /* Four 8x8 luma blocks, walked in 2x2 raster order */
   for (y = 0; y < 2; ++y)
   {
      for (x = 0; x < 2; ++x, ++tb)
      {
         if ((coded_block_pattern >> (5 - tb)) & 1)
         {
            short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

            if (dct_type == vlDCTTypeFrameCoded)
            {
               vlGrabFrameCodedBlock
               (
                  cur_block,
                  texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH,
                  tex_pitch
               );
            }
            else
            {
               /* Field coded: vertical block offset is a single line,
                  the grab routine interleaves the rows itself */
               vlGrabFieldCodedBlock
               (
                  cur_block,
                  texels + y * tex_pitch + x * VL_BLOCK_WIDTH,
                  tex_pitch
               );
            }

            ++sb;
         }
         else
            vlGrabNoBlock(texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, tex_pitch);
      }
   }

   pipe_surface_unmap(tex_surface);

   /* Chroma planes are half resolution in both dimensions (4:2:0) */
   /* TODO: Implement 422, 444 */
   mbpx >>= 1;
   mbpy >>= 1;

   /* One 8x8 block each for Cb (texture 1, cbp bit 1) and Cr (texture 2, cbp bit 0) */
   for (tb = 0; tb < 2; ++tb)
   {
      tex_surface = mc->pipe->screen->get_tex_surface
      (
         mc->pipe->screen,
         mc->textures[mc->cur_buf % NUM_BUF_SETS][tb + 1],
         0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
      );

      texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
      tex_pitch = tex_surface->stride / tex_surface->block.size;

      texels += mbpy * tex_pitch + mbpx;

      if ((coded_block_pattern >> (1 - tb)) & 1)
      {
         short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;

         /* Chroma blocks are always grabbed frame-coded here */
         vlGrabFrameCodedBlock
         (
            cur_block,
            texels,
            tex_pitch
         );

         ++sb;
      }
      else
         vlGrabNoBlock(texels, tex_pitch);

      pipe_surface_unmap(tex_surface);
   }

   return 0;
}
243
/*
 * Buffers one macroblock: writes its motion vectors and vertex positions
 * into the vertex buffers of the active buffer set, then uploads its
 * coefficient blocks via vlGrabBlocks().
 *
 * Vertex layout: each macroblock contributes 24 vertices (4 quads of 2
 * triangles, one quad per luma block).  Stream 0 carries positions (one
 * vlVertex2f per vertex); streams 1 and 2 carry motion vectors packed as
 * two vlVertex2f per vertex (top/bottom field), hence the "* 2 * 24"
 * offsets.  The second switch deliberately falls through: bi-predicted
 * blocks also fill the forward/backward stream, and all predicted blocks
 * also fill the position stream.  Always returns 0.
 */
static int vlGrabMacroBlock
(
   struct vlR16SnormBufferedMC *mc,
   struct vlMpeg2MacroBlock *macroblock
)
{
   /* Macroblock and half-macroblock extents in normalized texture coords */
   const struct vlVertex2f unit =
   {
      mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH,
      mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT
   };
   const struct vlVertex2f half =
   {
      mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2),
      mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2)
   };

   struct vlVertex2f *vb;
   enum vlMacroBlockTypeEx mb_type_ex;
   struct vlVertex2f mo_vec[2];
   unsigned int i;

   assert(mc);
   assert(macroblock);

   /* Map (mb_type, mo_type) to the expanded type used for bucketing */
   switch (macroblock->mb_type)
   {
      case vlMacroBlockTypeIntra:
      {
         mb_type_ex = vlMacroBlockExTypeIntra;
         break;
      }
      case vlMacroBlockTypeFwdPredicted:
      {
         mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ?
            vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField;
         break;
      }
      case vlMacroBlockTypeBkwdPredicted:
      {
         mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ?
            vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField;
         break;
      }
      case vlMacroBlockTypeBiPredicted:
      {
         mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ?
            vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField;
         break;
      }
      default:
         assert(0);
   }

   switch (macroblock->mb_type)
   {
      case vlMacroBlockTypeBiPredicted:
      {
         /* Stream 2: backward motion vectors (PMV[..][1]), halved because
            MVs are in half-pel units, normalized to texture space */
         vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
         (
            mc->pipe->winsys,
            mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][2].buffer,
            PIPE_BUFFER_USAGE_CPU_WRITE
         ) + mc->num_macroblocks[mb_type_ex] * 2 * 24;

         mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
         mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

         if (macroblock->mo_type == vlMotionTypeFrame)
         {
            /* Frame motion: same vector for both field slots */
            for (i = 0; i < 24 * 2; i += 2)
            {
               vb[i].x = mo_vec[0].x;
               vb[i].y = mo_vec[0].y;
            }
         }
         else
         {
            /* Field motion: separate vectors for top/bottom field slots */
            mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
            mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;

            for (i = 0; i < 24 * 2; i += 2)
            {
               vb[i].x = mo_vec[0].x;
               vb[i].y = mo_vec[0].y;
               vb[i + 1].x = mo_vec[1].x;
               vb[i + 1].y = mo_vec[1].y;
            }
         }

         mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][2].buffer);

         /* fall-through */
      }
      case vlMacroBlockTypeFwdPredicted:
      case vlMacroBlockTypeBkwdPredicted:
      {
         /* Stream 1: primary motion vectors -- forward (PMV[..][0]) unless
            this is a backward-only macroblock */
         vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
         (
            mc->pipe->winsys,
            mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][1].buffer,
            PIPE_BUFFER_USAGE_CPU_WRITE
         ) + mc->num_macroblocks[mb_type_ex] * 2 * 24;

         if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted)
         {
            mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x;
            mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y;

            if (macroblock->mo_type == vlMotionTypeField)
            {
               mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x;
               mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y;
            }
         }
         else
         {
            mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x;
            mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y;

            if (macroblock->mo_type == vlMotionTypeField)
            {
               mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x;
               mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y;
            }
         }

         if (macroblock->mo_type == vlMotionTypeFrame)
         {
            for (i = 0; i < 24 * 2; i += 2)
            {
               vb[i].x = mo_vec[0].x;
               vb[i].y = mo_vec[0].y;
            }
         }
         else
         {
            for (i = 0; i < 24 * 2; i += 2)
            {
               vb[i].x = mo_vec[0].x;
               vb[i].y = mo_vec[0].y;
               vb[i + 1].x = mo_vec[1].x;
               vb[i + 1].y = mo_vec[1].y;
            }
         }

         mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][1].buffer);

         /* fall-through */
      }
      case vlMacroBlockTypeIntra:
      {
         /* Stream 0: 24 vertex positions -- four quads (two CCW triangles
            each), one per luma block, tiling the macroblock */
         vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
         (
            mc->pipe->winsys,
            mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][0].buffer,
            PIPE_BUFFER_USAGE_CPU_WRITE
         ) + mc->num_macroblocks[mb_type_ex] * 24;

         /* Top-left quad */
         vb[0].x = macroblock->mbx * unit.x; vb[0].y = macroblock->mby * unit.y;
         vb[1].x = macroblock->mbx * unit.x; vb[1].y = macroblock->mby * unit.y + half.y;
         vb[2].x = macroblock->mbx * unit.x + half.x; vb[2].y = macroblock->mby * unit.y;

         vb[3].x = macroblock->mbx * unit.x + half.x; vb[3].y = macroblock->mby * unit.y;
         vb[4].x = macroblock->mbx * unit.x; vb[4].y = macroblock->mby * unit.y + half.y;
         vb[5].x = macroblock->mbx * unit.x + half.x; vb[5].y = macroblock->mby * unit.y + half.y;

         /* Top-right quad */
         vb[6].x = macroblock->mbx * unit.x + half.x; vb[6].y = macroblock->mby * unit.y;
         vb[7].x = macroblock->mbx * unit.x + half.x; vb[7].y = macroblock->mby * unit.y + half.y;
         vb[8].x = macroblock->mbx * unit.x + unit.x; vb[8].y = macroblock->mby * unit.y;

         vb[9].x = macroblock->mbx * unit.x + unit.x; vb[9].y = macroblock->mby * unit.y;
         vb[10].x = macroblock->mbx * unit.x + half.x; vb[10].y = macroblock->mby * unit.y + half.y;
         vb[11].x = macroblock->mbx * unit.x + unit.x; vb[11].y = macroblock->mby * unit.y + half.y;

         /* Bottom-left quad */
         vb[12].x = macroblock->mbx * unit.x; vb[12].y = macroblock->mby * unit.y + half.y;
         vb[13].x = macroblock->mbx * unit.x; vb[13].y = macroblock->mby * unit.y + unit.y;
         vb[14].x = macroblock->mbx * unit.x + half.x; vb[14].y = macroblock->mby * unit.y + half.y;

         vb[15].x = macroblock->mbx * unit.x + half.x; vb[15].y = macroblock->mby * unit.y + half.y;
         vb[16].x = macroblock->mbx * unit.x; vb[16].y = macroblock->mby * unit.y + unit.y;
         vb[17].x = macroblock->mbx * unit.x + half.x; vb[17].y = macroblock->mby * unit.y + unit.y;

         /* Bottom-right quad */
         vb[18].x = macroblock->mbx * unit.x + half.x; vb[18].y = macroblock->mby * unit.y + half.y;
         vb[19].x = macroblock->mbx * unit.x + half.x; vb[19].y = macroblock->mby * unit.y + unit.y;
         vb[20].x = macroblock->mbx * unit.x + unit.x; vb[20].y = macroblock->mby * unit.y + half.y;

         vb[21].x = macroblock->mbx * unit.x + unit.x; vb[21].y = macroblock->mby * unit.y + half.y;
         vb[22].x = macroblock->mbx * unit.x + half.x; vb[22].y = macroblock->mby * unit.y + unit.y;
         vb[23].x = macroblock->mbx * unit.x + unit.x; vb[23].y = macroblock->mby * unit.y + unit.y;

         mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][0].buffer);

         break;
      }
      default:
         assert(0);
   }

   vlGrabBlocks
   (
      mc,
      macroblock->mbx,
      macroblock->mby,
      macroblock->dct_type,
      macroblock->cbp,
      macroblock->blocks
   );

   mc->num_macroblocks[mb_type_ex]++;
   mc->total_num_macroblocks++;

   return 0;
}
458
459 static int vlFlush
460 (
461 struct vlRender *render
462 )
463 {
464 struct vlR16SnormBufferedMC *mc;
465 struct pipe_context *pipe;
466 struct vlVertexShaderConsts *vs_consts;
467
468 assert(mc);
469
470 mc = (struct vlR16SnormBufferedMC*)render;
471 pipe = mc->pipe;
472
473 mc->render_target.cbufs[0] = pipe->screen->get_tex_surface
474 (
475 pipe->screen,
476 mc->buffered_surface->texture,
477 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
478 );
479
480 pipe->set_framebuffer_state(pipe, &mc->render_target);
481 pipe->set_viewport_state(pipe, &mc->viewport);
482 vs_consts = pipe->winsys->buffer_map
483 (
484 pipe->winsys,
485 mc->vs_const_buf.buffer,
486 PIPE_BUFFER_USAGE_CPU_WRITE
487 );
488
489 vs_consts->denorm.x = mc->buffered_surface->texture->width[0];
490 vs_consts->denorm.y = mc->buffered_surface->texture->height[0];
491
492 pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer);
493 pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf);
494
495 if (mc->num_macroblocks[vlMacroBlockExTypeIntra] > 0)
496 {
497 pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeIntra]);
498 pipe->set_vertex_elements(pipe, 1, mc->vertex_elems);
499 pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
500 pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers);
501 pipe->bind_vs_state(pipe, mc->i_vs);
502 pipe->bind_fs_state(pipe, mc->i_fs);
503
504 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeIntra] * 24);
505 }
506
507 if (mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0)
508 {
509 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeFwdPredictedFrame]);
510 pipe->set_vertex_elements(pipe, 3, mc->vertex_elems);
511 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
512 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
513 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
514 pipe->bind_vs_state(pipe, mc->p_vs[0]);
515 pipe->bind_fs_state(pipe, mc->p_fs[0]);
516
517 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24);
518 }
519
520 if (mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0)
521 {
522 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeFwdPredictedField]);
523 pipe->set_vertex_elements(pipe, 3, mc->vertex_elems);
524 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
525 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
526 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
527 pipe->bind_vs_state(pipe, mc->p_vs[1]);
528 pipe->bind_fs_state(pipe, mc->p_fs[1]);
529
530 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24);
531 }
532
533 if (mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0)
534 {
535 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBkwdPredictedFrame]);
536 pipe->set_vertex_elements(pipe, 3, mc->vertex_elems);
537 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
538 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
539 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
540 pipe->bind_vs_state(pipe, mc->p_vs[0]);
541 pipe->bind_fs_state(pipe, mc->p_fs[0]);
542
543 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24);
544 }
545
546 if (mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0)
547 {
548 pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBkwdPredictedField]);
549 pipe->set_vertex_elements(pipe, 3, mc->vertex_elems);
550 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture;
551 pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
552 pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers);
553 pipe->bind_vs_state(pipe, mc->p_vs[1]);
554 pipe->bind_fs_state(pipe, mc->p_fs[1]);
555
556 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24);
557 }
558
559 if (mc->num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0)
560 {
561 pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBiPredictedFrame]);
562 pipe->set_vertex_elements(pipe, 5, mc->vertex_elems);
563 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
564 mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
565 pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
566 pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
567 pipe->bind_vs_state(pipe, mc->b_vs[0]);
568 pipe->bind_fs_state(pipe, mc->b_fs[0]);
569
570 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24);
571 }
572
573 if (mc->num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0)
574 {
575 pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBiPredictedField]);
576 pipe->set_vertex_elements(pipe, 5, mc->vertex_elems);
577 mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture;
578 mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture;
579 pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]);
580 pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers);
581 pipe->bind_vs_state(pipe, mc->b_vs[1]);
582 pipe->bind_fs_state(pipe, mc->b_fs[1]);
583
584 pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24);
585 }
586
587 memset(mc->num_macroblocks, 0, sizeof(unsigned int) * 7);
588 mc->total_num_macroblocks = 0;
589 mc->cur_buf++;
590
591 return 0;
592 }
593
594 static int vlRenderMacroBlocksMpeg2R16SnormBuffered
595 (
596 struct vlRender *render,
597 struct vlMpeg2MacroBlockBatch *batch,
598 struct vlSurface *surface
599 )
600 {
601 struct vlR16SnormBufferedMC *mc;
602 unsigned int i;
603
604 assert(render);
605
606 mc = (struct vlR16SnormBufferedMC*)render;
607
608 if (mc->buffered_surface)
609 {
610 if
611 (
612 mc->buffered_surface != surface /*||
613 mc->past_surface != batch->past_surface ||
614 mc->future_surface != batch->future_surface*/
615 )
616 {
617 vlFlush(&mc->base);
618 mc->buffered_surface = surface;
619 mc->past_surface = batch->past_surface;
620 mc->future_surface = batch->future_surface;
621 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
622 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
623 }
624 }
625 else
626 {
627 mc->buffered_surface = surface;
628 mc->past_surface = batch->past_surface;
629 mc->future_surface = batch->future_surface;
630 mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
631 mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
632 }
633
634 for (i = 0; i < batch->num_macroblocks; ++i)
635 vlGrabMacroBlock(mc, &batch->macroblocks[i]);
636
637 return 0;
638 }
639
/*
 * End a rendering sequence.  Intentionally a no-op: buffered work is
 * emitted by vlFlush(), not here.  Always returns 0.
 */
static int vlEnd
(
   struct vlRender *render
)
{
   assert(render);

   return 0;
}
649
650 static int vlDestroy
651 (
652 struct vlRender *render
653 )
654 {
655 struct vlR16SnormBufferedMC *mc;
656 struct pipe_context *pipe;
657 unsigned int g, h, i;
658
659 assert(render);
660
661 mc = (struct vlR16SnormBufferedMC*)render;
662 pipe = mc->pipe;
663
664 for (i = 0; i < 5; ++i)
665 pipe->delete_sampler_state(pipe, mc->samplers[i]);
666
667 for (g = 0; g < NUM_BUF_SETS; ++g)
668 for (h = 0; h < 7; ++h)
669 for (i = 0; i < 3; ++i)
670 pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[g][h][i].buffer);
671
672 /* Textures 3 & 4 are not created directly, no need to release them here */
673 for (i = 0; i < NUM_BUF_SETS; ++i)
674 {
675 pipe_texture_release(&mc->textures[i][0]);
676 pipe_texture_release(&mc->textures[i][1]);
677 pipe_texture_release(&mc->textures[i][2]);
678 }
679
680 pipe->delete_vs_state(pipe, mc->i_vs);
681 pipe->delete_fs_state(pipe, mc->i_fs);
682
683 for (i = 0; i < 2; ++i)
684 {
685 pipe->delete_vs_state(pipe, mc->p_vs[i]);
686 pipe->delete_fs_state(pipe, mc->p_fs[i]);
687 pipe->delete_vs_state(pipe, mc->b_vs[i]);
688 pipe->delete_fs_state(pipe, mc->b_fs[i]);
689 }
690
691 pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer);
692 pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer);
693
694 free(mc);
695
696 return 0;
697 }
698
/*
 * Multiplier renormalizes block samples from 16 bits to 12 bits
 * (32767/255 per channel; alpha unused).
 * Divider is used when calculating Y % 2 for choosing top or bottom
 * field for P or B macroblocks.
 * TODO: Use immediates.
 */
static const struct vlFragmentShaderConsts fs_consts =
{
   {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
   {0.5f, 2.0f, 0.0f, 0.0f}
};
710
/*
 * Builds the vertex shader for intra macroblocks and stores it in
 * mc->i_vs.  The shader passes the vertex position through and reuses it
 * as the luma/chroma texture coordinates.  Always returns 0.
 */
static int vlCreateVertexShaderIMB
(
   struct vlR16SnormBufferedMC *mc
)
{
   const unsigned int max_tokens = 50;

   struct pipe_context *pipe;
   struct pipe_shader_state vs;
   struct tgsi_token *tokens;
   struct tgsi_header *header;

   struct tgsi_full_declaration decl;
   struct tgsi_full_instruction inst;

   unsigned int ti;  /* Current write position in the token array */
   unsigned int i;

   assert(mc);

   pipe = mc->pipe;
   tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

   /* Version */
   *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
   /* Header */
   header = (struct tgsi_header*)&tokens[1];
   *header = tgsi_build_header();
   /* Processor */
   *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);

   ti = 3;

   /*
    * decl i0 ; Vertex pos, luma & chroma texcoords
    * (i1, i2 declared too so the vertex element layout matches the
    * predicted-MB shaders, though they are unused here)
    */
   for (i = 0; i < 3; i++)
   {
      decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
   }

   /*
    * decl o0 ; Vertex pos
    * decl o1 ; Luma/chroma texcoords
    */
   for (i = 0; i < 2; i++)
   {
      decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
   }

   /*
    * mov o0, i0 ; Move input vertex pos to output
    * mov o1, i0 ; Move input luma/chroma texcoords to output
    */
   for (i = 0; i < 2; ++i)
   {
      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
   }

   /* end */
   inst = vl_end();
   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

   vs.tokens = tokens;
   mc->i_vs = pipe->create_vs_state(pipe, &vs);
   free(tokens);

   return 0;
}
783
/*
 * Builds the fragment shader for intra macroblocks and stores it in
 * mc->i_fs.  Samples the Y, Cb, Cr textures at the interpolated
 * texcoords, assembles them into one color, and rescales by the
 * constant multiplier.  Always returns 0.
 */
static int vlCreateFragmentShaderIMB
(
   struct vlR16SnormBufferedMC *mc
)
{
   const unsigned int max_tokens = 100;

   struct pipe_context *pipe;
   struct pipe_shader_state fs;
   struct tgsi_token *tokens;
   struct tgsi_header *header;

   struct tgsi_full_declaration decl;
   struct tgsi_full_instruction inst;

   unsigned int ti;  /* Current write position in the token array */
   unsigned int i;

   assert(mc);

   pipe = mc->pipe;
   tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

   /* Version */
   *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
   /* Header */
   header = (struct tgsi_header*)&tokens[1];
   *header = tgsi_build_header();
   /* Processor */
   *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

   ti = 3;

   /* decl i0 ; Luma/chroma texcoords */
   decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

   /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
   decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

   /* decl o0 ; Fragment color */
   decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

   /* decl t0, t1 */
   decl = vl_decl_temps(0, 1);
   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

   /*
    * decl s0 ; Sampler for luma texture
    * decl s1 ; Sampler for chroma Cb texture
    * decl s2 ; Sampler for chroma Cr texture
    */
   for (i = 0; i < 3; ++i)
   {
      decl = vl_decl_samplers(i, i);
      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti);
   }

   /*
    * tex2d t1, i0, s0 ; Read texel from luma texture
    * mov t0.x, t1.x ; Move luma sample into .x component
    * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
    * mov t0.y, t1.x ; Move Cb sample into .y component
    * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
    * mov t0.z, t1.x ; Move Cr sample into .z component
    */
   for (i = 0; i < 3; ++i)
   {
      inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

      /* Broadcast the sample's .x and mask the write to the target channel */
      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
      inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
      inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
      inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
      inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

   }

   /* mul o0, t0, c0 ; Rescale texel to correct range */
   inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

   /* end */
   inst = vl_end();
   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

   fs.tokens = tokens;
   mc->i_fs = pipe->create_fs_state(pipe, &fs);
   free(tokens);

   return 0;
}
880
/*
 * Builds the vertex shader for frame-motion predicted (P) macroblocks
 * and stores it in mc->p_vs[0].  Like the intra shader, but additionally
 * translates the vertex position by the motion vector (input i1) to form
 * the reference macroblock texcoords.  Always returns 0.
 */
static int vlCreateVertexShaderFramePMB
(
   struct vlR16SnormBufferedMC *mc
)
{
   const unsigned int max_tokens = 100;

   struct pipe_context *pipe;
   struct pipe_shader_state vs;
   struct tgsi_token *tokens;
   struct tgsi_header *header;

   struct tgsi_full_declaration decl;
   struct tgsi_full_instruction inst;

   unsigned int ti;  /* Current write position in the token array */
   unsigned int i;

   assert(mc);

   pipe = mc->pipe;
   tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

   /* Version */
   *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
   /* Header */
   header = (struct tgsi_header*)&tokens[1];
   *header = tgsi_build_header();
   /* Processor */
   *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);

   ti = 3;

   /*
    * decl i0 ; Vertex pos, luma/chroma texcoords
    * decl i1 ; Ref surface top field texcoords
    * decl i2 ; Ref surface bottom field texcoords (unused, packed in the same stream)
    */
   for (i = 0; i < 3; i++)
   {
      decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
   }

   /*
    * decl o0 ; Vertex pos
    * decl o1 ; Luma/chroma texcoords
    * decl o2 ; Ref macroblock texcoords
    */
   for (i = 0; i < 3; i++)
   {
      decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
   }

   /*
    * mov o0, i0 ; Move input vertex pos to output
    * mov o1, i0 ; Move input luma/chroma texcoords to output
    */
   for (i = 0; i < 2; ++i)
   {
      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
   }

   /* add o2, i0, i1 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
   inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 1);
   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

   /* end */
   inst = vl_end();
   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

   vs.tokens = tokens;
   mc->p_vs[0] = pipe->create_vs_state(pipe, &vs);
   free(tokens);

   return 0;
}
960
961 static int vlCreateVertexShaderFieldPMB
962 (
963 struct vlR16SnormBufferedMC *mc
964 )
965 {
966 const unsigned int max_tokens = 100;
967
968 struct pipe_context *pipe;
969 struct pipe_shader_state vs;
970 struct tgsi_token *tokens;
971 struct tgsi_header *header;
972
973 struct tgsi_full_declaration decl;
974 struct tgsi_full_instruction inst;
975
976 unsigned int ti;
977 unsigned int i;
978
979 assert(mc);
980
981 pipe = mc->pipe;
982 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
983
984 /* Version */
985 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
986 /* Header */
987 header = (struct tgsi_header*)&tokens[1];
988 *header = tgsi_build_header();
989 /* Processor */
990 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
991
992 ti = 3;
993
994 /*
995 * decl i0 ; Vertex pos, luma/chroma texcoords
996 * decl i1 ; Ref surface top field texcoords
997 * decl i2 ; Ref surface bottom field texcoords
998 */
999 for (i = 0; i < 3; i++)
1000 {
1001 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1002 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1003 }
1004
1005 /* decl c0 ; Texcoord denorm coefficients */
1006 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1007 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1008
1009 /*
1010 * decl o0 ; Vertex pos
1011 * decl o1 ; Luma/chroma texcoords
1012 * decl o2 ; Top field ref macroblock texcoords
1013 * decl o3 ; Bottom field ref macroblock texcoords
1014 * decl o4 ; Denormalized vertex pos
1015 */
1016 for (i = 0; i < 5; i++)
1017 {
1018 decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1019 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1020 }
1021
1022 /*
1023 * mov o0, i0 ; Move input vertex pos to output
1024 * mov o1, i0 ; Move input luma/chroma texcoords to output
1025 */
1026 for (i = 0; i < 3; ++i)
1027 {
1028 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i == 0 ? 0 : i - 1);
1029 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1030 }
1031
1032 /*
1033 * add o2, i0, i1 ; Translate vertex pos by motion vec to form top field macroblock texcoords
1034 * add o3, i0, i2 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords
1035 */
1036 for (i = 0; i < 2; ++i)
1037 {
1038 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 1);
1039 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1040 }
1041
1042 /* mul o4, i0, c0 ; Denorm vertex pos */
1043 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1044 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1045
1046 /* end */
1047 inst = vl_end();
1048 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1049
1050 vs.tokens = tokens;
1051 mc->p_vs[1] = pipe->create_vs_state(pipe, &vs);
1052 free(tokens);
1053
1054 return 0;
1055 }
1056
1057 static int vlCreateFragmentShaderFramePMB
1058 (
1059 struct vlR16SnormBufferedMC *mc
1060 )
1061 {
1062 const unsigned int max_tokens = 100;
1063
1064 struct pipe_context *pipe;
1065 struct pipe_shader_state fs;
1066 struct tgsi_token *tokens;
1067 struct tgsi_header *header;
1068
1069 struct tgsi_full_declaration decl;
1070 struct tgsi_full_instruction inst;
1071
1072 unsigned int ti;
1073 unsigned int i;
1074
1075 assert(mc);
1076
1077 pipe = mc->pipe;
1078 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1079
1080 /* Version */
1081 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1082 /* Header */
1083 header = (struct tgsi_header*)&tokens[1];
1084 *header = tgsi_build_header();
1085 /* Processor */
1086 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1087
1088 ti = 3;
1089
1090 /*
1091 * decl i0 ; Texcoords for s0, s1, s2
1092 * decl i1 ; Texcoords for s3
1093 */
1094 for (i = 0; i < 2; ++i)
1095 {
1096 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1097 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1098 }
1099
1100 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
1101 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
1102 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1103
1104 /* decl o0 ; Fragment color */
1105 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1106 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1107
1108 /* decl t0, t1 */
1109 decl = vl_decl_temps(0, 1);
1110 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1111
1112 /*
1113 * decl s0 ; Sampler for luma texture
1114 * decl s1 ; Sampler for chroma Cb texture
1115 * decl s2 ; Sampler for chroma Cr texture
1116 * decl s3 ; Sampler for ref surface texture
1117 */
1118 for (i = 0; i < 4; ++i)
1119 {
1120 decl = vl_decl_samplers(i, i);
1121 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1122 }
1123
1124 /*
1125 * tex2d t1, i0, s0 ; Read texel from luma texture
1126 * mov t0.x, t1.x ; Move luma sample into .x component
1127 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
1128 * mov t0.y, t1.x ; Move Cb sample into .y component
1129 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
1130 * mov t0.z, t1.x ; Move Cr sample into .z component
1131 */
1132 for (i = 0; i < 3; ++i)
1133 {
1134 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
1135 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1136
1137 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1138 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1139 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1140 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1141 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1142 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1143
1144 }
1145
1146 /* mul t0, t0, c0 ; Rescale texel to correct range */
1147 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1148 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1149
1150 /* tex2d t1, i1, s3 ; Read texel from ref macroblock */
1151 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 1, TGSI_FILE_SAMPLER, 3);
1152 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1153
1154 /* add o0, t0, t1 ; Add ref and differential to form final output */
1155 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1156 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1157
1158 /* end */
1159 inst = vl_end();
1160 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1161
1162 fs.tokens = tokens;
1163 mc->p_fs[0] = pipe->create_fs_state(pipe, &fs);
1164 free(tokens);
1165
1166 return 0;
1167 }
1168
/*
 * Create the fragment shader used for field-coded, single-reference
 * predicted macroblocks (P-type, field-based motion compensation).
 *
 * The shader reads the Y/Cb/Cr differential (s0-s2), rescales it via c0,
 * fetches both the top-field and bottom-field reference texels (s3 with
 * two sets of texcoords), computes the destination row parity (Y mod 2)
 * from the denormalized vertex position, selects between the two field
 * texels with a lerp on that parity, and adds the selected reference to
 * the differential.
 *
 * Stores the compiled shader in mc->p_fs[1]; always returns 0.
 * NOTE(review): the malloc result is used unchecked — crashes on OOM.
 */
static int vlCreateFragmentShaderFieldPMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 200;

	struct pipe_context		*pipe;
	struct pipe_shader_state	fs;
	struct tgsi_token		*tokens;
	struct tgsi_header		*header;

	struct tgsi_full_declaration	decl;
	struct tgsi_full_instruction	inst;

	unsigned int			ti;
	unsigned int			i;

	assert(mc);

	pipe = mc->pipe;
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

	ti = 3;

	/*
	 * decl i0 ; Texcoords for s0, s1, s2
	 * decl i1 ; Texcoords for s3 (top field)
	 * decl i2 ; Texcoords for s3 (bottom field)
	 * decl i3 ; Denormalized vertex pos
	 */
	for (i = 0; i < 4; ++i)
	{
		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
	 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
	 */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl o0 ; Fragment color */
	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl t0-t4 */
	decl = vl_decl_temps(0, 4);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl s0 ; Sampler for luma texture
	 * decl s1 ; Sampler for chroma Cb texture
	 * decl s2 ; Sampler for chroma Cr texture
	 * decl s3 ; Sampler for ref surface texture
	 */
	for (i = 0; i < 4; ++i)
	{
		decl = vl_decl_samplers(i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * tex2d t1, i0, s0 ; Read texel from luma texture
	 * mov t0.x, t1.x ; Move luma sample into .x component
	 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
	 * mov t0.y, t1.x ; Move Cb sample into .y component
	 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
	 * mov t0.z, t1.x ; Move Cr sample into .z component
	 */
	for (i = 0; i < 3; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

		/* Broadcast t1.x; write mask selects one channel of t0 per pass */
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	}

	/* mul t0, t0, c0 ; Rescale texel to correct range */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t1, i1, s3 ; Read texel from ref macroblock top field
	 * tex2d t2, i2, s3 ; Read texel from ref macroblock bottom field
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, 3);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* XXX: Pos values off by 0.5? */
	/* sub t4, i3.y, c1.x ; Sub 0.5 from denormalized pos */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 3, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* floor t3, t3 ; Get rid of fractional part */
	inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t3, c1.y ; Multiply by 2 */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
	/* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* add o0, t0, t1 ; Add ref and differential to form final output */
	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	fs.tokens = tokens;
	mc->p_fs[1] = pipe->create_fs_state(pipe, &fs);
	free(tokens);

	return 0;
}
1333
1334 static int vlCreateVertexShaderFrameBMB
1335 (
1336 struct vlR16SnormBufferedMC *mc
1337 )
1338 {
1339 const unsigned int max_tokens = 100;
1340
1341 struct pipe_context *pipe;
1342 struct pipe_shader_state vs;
1343 struct tgsi_token *tokens;
1344 struct tgsi_header *header;
1345
1346 struct tgsi_full_declaration decl;
1347 struct tgsi_full_instruction inst;
1348
1349 unsigned int ti;
1350 unsigned int i;
1351
1352 assert(mc);
1353
1354 pipe = mc->pipe;
1355 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1356
1357 /* Version */
1358 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1359 /* Header */
1360 header = (struct tgsi_header*)&tokens[1];
1361 *header = tgsi_build_header();
1362 /* Processor */
1363 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1364
1365 ti = 3;
1366
1367 /*
1368 * decl i0 ; Vertex pos, luma/chroma texcoords
1369 * decl i1 ; First ref surface top field texcoords
1370 * decl i2 ; First ref surface bottom field texcoords (unused, packed in the same stream)
1371 * decl i3 ; Second ref surface top field texcoords
1372 * decl i4 ; Second ref surface bottom field texcoords (unused, packed in the same stream)
1373 */
1374 for (i = 0; i < 5; i++)
1375 {
1376 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1377 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1378 }
1379
1380 /*
1381 * decl o0 ; Vertex pos
1382 * decl o1 ; Luma/chroma texcoords
1383 * decl o2 ; First ref macroblock texcoords
1384 * decl o3 ; Second ref macroblock texcoords
1385 */
1386 for (i = 0; i < 4; i++)
1387 {
1388 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1389 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1390 }
1391
1392 /*
1393 * mov o0, i0 ; Move input vertex pos to output
1394 * mov o1, i0 ; Move input luma/chroma texcoords to output
1395 */
1396 for (i = 0; i < 2; ++i)
1397 {
1398 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
1399 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1400 }
1401
1402 /*
1403 * add o2, i0, i1 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
1404 * add o3, i0, i3 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
1405 */
1406 for (i = 0; i < 2; ++i)
1407 {
1408 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i * 2 + 1);
1409 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1410 }
1411
1412 /* end */
1413 inst = vl_end();
1414 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1415
1416 vs.tokens = tokens;
1417 mc->b_vs[0] = pipe->create_vs_state(pipe, &vs);
1418 free(tokens);
1419
1420 return 0;
1421 }
1422
/*
 * Create the vertex shader for field-coded bi-predicted macroblocks.
 *
 * Emits the vertex position, luma/chroma texcoords, top/bottom field
 * texcoords for both the past and the future reference surfaces, and a
 * denormalized copy of the vertex position (used by the fragment shader
 * for Y-mod-2 field selection).
 *
 * Stores the compiled shader in mc->b_vs[1]; always returns 0.
 * NOTE(review): the malloc result is used unchecked — crashes on OOM.
 */
static int vlCreateVertexShaderFieldBMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 100;

	struct pipe_context		*pipe;
	struct pipe_shader_state	vs;
	struct tgsi_token		*tokens;
	struct tgsi_header		*header;

	struct tgsi_full_declaration	decl;
	struct tgsi_full_instruction	inst;

	unsigned int			ti;
	unsigned int			i;

	assert(mc);

	pipe = mc->pipe;
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);

	ti = 3;

	/*
	 * decl i0 ; Vertex pos, Luma/chroma texcoords
	 * decl i1 ; First ref surface top field texcoords
	 * decl i2 ; First ref surface bottom field texcoords
	 * decl i3 ; Second ref surface top field texcoords
	 * decl i4 ; Second ref surface bottom field texcoords
	 */
	for (i = 0; i < 5; i++)
	{
		decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/* decl c0 ; Denorm coefficients */
	/* NOTE(review): last arg 6 declares constant range c0-c6, though only
	 * c0 is referenced below — sibling shaders pass the last constant
	 * index here; confirm whether 6 is intentional. */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl o0 ; Vertex pos
	 * decl o1 ; Luma/chroma texcoords
	 * decl o2 ; Top field past ref macroblock texcoords
	 * decl o3 ; Bottom field past ref macroblock texcoords
	 * decl o4 ; Top field future ref macroblock texcoords
	 * decl o5 ; Bottom field future ref macroblock texcoords
	 * decl o6 ; Denormalized vertex pos
	 */
	/* NOTE(review): i == 7 is unreachable (loop runs 0..6), so only o0
	 * gets TGSI_SEMANTIC_POSITION — o6 is declared GENERIC. */
	for (i = 0; i < 7; i++)
	{
		decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/* decl t0, t1 */
	decl = vl_decl_temps(0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * mov o0..o5, i0 ; Copy the input vertex pos to all six outputs.
	 * Only o0 (pos) and o1 (texcoords) keep this value; o2-o5 are
	 * overwritten by the adds below.
	 */
	for (i = 0; i < 6; ++i)
	{
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * add o2, i0, i1 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
	 * add o3, i0, i2 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
	 * add o4, i0, i3 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
	 * add o5, i0, i4 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
	 */
	for (i = 0; i < 4; ++i)
	{
		inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 1);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* mul o6, i0, c0 ; Denorm vertex pos */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	vs.tokens = tokens;
	mc->b_vs[1] = pipe->create_vs_state(pipe, &vs);
	free(tokens);

	return 0;
}
1532
/*
 * Create the fragment shader for frame-coded bi-predicted macroblocks
 * (B-type, frame-based prediction from past and future references).
 *
 * Fetches the Y/Cb/Cr differential (s0-s2), rescales it via c0, fetches
 * one texel from each of the past (s3) and future (s4) reference
 * surfaces, blends them with a lerp weighted by c1.x, and adds the
 * blended reference to the differential.
 *
 * Stores the compiled shader in mc->b_fs[0]; always returns 0.
 * NOTE(review): the malloc result is used unchecked — crashes on OOM.
 */
static int vlCreateFragmentShaderFrameBMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 100;

	struct pipe_context		*pipe;
	struct pipe_shader_state	fs;
	struct tgsi_token		*tokens;
	struct tgsi_header		*header;

	struct tgsi_full_declaration	decl;
	struct tgsi_full_instruction	inst;

	unsigned int			ti;
	unsigned int			i;

	assert(mc);

	pipe = mc->pipe;
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

	ti = 3;

	/*
	 * decl i0 ; Texcoords for s0, s1, s2
	 * decl i1 ; Texcoords for s3
	 * decl i2 ; Texcoords for s4
	 */
	for (i = 0; i < 3; ++i)
	{
		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
	 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
	 */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl o0 ; Fragment color */
	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl t0-t2 */
	decl = vl_decl_temps(0, 2);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl s0 ; Sampler for luma texture
	 * decl s1 ; Sampler for chroma Cb texture
	 * decl s2 ; Sampler for chroma Cr texture
	 * decl s3 ; Sampler for past ref surface texture
	 * decl s4 ; Sampler for future ref surface texture
	 */
	for (i = 0; i < 5; ++i)
	{
		decl = vl_decl_samplers(i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * tex2d t1, i0, s0 ; Read texel from luma texture
	 * mov t0.x, t1.x ; Move luma sample into .x component
	 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
	 * mov t0.y, t1.x ; Move Cb sample into .y component
	 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
	 * mov t0.z, t1.x ; Move Cr sample into .z component
	 */
	for (i = 0; i < 3; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

		/* Broadcast t1.x; write mask selects one channel of t0 per pass */
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	}

	/* mul t0, t0, c0 ; Rescale texel to correct range */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t1, i1, s3 ; Read texel from past ref macroblock
	 * tex2d t2, i2, s4 ; Read texel from future ref macroblock
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, i + 3);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* add o0, t0, t1 ; Add past/future ref and differential to form final output */
	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	fs.tokens = tokens;
	mc->b_fs[0] = pipe->create_fs_state(pipe, &fs);
	free(tokens);

	return 0;
}
1663
/*
 * Create the fragment shader for field-coded bi-predicted macroblocks
 * (B-type, field-based prediction from past and future references).
 *
 * Fetches the Y/Cb/Cr differential (s0-s2), rescales it via c0, computes
 * the destination row parity (Y mod 2) from the denormalized vertex pos,
 * selects between top/bottom field texels for the past reference (s3) and
 * again for the future reference (s4), blends the two selected reference
 * texels with a lerp weighted by c1.x, and adds the result to the
 * differential.
 *
 * Stores the compiled shader in mc->b_fs[1]; always returns 0.
 * NOTE(review): the malloc result is used unchecked — crashes on OOM.
 */
static int vlCreateFragmentShaderFieldBMB
(
	struct vlR16SnormBufferedMC *mc
)
{
	const unsigned int max_tokens = 200;

	struct pipe_context		*pipe;
	struct pipe_shader_state	fs;
	struct tgsi_token		*tokens;
	struct tgsi_header		*header;

	struct tgsi_full_declaration	decl;
	struct tgsi_full_instruction	inst;

	unsigned int			ti;
	unsigned int			i;

	assert(mc);

	pipe = mc->pipe;
	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));

	/* Version */
	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
	/* Header */
	header = (struct tgsi_header*)&tokens[1];
	*header = tgsi_build_header();
	/* Processor */
	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);

	ti = 3;

	/*
	 * decl i0 ; Texcoords for s0, s1, s2
	 * decl i1 ; Texcoords for s3 (top field)
	 * decl i2 ; Texcoords for s3 (bottom field)
	 * decl i3 ; Texcoords for s4 (top field)
	 * decl i4 ; Texcoords for s4 (bottom field)
	 * decl i5 ; Denormalized vertex pos
	 */
	for (i = 0; i < 6; ++i)
	{
		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
	 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
	 *         ; and for Y-mod-2 top/bottom field selection
	 */
	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl o0 ; Fragment color */
	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/* decl t0-t5 */
	decl = vl_decl_temps(0, 5);
	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);

	/*
	 * decl s0 ; Sampler for luma texture
	 * decl s1 ; Sampler for chroma Cb texture
	 * decl s2 ; Sampler for chroma Cr texture
	 * decl s3 ; Sampler for past ref surface texture
	 * decl s4 ; Sampler for future ref surface texture
	 */
	for (i = 0; i < 5; ++i)
	{
		decl = vl_decl_samplers(i, i);
		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
	}

	/*
	 * tex2d t1, i0, s0 ; Read texel from luma texture
	 * mov t0.x, t1.x ; Move luma sample into .x component
	 * tex2d t1, i0, s1 ; Read texel from chroma Cb texture
	 * mov t0.y, t1.x ; Move Cb sample into .y component
	 * tex2d t1, i0, s2 ; Read texel from chroma Cr texture
	 * mov t0.z, t1.x ; Move Cr sample into .z component
	 */
	for (i = 0; i < 3; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

		/* Broadcast t1.x; write mask selects one channel of t0 per pass */
		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	}

	/* mul t0, t0, c0 ; Rescale texel to correct range */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* XXX: Pos values off by 0.5? */
	/* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* floor t3, t3 ; Get rid of fractional part */
	inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* mul t3, t3, c1.y ; Multiply by 2 */
	inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t1, i1, s3 ; Read texel from past ref macroblock top field
	 * tex2d t2, i2, s3 ; Read texel from past ref macroblock bottom field
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, 3);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
	/* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/*
	 * tex2d t4, i3, s4 ; Read texel from future ref macroblock top field
	 * tex2d t5, i4, s4 ; Read texel from future ref macroblock bottom field
	 */
	for (i = 0; i < 2; ++i)
	{
		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 4);
		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
	}

	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
	/* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* add o0, t0, t1 ; Add past/future ref and differential to form final output */
	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	/* end */
	inst = vl_end();
	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);

	fs.tokens = tokens;
	mc->b_fs[1] = pipe->create_fs_state(pipe, &fs);
	free(tokens);

	return 0;
}
1855
1856 static int vlCreateDataBufs
1857 (
1858 struct vlR16SnormBufferedMC *mc
1859 )
1860 {
1861 const unsigned int mbw = align(mc->video_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH;
1862 const unsigned int mbh = align(mc->video_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT;
1863 const unsigned int num_mb_per_frame = mbw * mbh;
1864
1865 struct pipe_context *pipe;
1866 unsigned int g, h, i;
1867
1868 assert(mc);
1869
1870 pipe = mc->pipe;
1871
1872 for (g = 0; g < NUM_BUF_SETS; ++g)
1873 {
1874 for (h = 0; h < 7; ++h)
1875 {
1876 /* Create our vertex buffer and vertex buffer element */
1877 mc->vertex_bufs[g][h][0].pitch = sizeof(struct vlVertex2f);
1878 mc->vertex_bufs[g][h][0].max_index = 24 * num_mb_per_frame - 1;
1879 mc->vertex_bufs[g][h][0].buffer_offset = 0;
1880 mc->vertex_bufs[g][h][0].buffer = pipe->winsys->buffer_create
1881 (
1882 pipe->winsys,
1883 1,
1884 PIPE_BUFFER_USAGE_VERTEX,
1885 sizeof(struct vlVertex2f) * 24 * num_mb_per_frame
1886 );
1887 }
1888 }
1889
1890 /* Position & block luma, block chroma texcoord element */
1891 mc->vertex_elems[0].src_offset = 0;
1892 mc->vertex_elems[0].vertex_buffer_index = 0;
1893 mc->vertex_elems[0].nr_components = 2;
1894 mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
1895
1896 for (g = 0; g < NUM_BUF_SETS; ++g)
1897 {
1898 for (h = 0; h < 7; ++h)
1899 {
1900 for (i = 1; i < 3; ++i)
1901 {
1902 mc->vertex_bufs[g][h][i].pitch = sizeof(struct vlVertex2f) * 2;
1903 mc->vertex_bufs[g][h][i].max_index = 24 * num_mb_per_frame - 1;
1904 mc->vertex_bufs[g][h][i].buffer_offset = 0;
1905 mc->vertex_bufs[g][h][i].buffer = pipe->winsys->buffer_create
1906 (
1907 pipe->winsys,
1908 1,
1909 PIPE_BUFFER_USAGE_VERTEX,
1910 sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame
1911 );
1912 }
1913 }
1914 }
1915
1916 /* First ref surface top field texcoord element */
1917 mc->vertex_elems[1].src_offset = 0;
1918 mc->vertex_elems[1].vertex_buffer_index = 1;
1919 mc->vertex_elems[1].nr_components = 2;
1920 mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
1921
1922 /* First ref surface bottom field texcoord element */
1923 mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f);
1924 mc->vertex_elems[2].vertex_buffer_index = 1;
1925 mc->vertex_elems[2].nr_components = 2;
1926 mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
1927
1928 /* Second ref surface top field texcoord element */
1929 mc->vertex_elems[3].src_offset = 0;
1930 mc->vertex_elems[3].vertex_buffer_index = 2;
1931 mc->vertex_elems[3].nr_components = 2;
1932 mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
1933
1934 /* Second ref surface bottom field texcoord element */
1935 mc->vertex_elems[4].src_offset = sizeof(struct vlVertex2f);
1936 mc->vertex_elems[4].vertex_buffer_index = 2;
1937 mc->vertex_elems[4].nr_components = 2;
1938 mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
1939
1940 /* Create our constant buffer */
1941 mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts);
1942 mc->vs_const_buf.buffer = pipe->winsys->buffer_create
1943 (
1944 pipe->winsys,
1945 1,
1946 PIPE_BUFFER_USAGE_CONSTANT,
1947 mc->vs_const_buf.size
1948 );
1949
1950 mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts);
1951 mc->fs_const_buf.buffer = pipe->winsys->buffer_create
1952 (
1953 pipe->winsys,
1954 1,
1955 PIPE_BUFFER_USAGE_CONSTANT,
1956 mc->fs_const_buf.size
1957 );
1958
1959 memcpy
1960 (
1961 pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
1962 &fs_consts,
1963 sizeof(struct vlFragmentShaderConsts)
1964 );
1965
1966 pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer);
1967
1968 return 0;
1969 }
1970
1971 static int vlInit
1972 (
1973 struct vlR16SnormBufferedMC *mc
1974 )
1975 {
1976 struct pipe_context *pipe;
1977 struct pipe_sampler_state sampler;
1978 struct pipe_texture template;
1979 unsigned int filters[5];
1980 unsigned int i;
1981
1982 assert(mc);
1983
1984 pipe = mc->pipe;
1985
1986 /* For MC we render to textures, which are rounded up to nearest POT */
1987 mc->viewport.scale[0] = vlRoundUpPOT(mc->video_width);
1988 mc->viewport.scale[1] = vlRoundUpPOT(mc->video_height);
1989 mc->viewport.scale[2] = 1;
1990 mc->viewport.scale[3] = 1;
1991 mc->viewport.translate[0] = 0;
1992 mc->viewport.translate[1] = 0;
1993 mc->viewport.translate[2] = 0;
1994 mc->viewport.translate[3] = 0;
1995
1996 mc->render_target.width = vlRoundUpPOT(mc->video_width);
1997 mc->render_target.height = vlRoundUpPOT(mc->video_height);
1998 mc->render_target.num_cbufs = 1;
1999 /* FB for MC stage is a vlSurface created by the user, set at render time */
2000 mc->render_target.zsbuf = NULL;
2001
2002 filters[0] = PIPE_TEX_FILTER_NEAREST;
2003 /* FIXME: Linear causes discoloration around block edges */
2004 filters[1] = /*mc->video_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
2005 filters[2] = /*mc->video_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/;
2006 filters[3] = PIPE_TEX_FILTER_LINEAR;
2007 filters[4] = PIPE_TEX_FILTER_LINEAR;
2008
2009 for (i = 0; i < 5; ++i)
2010 {
2011 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2012 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2013 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2014 sampler.min_img_filter = filters[i];
2015 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2016 sampler.mag_img_filter = filters[i];
2017 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
2018 sampler.compare_func = PIPE_FUNC_ALWAYS;
2019 sampler.normalized_coords = 1;
2020 /*sampler.prefilter = ;*/
2021 /*sampler.shadow_ambient = ;*/
2022 /*sampler.lod_bias = ;*/
2023 sampler.min_lod = 0;
2024 /*sampler.max_lod = ;*/
2025 /*sampler.border_color[i] = ;*/
2026 /*sampler.max_anisotropy = ;*/
2027 mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler);
2028 }
2029
2030 memset(&template, 0, sizeof(struct pipe_texture));
2031 template.target = PIPE_TEXTURE_2D;
2032 template.format = PIPE_FORMAT_R16_SNORM;
2033 template.last_level = 0;
2034 template.width[0] = vlRoundUpPOT(mc->video_width);
2035 template.height[0] = vlRoundUpPOT(mc->video_height);
2036 template.depth[0] = 1;
2037 template.compressed = 0;
2038 pf_get_block(template.format, &template.block);
2039
2040 for (i = 0; i < NUM_BUF_SETS; ++i)
2041 mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template);
2042
2043 if (mc->video_format == vlFormatYCbCr420)
2044 {
2045 template.width[0] = vlRoundUpPOT(mc->video_width / 2);
2046 template.height[0] = vlRoundUpPOT(mc->video_height / 2);
2047 }
2048 else if (mc->video_format == vlFormatYCbCr422)
2049 template.height[0] = vlRoundUpPOT(mc->video_height / 2);
2050
2051 for (i = 0; i < NUM_BUF_SETS; ++i)
2052 {
2053 mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template);
2054 mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template);
2055 }
2056
2057 /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
2058
2059 vlCreateVertexShaderIMB(mc);
2060 vlCreateFragmentShaderIMB(mc);
2061 vlCreateVertexShaderFramePMB(mc);
2062 vlCreateVertexShaderFieldPMB(mc);
2063 vlCreateFragmentShaderFramePMB(mc);
2064 vlCreateFragmentShaderFieldPMB(mc);
2065 vlCreateVertexShaderFrameBMB(mc);
2066 vlCreateVertexShaderFieldBMB(mc);
2067 vlCreateFragmentShaderFrameBMB(mc);
2068 vlCreateFragmentShaderFieldBMB(mc);
2069 vlCreateDataBufs(mc);
2070
2071 return 0;
2072 }
2073
2074 int vlCreateR16SNormBufferedMC
2075 (
2076 struct pipe_context *pipe,
2077 unsigned int video_width,
2078 unsigned int video_height,
2079 enum vlFormat video_format,
2080 struct vlRender **render
2081 )
2082 {
2083 struct vlR16SnormBufferedMC *mc;
2084
2085 assert(pipe);
2086 assert(render);
2087
2088 mc = calloc(1, sizeof(struct vlR16SnormBufferedMC));
2089
2090 mc->base.vlBegin = &vlBegin;
2091 mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered;
2092 mc->base.vlEnd = &vlEnd;
2093 mc->base.vlFlush = &vlFlush;
2094 mc->base.vlDestroy = &vlDestroy;
2095 mc->pipe = pipe;
2096 mc->video_width = video_width;
2097 mc->video_height = video_height;
2098
2099 mc->cur_buf = 0;
2100 mc->buffered_surface = NULL;
2101 mc->past_surface = NULL;
2102 mc->future_surface = NULL;
2103 memset(mc->num_macroblocks, 0, sizeof(unsigned int) * 7);
2104 mc->total_num_macroblocks = 0;
2105
2106 vlInit(mc);
2107
2108 *render = &mc->base;
2109
2110 return 0;
2111 }