g3dvl: Temporarily disable IDCT.
[mesa.git] / src / gallium / state_trackers / g3dvl / vl_context.c
1 #include "vl_context.h"
2 #include <assert.h>
3 #include <stdlib.h>
4 #include <pipe/p_context.h>
5 #include <pipe/p_winsys.h>
6 #include <pipe/p_screen.h>
7 #include <pipe/p_state.h>
8 #include <pipe/p_inlines.h>
9 #include <pipe/p_shader_tokens.h>
10 #include <tgsi/util/tgsi_parse.h>
11 #include <tgsi/util/tgsi_build.h>
12 #include "vl_shader_build.h"
13 #include "vl_data.h"
14 #include "vl_defs.h"
15 #include "vl_util.h"
16
17 static int vlCreateVertexShaderFrameIDCT(struct VL_CONTEXT *context)
18 {
19 const unsigned int max_tokens = 50;
20
21 struct pipe_context *pipe;
22 struct pipe_shader_state fs;
23 struct tgsi_token *tokens;
24 struct tgsi_header *header;
25
26 struct tgsi_full_declaration decl;
27 struct tgsi_full_instruction inst;
28
29 unsigned int ti;
30 unsigned int i;
31
32 assert(context);
33
34 pipe = context->pipe;
35 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
36
37 /* Version */
38 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
39 /* Header */
40 header = (struct tgsi_header*)&tokens[1];
41 *header = tgsi_build_header();
42 /* Processor */
43 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
44
45 ti = 3;
46
47 /*
48 * decl i0 ; Vertex pos
49 * decl i1 ; Vertex texcoords
50 */
51 for (i = 0; i < 2; i++)
52 {
53 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
54 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
55 }
56
57 /*
58 * decl o0 ; Vertex pos
59 * decl o1 ; Vertex texcoords
60 */
61 for (i = 0; i < 2; i++)
62 {
63 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
64 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
65 }
66
67 /*
68 * mov o0, i0 ; Move pos in to pos out
69 * mov o1, i1 ; Move texcoord in to texcoord out */
70 for (i = 0; i < 2; ++i)
71 {
72 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
73 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
74 }
75
76 /* end */
77 inst = vl_end();
78 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
79
80 fs.tokens = tokens;
81 //context->states.idct.frame_vs = pipe->create_fs_state(pipe, &fs);
82 free(tokens);
83
84 return 0;
85 }
86
87 static int vlCreateFragmentShaderFrameIDCT(struct VL_CONTEXT *context)
88 {
89 const unsigned int max_tokens = 50;
90
91 struct pipe_context *pipe;
92 struct pipe_shader_state fs;
93 struct tgsi_token *tokens;
94 struct tgsi_header *header;
95
96 struct tgsi_full_declaration decl;
97 struct tgsi_full_instruction inst;
98
99 unsigned int ti;
100 unsigned int i;
101
102 assert(context);
103
104 pipe = context->pipe;
105 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
106
107 /* Version */
108 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
109 /* Header */
110 header = (struct tgsi_header*)&tokens[1];
111 *header = tgsi_build_header();
112 /* Processor */
113 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
114
115 ti = 3;
116
117 /* decl i0 ; Texcoords for s0 */
118 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
119 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
120
121 /* decl o0 ; Fragment color */
122 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
123 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
124
125 /* decl s0 ; Sampler for tex containing picture to display */
126 decl = vl_decl_samplers(0, 0);
127 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
128
129 /* tex2d t0, i0, s0 ; Read src pixel */
130 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
131 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
132
133 /* sub t0, t0, c0 ; Subtract bias vector from pixel */
134 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
135 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
136
137 /*
138 * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix
139 * dp4 o0.y, t0, c2
140 * dp4 o0.z, t0, c3
141 * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient
142 */
143 for (i = 0; i < 4; ++i)
144 {
145 inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1);
146 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
147 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
148 }
149
150 /* end */
151 inst = vl_end();
152 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
153
154 fs.tokens = tokens;
155 //context->states.idct.frame_fs = pipe->create_fs_state(pipe, &fs);
156 free(tokens);
157
158 return 0;
159 }
160
161 static int vlInitIDCT(struct VL_CONTEXT *context)
162 {
163 struct pipe_context *pipe;
164 struct pipe_sampler_state sampler;
165 struct pipe_texture template;
166 unsigned int i;
167
168 assert(context);
169
170 pipe = context->pipe;
171
172 context->states.idct.viewport.scale[0] = VL_BLOCK_WIDTH;
173 context->states.idct.viewport.scale[1] = VL_BLOCK_HEIGHT;
174 context->states.idct.viewport.scale[2] = 1;
175 context->states.idct.viewport.scale[3] = 1;
176 context->states.idct.viewport.translate[0] = 0;
177 context->states.idct.viewport.translate[1] = 0;
178 context->states.idct.viewport.translate[2] = 0;
179 context->states.idct.viewport.translate[3] = 0;
180
181 context->states.idct.render_target.width = VL_BLOCK_WIDTH;
182 context->states.idct.render_target.height = VL_BLOCK_HEIGHT;
183 context->states.idct.render_target.num_cbufs = 1;
184 context->states.idct.render_target.zsbuf = NULL;
185
186 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
187 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
188 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
189 sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
190 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
191 sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
192 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
193 sampler.compare_func = PIPE_FUNC_ALWAYS;
194 sampler.normalized_coords = 1;
195 /*sampler.prefilter = ;*/
196 /*sampler.shadow_ambient = ;*/
197 /*sampler.lod_bias = ;*/
198 sampler.min_lod = 0;
199 /*sampler.max_lod = ;*/
200 /*sampler.border_color[i] = ;*/
201 /*sampler.max_anisotropy = ;*/
202 context->states.idct.sampler = pipe->create_sampler_state(pipe, &sampler);
203
204 memset(&template, 0, sizeof(struct pipe_texture));
205 template.target = PIPE_TEXTURE_2D;
206 template.format = PIPE_FORMAT_A8L8_UNORM;
207 template.last_level = 0;
208 template.width[0] = 8;
209 template.height[0] = 8;
210 template.depth[0] = 1;
211 template.compressed = 0;
212 pf_get_block(template.format, &template.block);
213
214 context->states.idct.texture = pipe->screen->texture_create(pipe->screen, &template);
215
216 template.format = PIPE_FORMAT_A8R8G8B8_UNORM;
217 template.width[0] = 16;
218 template.height[0] = 1;
219
220 context->states.idct.basis = pipe->screen->texture_create(pipe->screen, &template);
221
222 for (i = 0; i < 2; ++i)
223 {
224 context->states.idct.vertex_bufs[i] = &context->states.csc.vertex_bufs[i];
225 context->states.idct.vertex_buf_elems[i] = &context->states.csc.vertex_buf_elems[i];
226 /*
227 context->states.idct.vertex_bufs[i].pitch = sizeof(struct VL_VERTEX2F);
228 context->states.idct.vertex_bufs[i].max_index = 3;
229 context->states.idct.vertex_bufs[i].buffer_offset = 0;
230 context->states.idct.vertex_bufs[i].buffer = pipe->winsys->buffer_create
231 (
232 pipe->winsys,
233 1,
234 PIPE_BUFFER_USAGE_VERTEX,
235 sizeof(struct VL_VERTEX2F) * 4
236 );
237
238 context->states.idct.vertex_buf_elems[i].src_offset = 0;
239 context->states.idct.vertex_buf_elems[i].vertex_buffer_index = i;
240 context->states.idct.vertex_buf_elems[i].nr_components = 2;
241 context->states.idct.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT;
242 */
243 }
244
245 vlCreateVertexShaderFrameIDCT(context);
246 vlCreateFragmentShaderFrameIDCT(context);
247
248 return 0;
249 }
250
251 static int vlDestroyIDCT(struct VL_CONTEXT *context)
252 {
253 //unsigned int i;
254
255 assert(context);
256
257 context->pipe->delete_sampler_state(context->pipe, context->states.idct.sampler);
258
259 //for (i = 0; i < 2; ++i)
260 //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vertex_bufs[i].buffer);
261
262 pipe_texture_release(&context->states.idct.texture);
263 pipe_texture_release(&context->states.idct.basis);
264
265 //context->pipe->delete_vs_state(context->pipe, context->states.idct.frame_vs);
266 //context->pipe->delete_fs_state(context->pipe, context->states.idct.frame_fs);
267
268 //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vs_const_buf.buffer);
269 //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.fs_const_buf.buffer);
270
271 return 0;
272 }
273
274 static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context)
275 {
276 const unsigned int max_tokens = 50;
277
278 struct pipe_context *pipe;
279 struct pipe_shader_state vs;
280 struct tgsi_token *tokens;
281 struct tgsi_header *header;
282
283 struct tgsi_full_declaration decl;
284 struct tgsi_full_instruction inst;
285
286 unsigned int ti;
287 unsigned int i;
288
289 assert(context);
290
291 pipe = context->pipe;
292 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
293
294 /* Version */
295 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
296 /* Header */
297 header = (struct tgsi_header*)&tokens[1];
298 *header = tgsi_build_header();
299 /* Processor */
300 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
301
302 ti = 3;
303
304 /*
305 * decl i0 ; Vertex pos
306 * decl i1 ; Luma texcoords
307 * decl i2 ; Chroma texcoords
308 */
309 for (i = 0; i < 3; i++)
310 {
311 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
312 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
313 }
314
315 /*
316 * decl c0 ; Scaling vector to scale unit rect to macroblock size
317 * decl c1 ; Translation vector to move macroblock into position
318 */
319 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
320 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
321
322 /*
323 * decl o0 ; Vertex pos
324 * decl o1 ; Luma texcoords
325 * decl o2 ; Chroma texcoords
326 */
327 for (i = 0; i < 3; i++)
328 {
329 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
330 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
331 }
332
333 /* decl t0 */
334 decl = vl_decl_temps(0, 0);
335 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
336
337 /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */
338 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
339 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
340
341 /* add o0, t0, c1 ; Translate rect into position */
342 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
343 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
344
345 /*
346 * mov o1, i1 ; Move input luma texcoords to output
347 * mov o2, i2 ; Move input chroma texcoords to output
348 */
349 for (i = 1; i < 3; ++i)
350 {
351 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
352 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
353 }
354
355 /* end */
356 inst = vl_end();
357 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
358
359 vs.tokens = tokens;
360 context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs);
361 free(tokens);
362
363 return 0;
364 }
365
366 static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context)
367 {
368 const unsigned int max_tokens = 100;
369
370 struct pipe_context *pipe;
371 struct pipe_shader_state fs;
372 struct tgsi_token *tokens;
373 struct tgsi_header *header;
374
375 struct tgsi_full_declaration decl;
376 struct tgsi_full_instruction inst;
377
378 unsigned int ti;
379 unsigned int i;
380
381 assert(context);
382
383 pipe = context->pipe;
384 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
385
386 /* Version */
387 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
388 /* Header */
389 header = (struct tgsi_header*)&tokens[1];
390 *header = tgsi_build_header();
391 /* Processor */
392 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
393
394 ti = 3;
395
396 /*
397 * decl i0 ; Texcoords for s0
398 * decl i1 ; Texcoords for s1, s2
399 */
400 for (i = 0; i < 2; ++i)
401 {
402 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
403 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
404 }
405
406 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
407 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
408 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
409
410 /* decl o0 ; Fragment color */
411 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
412 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
413
414 /* decl t0, t1 */
415 decl = vl_decl_temps(0, 1);
416 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
417
418 /*
419 * decl s0 ; Sampler for luma texture
420 * decl s1 ; Sampler for chroma Cb texture
421 * decl s2 ; Sampler for chroma Cr texture
422 */
423 for (i = 0; i < 3; ++i)
424 {
425 decl = vl_decl_samplers(i, i);
426 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti);
427 }
428
429 /*
430 * tex2d t1, i0, s0 ; Read texel from luma texture
431 * mov t0.x, t1.x ; Move luma sample into .x component
432 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
433 * mov t0.y, t1.x ; Move Cb sample into .y component
434 * tex2d t1, i1, s2 ; Read texel from chroma Cr texture
435 * mov t0.z, t1.x ; Move Cr sample into .z component
436 */
437 for (i = 0; i < 3; ++i)
438 {
439 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
440 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
441
442 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
443 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
444 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
445 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
446 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
447 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
448
449 }
450
451 /* mul o0, t0, c0 ; Rescale texel to correct range */
452 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
453 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
454
455 /* end */
456 inst = vl_end();
457 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
458
459 fs.tokens = tokens;
460 context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs);
461 free(tokens);
462
463 return 0;
464 }
465
466 static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context)
467 {
468 const unsigned int max_tokens = 100;
469
470 struct pipe_context *pipe;
471 struct pipe_shader_state vs;
472 struct tgsi_token *tokens;
473 struct tgsi_header *header;
474
475 struct tgsi_full_declaration decl;
476 struct tgsi_full_instruction inst;
477
478 unsigned int ti;
479 unsigned int i;
480
481 assert(context);
482
483 pipe = context->pipe;
484 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
485
486 /* Version */
487 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
488 /* Header */
489 header = (struct tgsi_header*)&tokens[1];
490 *header = tgsi_build_header();
491 /* Processor */
492 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
493
494 ti = 3;
495
496 /*
497 * decl i0 ; Vertex pos
498 * decl i1 ; Luma texcoords
499 * decl i2 ; Chroma texcoords
500 */
501 for (i = 0; i < 3; i++)
502 {
503 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
504 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
505 }
506
507 /*
508 * decl c0 ; Scaling vector to scale unit rect to macroblock size
509 * decl c1 ; Translation vector to move macroblock into position
510 * decl c2 ; Unused
511 * decl c3 ; Translation vector to move ref macroblock texcoords into position
512 */
513 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3);
514 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
515
516 /*
517 * decl o0 ; Vertex pos
518 * decl o1 ; Luma texcoords
519 * decl o2 ; Chroma texcoords
520 * decl o3 ; Ref macroblock texcoords
521 */
522 for (i = 0; i < 4; i++)
523 {
524 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
525 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
526 }
527
528 /* decl t0 */
529 decl = vl_decl_temps(0, 0);
530 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
531
532 /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */
533 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
534 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
535
536 /* add o0, t0, c1 ; Translate rect into position */
537 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
538 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
539
540 /*
541 * mov o1, i1 ; Move input luma texcoords to output
542 * mov o2, i2 ; Move input chroma texcoords to output
543 */
544 for (i = 1; i < 3; ++i)
545 {
546 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
547 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
548 }
549
550 /* add o3, t0, c3 ; Translate rect into position on ref macroblock */
551 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3);
552 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
553
554 /* end */
555 inst = vl_end();
556 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
557
558 vs.tokens = tokens;
559 context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs);
560 free(tokens);
561
562 return 0;
563 }
564
565 static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context)
566 {
567 const unsigned int max_tokens = 100;
568
569 struct pipe_context *pipe;
570 struct pipe_shader_state vs;
571 struct tgsi_token *tokens;
572 struct tgsi_header *header;
573
574 struct tgsi_full_declaration decl;
575 struct tgsi_full_instruction inst;
576
577 unsigned int ti;
578 unsigned int i;
579
580 assert(context);
581
582 pipe = context->pipe;
583 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
584
585 /* Version */
586 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
587 /* Header */
588 header = (struct tgsi_header*)&tokens[1];
589 *header = tgsi_build_header();
590 /* Processor */
591 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
592
593 ti = 3;
594
595 /*
596 * decl i0 ; Vertex pos
597 * decl i1 ; Luma texcoords
598 * decl i2 ; Chroma texcoords
599 */
600 for (i = 0; i < 3; i++)
601 {
602 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
603 ti += tgsi_build_full_declaration
604 (
605 &decl,
606 &tokens[ti],
607 header,
608 max_tokens - ti
609 );
610 }
611
612 /*
613 * decl c0 ; Scaling vector to scale unit rect to macroblock size
614 * decl c1 ; Translation vector to move macroblock into position
615 * decl c2 ; Denorm coefficients
616 * decl c3 ; Translation vector to move top field ref macroblock texcoords into position
617 * decl c4 ; Translation vector to move bottom field ref macroblock texcoords into position
618 */
619 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4);
620 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
621
622 /*
623 * decl o0 ; Vertex pos
624 * decl o1 ; Luma texcoords
625 * decl o2 ; Chroma texcoords
626 * decl o3 ; Top field ref macroblock texcoords
627 * decl o4 ; Bottom field ref macroblock texcoords
628 * decl o5 ; Denormalized vertex pos
629 */
630 for (i = 0; i < 6; i++)
631 {
632 decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
633 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
634 }
635
636 /* decl t0, t1 */
637 decl = vl_decl_temps(0, 1);
638 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
639
640 /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */
641 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
642 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
643
644 /* add t1, t0, c1 ; Translate rect into position */
645 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
646 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
647
648 /* mov o0, t1 ; Move vertex pos to output */
649 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1);
650 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
651
652 /*
653 mov o1, i1 ; Move input luma texcoords to output
654 mov o2, i2 ; Move input chroma texcoords to output
655 */
656 for (i = 1; i < 3; ++i)
657 {
658 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
659 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
660 }
661
662 /* add o3, t0, c3 ; Translate top field rect into position on ref macroblock
663 add o4, t0, c4 ; Translate bottom field rect into position on ref macroblock */
664 for (i = 0; i < 2; ++i)
665 {
666 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3);
667 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
668 }
669
670 /* mul o5, t1, c2 ; Denorm vertex pos */
671 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2);
672 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
673
674 /* end */
675 inst = vl_end();
676 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
677
678 vs.tokens = tokens;
679 context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs);
680 free(tokens);
681
682 return 0;
683 }
684
685 static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context)
686 {
687 const unsigned int max_tokens = 100;
688
689 struct pipe_context *pipe;
690 struct pipe_shader_state fs;
691 struct tgsi_token *tokens;
692 struct tgsi_header *header;
693
694 struct tgsi_full_declaration decl;
695 struct tgsi_full_instruction inst;
696
697 unsigned int ti;
698 unsigned int i;
699
700 assert(context);
701
702 pipe = context->pipe;
703 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
704
705 /* Version */
706 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
707 /* Header */
708 header = (struct tgsi_header*)&tokens[1];
709 *header = tgsi_build_header();
710 /* Processor */
711 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
712
713 ti = 3;
714
715 /*
716 * decl i0 ; Texcoords for s0
717 * decl i1 ; Texcoords for s1, s2
718 * decl i2 ; Texcoords for s3
719 */
720 for (i = 0; i < 3; ++i)
721 {
722 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
723 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
724 }
725
726 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
727 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
728 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
729
730 /* decl o0 ; Fragment color */
731 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
732 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
733
734 /* decl t0, t1 */
735 decl = vl_decl_temps(0, 1);
736 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
737
738 /*
739 * decl s0 ; Sampler for luma texture
740 * decl s1 ; Sampler for chroma Cb texture
741 * decl s2 ; Sampler for chroma Cr texture
742 * decl s3 ; Sampler for ref surface texture
743 */
744 for (i = 0; i < 4; ++i)
745 {
746 decl = vl_decl_samplers(i, i);
747 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
748 }
749
750 /*
751 * tex2d t1, i0, s0 ; Read texel from luma texture
752 * mov t0.x, t1.x ; Move luma sample into .x component
753 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
754 * mov t0.y, t1.x ; Move Cb sample into .y component
755 * tex2d t1, i1, s2 ; Read texel from chroma Cr texture
756 * mov t0.z, t1.x ; Move Cr sample into .z component
757 */
758 for (i = 0; i < 3; ++i)
759 {
760 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
761 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
762
763 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
764 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
765 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
766 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
767 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
768 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
769
770 }
771
772 /* mul t0, t0, c0 ; Rescale texel to correct range */
773 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
774 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
775
776 /* tex2d t1, i2, s3 ; Read texel from ref macroblock */
777 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3);
778 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
779
780 /* add o0, t0, t1 ; Add ref and differential to form final output */
781 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
782 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
783
784 /* end */
785 inst = vl_end();
786 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
787
788 fs.tokens = tokens;
789 context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs);
790 free(tokens);
791
792 return 0;
793 }
794
795 static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context)
796 {
797 const unsigned int max_tokens = 200;
798
799 struct pipe_context *pipe;
800 struct pipe_shader_state fs;
801 struct tgsi_token *tokens;
802 struct tgsi_header *header;
803
804 struct tgsi_full_declaration decl;
805 struct tgsi_full_instruction inst;
806
807 unsigned int ti;
808 unsigned int i;
809
810 assert(context);
811
812 pipe = context->pipe;
813 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
814
815 /* Version */
816 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
817 /* Header */
818 header = (struct tgsi_header*)&tokens[1];
819 *header = tgsi_build_header();
820 /* Processor */
821 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
822
823 ti = 3;
824
825 /*
826 * decl i0 ; Texcoords for s0
827 * decl i1 ; Texcoords for s1, s2
828 * decl i2 ; Texcoords for s3
829 * decl i3 ; Texcoords for s3
830 * decl i4 ; Denormalized vertex pos
831 */
832 for (i = 0; i < 5; ++i)
833 {
834 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
835 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
836 }
837
838 /*
839 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
840 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
841 */
842 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
843 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
844
845 /* decl o0 ; Fragment color */
846 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
847 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
848
849 /* decl t0-t4 */
850 decl = vl_decl_temps(0, 4);
851 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
852
853 /*
854 * decl s0 ; Sampler for luma texture
855 * decl s1 ; Sampler for chroma Cb texture
856 * decl s2 ; Sampler for chroma Cr texture
857 * decl s3 ; Sampler for ref surface texture
858 */
859 for (i = 0; i < 4; ++i)
860 {
861 decl = vl_decl_samplers(i, i);
862 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
863 }
864
865 /*
866 * tex2d t1, i0, s0 ; Read texel from luma texture
867 * mov t0.x, t1.x ; Move luma sample into .x component
868 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
869 * mov t0.y, t1.x ; Move Cb sample into .y component
870 * tex2d t1, i1, s2 ; Read texel from chroma Cr texture
871 * mov t0.z, t1.x ; Move Cr sample into .z component
872 */
873 for (i = 0; i < 3; ++i)
874 {
875 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
876 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
877
878 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
879 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
880 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
881 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
882 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
883 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
884
885 }
886
887 /* mul t0, t0, c0 ; Rescale texel to correct range */
888 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
889 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
890
891 /*
892 * tex2d t1, i2, s3 ; Read texel from ref macroblock top field
893 * tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field
894 */
895 for (i = 0; i < 2; ++i)
896 {
897 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3);
898 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
899 }
900
901 /* XXX: Pos values off by 0.5? */
902 /* sub t4, i4.y, c1.x ; Sub 0.5 from denormalized pos */
903 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1);
904 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
905 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
906 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
907 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
908 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
909 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
910 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
911 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
912 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
913
914 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
915 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
916 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
917 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
918 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
919 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
920 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
921
922 /* floor t3, t3 ; Get rid of fractional part */
923 inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
924 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
925
926 /* mul t3, t3, c1.y ; Multiply by 2 */
927 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
928 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
929 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
930 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
931 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
932 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
933
934 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
935 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
936 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
937
938 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
939 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
940 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
941 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
942
943 /* add o0, t0, t1 ; Add ref and differential to form final output */
944 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
945 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
946
947 /* end */
948 inst = vl_end();
949 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
950
951 fs.tokens = tokens;
952 context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs);
953 free(tokens);
954
955 return 0;
956 }
957
958 static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context)
959 {
960 const unsigned int max_tokens = 100;
961
962 struct pipe_context *pipe;
963 struct pipe_shader_state vs;
964 struct tgsi_token *tokens;
965 struct tgsi_header *header;
966
967 struct tgsi_full_declaration decl;
968 struct tgsi_full_instruction inst;
969
970 unsigned int ti;
971 unsigned int i;
972
973 assert(context);
974
975 pipe = context->pipe;
976 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
977
978 /* Version */
979 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
980 /* Header */
981 header = (struct tgsi_header*)&tokens[1];
982 *header = tgsi_build_header();
983 /* Processor */
984 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
985
986 ti = 3;
987
988 /*
989 * decl i0 ; Vertex pos
990 * decl i1 ; Luma texcoords
991 * decl i2 ; Chroma texcoords
992 */
993 for (i = 0; i < 3; i++)
994 {
995 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
996 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
997 }
998
999 /*
1000 * decl c0 ; Scaling vector to scale unit rect to macroblock size
1001 * decl c1 ; Translation vector to move macroblock into position
1002 * decl c2 ; Unused
1003 * decl c3 ; Translation vector to move past ref macroblock texcoords into position
1004 * decl c4 ; Unused
1005 * decl c5 ; Translation vector to move future ref macroblock texcoords into position
1006 */
1007 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5);
1008 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1009
1010 /*
1011 * decl o0 ; Vertex pos
1012 * decl o1 ; Luma texcoords
1013 * decl o2 ; Chroma texcoords
1014 * decl o3 ; Past ref macroblock texcoords
1015 * decl o4 ; Future ref macroblock texcoords
1016 */
1017 for (i = 0; i < 5; i++)
1018 {
1019 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1020 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1021 }
1022
1023 /* decl t0 */
1024 decl = vl_decl_temps(0, 0);
1025 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1026
1027 /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */
1028 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1029 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1030
1031 /* add o0, t0, c1 ; Translate rect into position */
1032 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
1033 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1034
1035 /*
1036 * mov o1, i1 ; Move input luma texcoords to output
1037 * mov o2, i2 ; Move input chroma texcoords to output
1038 */
1039 for (i = 1; i < 3; ++i)
1040 {
1041 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1042 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1043 }
1044
1045 /* add o3, t0, c3 ; Translate rect into position on past ref macroblock
1046 add o4, t0, c5 ; Translate rect into position on future ref macroblock */
1047 for (i = 0; i < 2; ++i)
1048 {
1049 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3);
1050 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1051 }
1052
1053 /* end */
1054 inst = vl_end();
1055 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1056
1057 vs.tokens = tokens;
1058 context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs);
1059 free(tokens);
1060
1061 return 0;
1062 }
1063
1064 static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context)
1065 {
1066 const unsigned int max_tokens = 100;
1067
1068 struct pipe_context *pipe;
1069 struct pipe_shader_state vs;
1070 struct tgsi_token *tokens;
1071 struct tgsi_header *header;
1072
1073 struct tgsi_full_declaration decl;
1074 struct tgsi_full_instruction inst;
1075
1076 unsigned int ti;
1077 unsigned int i;
1078
1079 assert(context);
1080
1081 pipe = context->pipe;
1082 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1083
1084 /* Version */
1085 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1086 /* Header */
1087 header = (struct tgsi_header*)&tokens[1];
1088 *header = tgsi_build_header();
1089 /* Processor */
1090 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1091
1092 ti = 3;
1093
1094 /*
1095 * decl i0 ; Vertex pos
1096 * decl i1 ; Luma texcoords
1097 * decl i2 ; Chroma texcoords
1098 */
1099 for (i = 0; i < 3; i++)
1100 {
1101 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1102 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1103 }
1104
1105 /*
1106 * decl c0 ; Scaling vector to scale unit rect to macroblock size
1107 * decl c1 ; Translation vector to move macroblock into position
1108 * decl c2 ; Denorm coefficients
1109 * decl c3 ; Translation vector to move top field past ref macroblock texcoords into position
1110 * decl c4 ; Translation vector to move bottom field past ref macroblock texcoords into position
1111 * decl c5 ; Translation vector to move top field future ref macroblock texcoords into position
1112 * decl c6 ; Translation vector to move bottom field future ref macroblock texcoords into position
1113 */
1114 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6);
1115 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1116
1117 /*
1118 * decl o0 ; Vertex pos
1119 * decl o1 ; Luma texcoords
1120 * decl o2 ; Chroma texcoords
1121 * decl o3 ; Top field past ref macroblock texcoords
1122 * decl o4 ; Bottom field past ref macroblock texcoords
1123 * decl o5 ; Top field future ref macroblock texcoords
1124 * decl o6 ; Bottom field future ref macroblock texcoords
1125 * decl o7 ; Denormalized vertex pos
1126 */
1127 for (i = 0; i < 8; i++)
1128 {
1129 decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1130 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1131 }
1132
1133 /* decl t0, t1 */
1134 decl = vl_decl_temps(0, 1);
1135 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1136
1137 /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */
1138 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
1139 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1140
1141 /* add t1, t0, c1 ; Translate rect into position */
1142 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
1143 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1144
1145 /* mov o0, t1 ; Move vertex pos to output */
1146 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1);
1147 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1148
1149 /*
1150 * mov o1, i1 ; Move input luma texcoords to output
1151 * mov o2, i2 ; Move input chroma texcoords to output
1152 */
1153 for (i = 1; i < 3; ++i)
1154 {
1155 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
1156 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1157 }
1158
1159 /*
1160 * add o3, t0, c3 ; Translate top field rect into position on past ref macroblock
1161 * add o4, t0, c4 ; Translate bottom field rect into position on past ref macroblock
1162 * add o5, t0, c5 ; Translate top field rect into position on future ref macroblock
1163 * add o6, t0, c6 ; Translate bottom field rect into position on future ref macroblock
1164 */
1165 for (i = 0; i < 4; ++i)
1166 {
1167 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3);
1168 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1169 }
1170
1171 /* mul o7, t1, c2 ; Denorm vertex pos */
1172 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2);
1173 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1174
1175 /* end */
1176 inst = vl_end();
1177 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1178
1179 vs.tokens = tokens;
1180 context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs);
1181 free(tokens);
1182
1183 return 0;
1184 }
1185
1186 static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context)
1187 {
1188 const unsigned int max_tokens = 100;
1189
1190 struct pipe_context *pipe;
1191 struct pipe_shader_state fs;
1192 struct tgsi_token *tokens;
1193 struct tgsi_header *header;
1194
1195 struct tgsi_full_declaration decl;
1196 struct tgsi_full_instruction inst;
1197
1198 unsigned int ti;
1199 unsigned int i;
1200
1201 assert(context);
1202
1203 pipe = context->pipe;
1204 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1205
1206 /* Version */
1207 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1208 /* Header */
1209 header = (struct tgsi_header*)&tokens[1];
1210 *header = tgsi_build_header();
1211 /* Processor */
1212 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1213
1214 ti = 3;
1215
1216 /*
1217 * decl i0 ; Texcoords for s0
1218 * decl i1 ; Texcoords for s1, s2
1219 * decl i2 ; Texcoords for s3
1220 * decl i3 ; Texcoords for s4
1221 */
1222 for (i = 0; i < 4; ++i)
1223 {
1224 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1225 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1226 }
1227
1228 /*
1229 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1230 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
1231 */
1232 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1233 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1234
1235 /* decl o0 ; Fragment color */
1236 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1237 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1238
1239 /* decl t0-t2 */
1240 decl = vl_decl_temps(0, 2);
1241 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1242
1243 /*
1244 * decl s0 ; Sampler for luma texture
1245 * decl s1 ; Sampler for chroma Cb texture
1246 * decl s2 ; Sampler for chroma Cr texture
1247 * decl s3 ; Sampler for past ref surface texture
1248 * decl s4 ; Sampler for future ref surface texture
1249 */
1250 for (i = 0; i < 5; ++i)
1251 {
1252 decl = vl_decl_samplers(i, i);
1253 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1254 }
1255
1256 /*
1257 * tex2d t1, i0, s0 ; Read texel from luma texture
1258 * mov t0.x, t1.x ; Move luma sample into .x component
1259 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1260 * mov t0.y, t1.x ; Move Cb sample into .y component
1261 * tex2d t1, i1, s2 ; Read texel from chroma Cr texture
1262 * mov t0.z, t1.x ; Move Cr sample into .z component
1263 */
1264 for (i = 0; i < 3; ++i)
1265 {
1266 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
1267 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1268
1269 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1270 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1271 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1272 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1273 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1274 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1275
1276 }
1277
1278 /* mul t0, t0, c0 ; Rescale texel to correct range */
1279 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1280 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1281
1282 /*
1283 * tex2d t1, i2, s3 ; Read texel from past ref macroblock
1284 * tex2d t2, i3, s4 ; Read texel from future ref macroblock
1285 */
1286 for (i = 0; i < 2; ++i)
1287 {
1288 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, i + 3);
1289 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1290 }
1291
1292 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
1293 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1294 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1295 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1296 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1297 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1298 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1299
1300 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1301 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1302 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1303
1304 /* end */
1305 inst = vl_end();
1306 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1307
1308 fs.tokens = tokens;
1309 context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs);
1310 free(tokens);
1311
1312 return 0;
1313 }
1314
1315 static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context)
1316 {
1317 const unsigned int max_tokens = 200;
1318
1319 struct pipe_context *pipe;
1320 struct pipe_shader_state fs;
1321 struct tgsi_token *tokens;
1322 struct tgsi_header *header;
1323
1324 struct tgsi_full_declaration decl;
1325 struct tgsi_full_instruction inst;
1326
1327 unsigned int ti;
1328 unsigned int i;
1329
1330 assert(context);
1331
1332 pipe = context->pipe;
1333 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1334
1335 /* Version */
1336 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1337 /* Header */
1338 header = (struct tgsi_header*)&tokens[1];
1339 *header = tgsi_build_header();
1340 /* Processor */
1341 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1342
1343 ti = 3;
1344
1345 /*
1346 * decl i0 ; Texcoords for s0
1347 * decl i1 ; Texcoords for s1, s2
1348 * decl i2 ; Texcoords for s3
1349 * decl i3 ; Texcoords for s3
1350 * decl i4 ; Texcoords for s4
1351 * decl i5 ; Texcoords for s4
1352 * decl i6 ; Denormalized vertex pos
1353 */
1354 for (i = 0; i < 7; ++i)
1355 {
1356 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1357 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1358 }
1359
1360 /*
1361 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1362 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
1363 * ; and for Y-mod-2 top/bottom field selection
1364 */
1365 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1366 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1367
1368 /* decl o0 ; Fragment color */
1369 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1370 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1371
1372 /* decl t0-t5 */
1373 decl = vl_decl_temps(0, 5);
1374 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1375
1376 /*
1377 * decl s0 ; Sampler for luma texture
1378 * decl s1 ; Sampler for chroma Cb texture
1379 * decl s2 ; Sampler for chroma Cr texture
1380 * decl s3 ; Sampler for past ref surface texture
1381 * decl s4 ; Sampler for future ref surface texture
1382 */
1383 for (i = 0; i < 5; ++i)
1384 {
1385 decl = vl_decl_samplers(i, i);
1386 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1387 }
1388
1389 /*
1390 * tex2d t1, i0, s0 ; Read texel from luma texture
1391 * mov t0.x, t1.x ; Move luma sample into .x component
1392 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1393 * mov t0.y, t1.x ; Move Cb sample into .y component
1394 * tex2d t1, i1, s2 ; Read texel from chroma Cr texture
1395 * mov t0.z, t1.x ; Move Cr sample into .z component
1396 */
1397 for (i = 0; i < 3; ++i)
1398 {
1399 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
1400 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1401
1402 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1403 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1404 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1405 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1406 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1407 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1408
1409 }
1410
1411 /* mul t0, t0, c0 ; Rescale texel to correct range */
1412 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1413 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1414
1415 /* XXX: Pos values off by 0.5? */
1416 /* sub t4, i6.y, c1.x ; Sub 0.5 from denormalized pos */
1417 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1);
1418 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1419 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1420 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1421 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1422 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1423 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1424 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1425 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1426 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1427
1428 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1429 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
1430 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1431 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1432 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1433 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1434 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1435
1436 /* floor t3, t3 ; Get rid of fractional part */
1437 inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
1438 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1439
1440 /* mul t3, t3, c1.y ; Multiply by 2 */
1441 inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
1442 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1443 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1444 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1445 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1446 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1447
1448 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1449 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
1450 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1451
1452 /*
1453 * tex2d t1, i2, s3 ; Read texel from past ref macroblock top field
1454 * tex2d t2, i3, s3 ; Read texel from past ref macroblock bottom field
1455 */
1456 for (i = 0; i < 2; ++i)
1457 {
1458 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3);
1459 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1460 }
1461
1462 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1463 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
1464 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1465 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1466
1467 /*
1468 * tex2d t4, i4, s4 ; Read texel from future ref macroblock top field
1469 * tex2d t5, i5, s4 ; Read texel from future ref macroblock bottom field
1470 */
1471 for (i = 0; i < 2; ++i)
1472 {
1473 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4);
1474 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1475 }
1476
1477 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1478 /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
1479 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
1480 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1481
1482 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
1483 inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1484 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1485 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1486 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1487 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1488 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1489
1490 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1491 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1492 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1493
1494 /* end */
1495 inst = vl_end();
1496 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1497
1498 fs.tokens = tokens;
1499 context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs);
1500 free(tokens);
1501
1502 return 0;
1503 }
1504
1505 int vlCreateDataBufsMC(struct VL_CONTEXT *context)
1506 {
1507 struct pipe_context *pipe;
1508 unsigned int i;
1509
1510 assert(context);
1511
1512 pipe = context->pipe;
1513
1514 /* Create our vertex buffer and vertex buffer element */
1515 context->states.mc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F);
1516 context->states.mc.vertex_bufs[0].max_index = 23;
1517 context->states.mc.vertex_bufs[0].buffer_offset = 0;
1518 context->states.mc.vertex_bufs[0].buffer = pipe->winsys->buffer_create
1519 (
1520 pipe->winsys,
1521 1,
1522 PIPE_BUFFER_USAGE_VERTEX,
1523 sizeof(struct VL_VERTEX2F) * 24
1524 );
1525
1526 context->states.mc.vertex_buf_elems[0].src_offset = 0;
1527 context->states.mc.vertex_buf_elems[0].vertex_buffer_index = 0;
1528 context->states.mc.vertex_buf_elems[0].nr_components = 2;
1529 context->states.mc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
1530
1531 /* Create our texcoord buffers and texcoord buffer elements */
1532 for (i = 1; i < 3; ++i)
1533 {
1534 context->states.mc.vertex_bufs[i].pitch = sizeof(struct VL_TEXCOORD2F);
1535 context->states.mc.vertex_bufs[i].max_index = 23;
1536 context->states.mc.vertex_bufs[i].buffer_offset = 0;
1537 context->states.mc.vertex_bufs[i].buffer = pipe->winsys->buffer_create
1538 (
1539 pipe->winsys,
1540 1,
1541 PIPE_BUFFER_USAGE_VERTEX,
1542 sizeof(struct VL_TEXCOORD2F) * 24
1543 );
1544
1545 context->states.mc.vertex_buf_elems[i].src_offset = 0;
1546 context->states.mc.vertex_buf_elems[i].vertex_buffer_index = i;
1547 context->states.mc.vertex_buf_elems[i].nr_components = 2;
1548 context->states.mc.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT;
1549 }
1550
1551 /* Fill buffers */
1552 memcpy
1553 (
1554 pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
1555 vl_chroma_420_texcoords,
1556 sizeof(struct VL_VERTEX2F) * 24
1557 );
1558 memcpy
1559 (
1560 pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
1561 vl_luma_texcoords,
1562 sizeof(struct VL_TEXCOORD2F) * 24
1563 );
1564 /* TODO: Accomodate 422, 444 */
1565 memcpy
1566 (
1567 pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
1568 vl_chroma_420_texcoords,
1569 sizeof(struct VL_TEXCOORD2F) * 24
1570 );
1571
1572 for (i = 0; i < 3; ++i)
1573 pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.vertex_bufs[i].buffer);
1574
1575 /* Create our constant buffer */
1576 context->states.mc.vs_const_buf.size = sizeof(struct VL_MC_VS_CONSTS);
1577 context->states.mc.vs_const_buf.buffer = pipe->winsys->buffer_create
1578 (
1579 pipe->winsys,
1580 1,
1581 PIPE_BUFFER_USAGE_CONSTANT,
1582 context->states.mc.vs_const_buf.size
1583 );
1584
1585 context->states.mc.fs_const_buf.size = sizeof(struct VL_MC_FS_CONSTS);
1586 context->states.mc.fs_const_buf.buffer = pipe->winsys->buffer_create
1587 (
1588 pipe->winsys,
1589 1,
1590 PIPE_BUFFER_USAGE_CONSTANT,
1591 context->states.mc.fs_const_buf.size
1592 );
1593
1594 memcpy
1595 (
1596 pipe->winsys->buffer_map(pipe->winsys, context->states.mc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
1597 &vl_mc_fs_consts,
1598 sizeof(struct VL_MC_FS_CONSTS)
1599 );
1600
1601 pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.fs_const_buf.buffer);
1602
1603 return 0;
1604 }
1605
1606 static int vlInitMC(struct VL_CONTEXT *context)
1607 {
1608 struct pipe_context *pipe;
1609 struct pipe_sampler_state sampler;
1610 struct pipe_texture template;
1611 unsigned int filters[5];
1612 unsigned int i;
1613
1614 assert(context);
1615
1616 pipe = context->pipe;
1617
1618 /* For MC we render to textures, which are rounded up to nearest POT */
1619 context->states.mc.viewport.scale[0] = vlRoundUpPOT(context->video_width);
1620 context->states.mc.viewport.scale[1] = vlRoundUpPOT(context->video_height);
1621 context->states.mc.viewport.scale[2] = 1;
1622 context->states.mc.viewport.scale[3] = 1;
1623 context->states.mc.viewport.translate[0] = 0;
1624 context->states.mc.viewport.translate[1] = 0;
1625 context->states.mc.viewport.translate[2] = 0;
1626 context->states.mc.viewport.translate[3] = 0;
1627
1628 context->states.mc.render_target.width = vlRoundUpPOT(context->video_width);
1629 context->states.mc.render_target.height = vlRoundUpPOT(context->video_height);
1630 context->states.mc.render_target.num_cbufs = 1;
1631 /* FB for MC stage is a VL_SURFACE, set in vlSetRenderSurface() */
1632 context->states.mc.render_target.zsbuf = NULL;
1633
1634 filters[0] = PIPE_TEX_FILTER_NEAREST;
1635 filters[1] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
1636 filters[2] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
1637 filters[3] = PIPE_TEX_FILTER_LINEAR;
1638 filters[4] = PIPE_TEX_FILTER_LINEAR;
1639
1640 for (i = 0; i < 5; ++i)
1641 {
1642 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1643 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1644 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1645 sampler.min_img_filter = filters[i];
1646 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
1647 sampler.mag_img_filter = filters[i];
1648 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
1649 sampler.compare_func = PIPE_FUNC_ALWAYS;
1650 sampler.normalized_coords = 1;
1651 /*sampler.prefilter = ;*/
1652 /*sampler.shadow_ambient = ;*/
1653 /*sampler.lod_bias = ;*/
1654 sampler.min_lod = 0;
1655 /*sampler.max_lod = ;*/
1656 /*sampler.border_color[i] = ;*/
1657 /*sampler.max_anisotropy = ;*/
1658 context->states.mc.samplers[i] = pipe->create_sampler_state(pipe, &sampler);
1659 }
1660
1661 memset(&template, 0, sizeof(struct pipe_texture));
1662 template.target = PIPE_TEXTURE_2D;
1663 template.format = PIPE_FORMAT_R16_SNORM;
1664 template.last_level = 0;
1665 template.width[0] = 8;
1666 template.height[0] = 8 * 4;
1667 template.depth[0] = 1;
1668 template.compressed = 0;
1669 pf_get_block(template.format, &template.block);
1670
1671 context->states.mc.textures[0] = pipe->screen->texture_create(pipe->screen, &template);
1672
1673 if (context->video_format == VL_FORMAT_YCBCR_420)
1674 template.height[0] = 8;
1675 else if (context->video_format == VL_FORMAT_YCBCR_422)
1676 template.height[0] = 8 * 2;
1677 else if (context->video_format == VL_FORMAT_YCBCR_444)
1678 template.height[0] = 8 * 4;
1679 else
1680 assert(0);
1681
1682 context->states.mc.textures[1] = pipe->screen->texture_create(pipe->screen, &template);
1683 context->states.mc.textures[2] = pipe->screen->texture_create(pipe->screen, &template);
1684
1685 /* textures[3] & textures[4] are assigned from VL_SURFACEs for P and B macroblocks at render time */
1686
1687 vlCreateVertexShaderIMC(context);
1688 vlCreateFragmentShaderIMC(context);
1689 vlCreateVertexShaderFramePMC(context);
1690 vlCreateVertexShaderFieldPMC(context);
1691 vlCreateFragmentShaderFramePMC(context);
1692 vlCreateFragmentShaderFieldPMC(context);
1693 vlCreateVertexShaderFrameBMC(context);
1694 vlCreateVertexShaderFieldBMC(context);
1695 vlCreateFragmentShaderFrameBMC(context);
1696 vlCreateFragmentShaderFieldBMC(context);
1697 vlCreateDataBufsMC(context);
1698
1699 return 0;
1700 }
1701
1702 static int vlDestroyMC(struct VL_CONTEXT *context)
1703 {
1704 unsigned int i;
1705
1706 assert(context);
1707
1708 for (i = 0; i < 5; ++i)
1709 context->pipe->delete_sampler_state(context->pipe, context->states.mc.samplers[i]);
1710
1711 for (i = 0; i < 3; ++i)
1712 context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vertex_bufs[i].buffer);
1713
1714 /* Textures 3 & 4 are not created directly, no need to release them here */
1715 for (i = 0; i < 3; ++i)
1716 pipe_texture_release(&context->states.mc.textures[i]);
1717
1718 context->pipe->delete_vs_state(context->pipe, context->states.mc.i_vs);
1719 context->pipe->delete_fs_state(context->pipe, context->states.mc.i_fs);
1720
1721 for (i = 0; i < 2; ++i)
1722 {
1723 context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs[i]);
1724 context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs[i]);
1725 context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs[i]);
1726 context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs[i]);
1727 }
1728
1729 context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vs_const_buf.buffer);
1730 context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.fs_const_buf.buffer);
1731
1732 return 0;
1733 }
1734
1735 static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context)
1736 {
1737 const unsigned int max_tokens = 50;
1738
1739 struct pipe_context *pipe;
1740 struct pipe_shader_state vs;
1741 struct tgsi_token *tokens;
1742 struct tgsi_header *header;
1743
1744 struct tgsi_full_declaration decl;
1745 struct tgsi_full_instruction inst;
1746
1747 unsigned int ti;
1748 unsigned int i;
1749
1750 assert(context);
1751
1752 pipe = context->pipe;
1753 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1754
1755 /* Version */
1756 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1757 /* Header */
1758 header = (struct tgsi_header*)&tokens[1];
1759 *header = tgsi_build_header();
1760 /* Processor */
1761 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
1762
1763 ti = 3;
1764
1765 /*
1766 * decl i0 ; Vertex pos
1767 * decl i1 ; Vertex texcoords
1768 */
1769 for (i = 0; i < 2; i++)
1770 {
1771 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1772 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1773 }
1774
1775 /*
1776 * decl c0 ; Scaling vector to scale texcoord rect to source size
1777 * decl c1 ; Translation vector to move texcoord rect into position
1778 */
1779 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1780 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1781
1782 /*
1783 * decl o0 ; Vertex pos
1784 * decl o1 ; Vertex texcoords
1785 */
1786 for (i = 0; i < 2; i++)
1787 {
1788 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
1789 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1790 }
1791
1792 /* decl t0 */
1793 decl = vl_decl_temps(0, 0);
1794 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1795
1796 /* mov o0, i0 ; Move pos in to pos out */
1797 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0);
1798 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1799
1800 /* mul t0, i1, c0 ; Scale unit texcoord rect to source size */
1801 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 1, TGSI_FILE_CONSTANT, 0);
1802 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1803
1804 /* add o1, t0, c1 ; Translate texcoord rect into position */
1805 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1);
1806 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1807
1808 /* end */
1809 inst = vl_end();
1810 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1811
1812 vs.tokens = tokens;
1813 context->states.csc.vertex_shader = pipe->create_vs_state(pipe, &vs);
1814 //free(tokens);
1815
1816 return 0;
1817 }
1818
1819 static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context)
1820 {
1821 const unsigned int max_tokens = 50;
1822
1823 struct pipe_context *pipe;
1824 struct pipe_shader_state fs;
1825 struct tgsi_token *tokens;
1826 struct tgsi_header *header;
1827
1828 struct tgsi_full_declaration decl;
1829 struct tgsi_full_instruction inst;
1830
1831 unsigned int ti;
1832 unsigned int i;
1833
1834 assert(context);
1835
1836 pipe = context->pipe;
1837 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1838
1839 /* Version */
1840 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1841 /* Header */
1842 header = (struct tgsi_header*)&tokens[1];
1843 *header = tgsi_build_header();
1844 /* Processor */
1845 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1846
1847 ti = 3;
1848
1849 /* decl i0 ; Texcoords for s0 */
1850 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
1851 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1852
1853 /*
1854 * decl c0 ; Bias vector for CSC
1855 * decl c1-c4 ; CSC matrix c1-c4
1856 */
1857 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4);
1858 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1859
1860 /* decl o0 ; Fragment color */
1861 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1862 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1863
1864 /* decl t0 */
1865 decl = vl_decl_temps(0, 0);
1866 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1867
1868 /* decl s0 ; Sampler for tex containing picture to display */
1869 decl = vl_decl_samplers(0, 0);
1870 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1871
1872 /* tex2d t0, i0, s0 ; Read src pixel */
1873 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
1874 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1875
1876 /* sub t0, t0, c0 ; Subtract bias vector from pixel */
1877 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1878 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1879
1880 /*
1881 * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix
1882 * dp4 o0.y, t0, c2
1883 * dp4 o0.z, t0, c3
1884 * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient
1885 */
1886 for (i = 0; i < 4; ++i)
1887 {
1888 inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1);
1889 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1890 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1891 }
1892
1893 /* end */
1894 inst = vl_end();
1895 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1896
1897 fs.tokens = tokens;
1898 context->states.csc.fragment_shader = pipe->create_fs_state(pipe, &fs);
1899 //free(tokens);
1900
1901 return 0;
1902 }
1903
1904 static int vlCreateDataBufsCSC(struct VL_CONTEXT *context)
1905 {
1906 struct pipe_context *pipe;
1907
1908 assert(context);
1909
1910 pipe = context->pipe;
1911
1912 /*
1913 Create our vertex buffer and vertex buffer element
1914 VB contains 4 vertices that render a quad covering the entire window
1915 to display a rendered surface
1916 Quad is rendered as a tri strip
1917 */
1918 context->states.csc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F);
1919 context->states.csc.vertex_bufs[0].max_index = 3;
1920 context->states.csc.vertex_bufs[0].buffer_offset = 0;
1921 context->states.csc.vertex_bufs[0].buffer = pipe->winsys->buffer_create
1922 (
1923 pipe->winsys,
1924 1,
1925 PIPE_BUFFER_USAGE_VERTEX,
1926 sizeof(struct VL_VERTEX2F) * 4
1927 );
1928
1929 memcpy
1930 (
1931 pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
1932 vl_surface_vertex_positions,
1933 sizeof(struct VL_VERTEX2F) * 4
1934 );
1935
1936 pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[0].buffer);
1937
1938 context->states.csc.vertex_buf_elems[0].src_offset = 0;
1939 context->states.csc.vertex_buf_elems[0].vertex_buffer_index = 0;
1940 context->states.csc.vertex_buf_elems[0].nr_components = 2;
1941 context->states.csc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
1942
1943 /*
1944 Create our texcoord buffer and texcoord buffer element
1945 Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
1946 */
1947 context->states.csc.vertex_bufs[1].pitch = sizeof(struct VL_TEXCOORD2F);
1948 context->states.csc.vertex_bufs[1].max_index = 3;
1949 context->states.csc.vertex_bufs[1].buffer_offset = 0;
1950 context->states.csc.vertex_bufs[1].buffer = pipe->winsys->buffer_create
1951 (
1952 pipe->winsys,
1953 1,
1954 PIPE_BUFFER_USAGE_VERTEX,
1955 sizeof(struct VL_TEXCOORD2F) * 4
1956 );
1957
1958 memcpy
1959 (
1960 pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
1961 vl_surface_texcoords,
1962 sizeof(struct VL_TEXCOORD2F) * 4
1963 );
1964
1965 pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[1].buffer);
1966
1967 context->states.csc.vertex_buf_elems[1].src_offset = 0;
1968 context->states.csc.vertex_buf_elems[1].vertex_buffer_index = 1;
1969 context->states.csc.vertex_buf_elems[1].nr_components = 2;
1970 context->states.csc.vertex_buf_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
1971
1972 /*
1973 Create our vertex shader's constant buffer
1974 Const buffer contains scaling and translation vectors
1975 */
1976 context->states.csc.vs_const_buf.size = sizeof(struct VL_CSC_VS_CONSTS);
1977 context->states.csc.vs_const_buf.buffer = pipe->winsys->buffer_create
1978 (
1979 pipe->winsys,
1980 1,
1981 PIPE_BUFFER_USAGE_CONSTANT,
1982 context->states.csc.vs_const_buf.size
1983 );
1984
1985 /*
1986 Create our fragment shader's constant buffer
1987 Const buffer contains the color conversion matrix and bias vectors
1988 */
1989 context->states.csc.fs_const_buf.size = sizeof(struct VL_CSC_FS_CONSTS);
1990 context->states.csc.fs_const_buf.buffer = pipe->winsys->buffer_create
1991 (
1992 pipe->winsys,
1993 1,
1994 PIPE_BUFFER_USAGE_CONSTANT,
1995 context->states.csc.fs_const_buf.size
1996 );
1997
1998 /*
1999 TODO: Refactor this into a seperate function,
2000 allow changing the CSC matrix at runtime to switch between regular & full versions
2001 */
2002 memcpy
2003 (
2004 pipe->winsys->buffer_map(pipe->winsys, context->states.csc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
2005 &vl_csc_fs_consts_601,
2006 sizeof(struct VL_CSC_FS_CONSTS)
2007 );
2008
2009 pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.fs_const_buf.buffer);
2010
2011 return 0;
2012 }
2013
2014 static int vlInitCSC(struct VL_CONTEXT *context)
2015 {
2016 struct pipe_context *pipe;
2017 struct pipe_sampler_state sampler;
2018
2019 assert(context);
2020
2021 pipe = context->pipe;
2022
2023 /* Delay creating the FB until vlPutSurface() so we know window size */
2024 context->states.csc.framebuffer.num_cbufs = 1;
2025 context->states.csc.framebuffer.cbufs[0] = NULL;
2026 context->states.csc.framebuffer.zsbuf = NULL;
2027
2028 sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2029 sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2030 sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2031 sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
2032 sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2033 sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
2034 sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
2035 sampler.compare_func = PIPE_FUNC_ALWAYS;
2036 sampler.normalized_coords = 1;
2037 /*sampler.prefilter = ;*/
2038 /*sampler.shadow_ambient = ;*/
2039 /*sampler.lod_bias = ;*/
2040 /*sampler.min_lod = ;*/
2041 /*sampler.max_lod = ;*/
2042 /*sampler.border_color[i] = ;*/
2043 /*sampler.max_anisotropy = ;*/
2044 context->states.csc.sampler = pipe->create_sampler_state(pipe, &sampler);
2045
2046 vlCreateVertexShaderCSC(context);
2047 vlCreateFragmentShaderCSC(context);
2048 vlCreateDataBufsCSC(context);
2049
2050 return 0;
2051 }
2052
2053 static int vlDestroyCSC(struct VL_CONTEXT *context)
2054 {
2055 assert(context);
2056
2057 /*
2058 Since we create the final FB when we display our first surface,
2059 it may not be created if vlPutSurface() is never called
2060 */
2061 if (context->states.csc.framebuffer.cbufs[0])
2062 context->pipe->winsys->surface_release(context->pipe->winsys, &context->states.csc.framebuffer.cbufs[0]);
2063 context->pipe->delete_sampler_state(context->pipe, context->states.csc.sampler);
2064 context->pipe->delete_vs_state(context->pipe, context->states.csc.vertex_shader);
2065 context->pipe->delete_fs_state(context->pipe, context->states.csc.fragment_shader);
2066 context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[0].buffer);
2067 context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[1].buffer);
2068 context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vs_const_buf.buffer);
2069 context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.fs_const_buf.buffer);
2070
2071 return 0;
2072 }
2073
2074 static int vlInitCommon(struct VL_CONTEXT *context)
2075 {
2076 struct pipe_context *pipe;
2077 struct pipe_rasterizer_state rast;
2078 struct pipe_blend_state blend;
2079 struct pipe_depth_stencil_alpha_state dsa;
2080 unsigned int i;
2081
2082 assert(context);
2083
2084 pipe = context->pipe;
2085
2086 rast.flatshade = 1;
2087 rast.flatshade_first = 0;
2088 rast.light_twoside = 0;
2089 rast.front_winding = PIPE_WINDING_CCW;
2090 rast.cull_mode = PIPE_WINDING_CW;
2091 rast.fill_cw = PIPE_POLYGON_MODE_FILL;
2092 rast.fill_ccw = PIPE_POLYGON_MODE_FILL;
2093 rast.offset_cw = 0;
2094 rast.offset_ccw = 0;
2095 rast.scissor = 0;
2096 rast.poly_smooth = 0;
2097 rast.poly_stipple_enable = 0;
2098 rast.point_sprite = 0;
2099 rast.point_size_per_vertex = 0;
2100 rast.multisample = 0;
2101 rast.line_smooth = 0;
2102 rast.line_stipple_enable = 0;
2103 rast.line_stipple_factor = 0;
2104 rast.line_stipple_pattern = 0;
2105 rast.line_last_pixel = 0;
2106 /* Don't need clipping, but viewport mapping done here */
2107 rast.bypass_clipping = 0;
2108 rast.bypass_vs = 0;
2109 rast.origin_lower_left = 0;
2110 rast.line_width = 1;
2111 rast.point_smooth = 0;
2112 rast.point_size = 1;
2113 rast.offset_units = 1;
2114 rast.offset_scale = 1;
2115 /*rast.sprite_coord_mode[i] = ;*/
2116 context->states.common.raster = pipe->create_rasterizer_state(pipe, &rast);
2117 pipe->bind_rasterizer_state(pipe, context->states.common.raster);
2118
2119 blend.blend_enable = 0;
2120 blend.rgb_func = PIPE_BLEND_ADD;
2121 blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
2122 blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
2123 blend.alpha_func = PIPE_BLEND_ADD;
2124 blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
2125 blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
2126 blend.logicop_enable = 0;
2127 blend.logicop_func = PIPE_LOGICOP_CLEAR;
2128 /* Needed to allow color writes to FB, even if blending disabled */
2129 blend.colormask = PIPE_MASK_RGBA;
2130 blend.dither = 0;
2131 context->states.common.blend = pipe->create_blend_state(pipe, &blend);
2132 pipe->bind_blend_state(pipe, context->states.common.blend);
2133
2134 dsa.depth.enabled = 0;
2135 dsa.depth.writemask = 0;
2136 dsa.depth.func = PIPE_FUNC_ALWAYS;
2137 dsa.depth.occlusion_count = 0;
2138 for (i = 0; i < 2; ++i)
2139 {
2140 dsa.stencil[i].enabled = 0;
2141 dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
2142 dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
2143 dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
2144 dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
2145 dsa.stencil[i].ref_value = 0;
2146 dsa.stencil[i].value_mask = 0;
2147 dsa.stencil[i].write_mask = 0;
2148 }
2149 dsa.alpha.enabled = 0;
2150 dsa.alpha.func = PIPE_FUNC_ALWAYS;
2151 dsa.alpha.ref = 0;
2152 context->states.common.dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa);
2153 pipe->bind_depth_stencil_alpha_state(pipe, context->states.common.dsa);
2154
2155 return 0;
2156 }
2157
2158 static int vlDestroyCommon(struct VL_CONTEXT *context)
2159 {
2160 assert(context);
2161
2162 context->pipe->delete_blend_state(context->pipe, context->states.common.blend);
2163 context->pipe->delete_rasterizer_state(context->pipe, context->states.common.raster);
2164 context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->states.common.dsa);
2165
2166 return 0;
2167 }
2168
/*
 * Runs the per-stage initializers in a fixed order: common state, CSC, MC
 * and finally IDCT.
 *
 * Always returns 0; the results of the individual initializers are discarded.
 */
static int vlInit(struct VL_CONTEXT *context)
{
	assert(context);

	/* Shared rasterizer/blend/DSA state */
	(void)vlInitCommon(context);
	/* Color space conversion stage */
	(void)vlInitCSC(context);
	/* Motion compensation stage */
	(void)vlInitMC(context);
	/* IDCT stage */
	(void)vlInitIDCT(context);

	return 0;
}
2180
2181 static int vlDestroy(struct VL_CONTEXT *context)
2182 {
2183 assert(context);
2184
2185 /* XXX: Must unbind shaders before we can delete them for some reason */
2186 context->pipe->bind_vs_state(context->pipe, NULL);
2187 context->pipe->bind_fs_state(context->pipe, NULL);
2188
2189 vlDestroyCommon(context);
2190 vlDestroyCSC(context);
2191 vlDestroyMC(context);
2192 vlDestroyIDCT(context);
2193
2194 return 0;
2195 }
2196
2197 int vlCreateContext
2198 (
2199 Display *display,
2200 struct pipe_context *pipe,
2201 unsigned int video_width,
2202 unsigned int video_height,
2203 enum VL_FORMAT video_format,
2204 struct VL_CONTEXT **context
2205 )
2206 {
2207 struct VL_CONTEXT *ctx;
2208
2209 assert(display);
2210 assert(pipe);
2211 assert(context);
2212
2213 ctx = calloc(1, sizeof(struct VL_CONTEXT));
2214
2215 ctx->display = display;
2216 ctx->pipe = pipe;
2217 ctx->video_width = video_width;
2218 ctx->video_height = video_height;
2219 ctx->video_format = video_format;
2220
2221 vlInit(ctx);
2222
2223 /* Since we only change states in vlPutSurface() we need to start in render mode */
2224 vlBeginRender(ctx);
2225
2226 *context = ctx;
2227
2228 return 0;
2229 }
2230
/*
 * Destroys every rendering stage of the context via vlDestroy() and then
 * frees the context structure itself. The pointer must not be used afterwards.
 *
 * Always returns 0.
 */
int vlDestroyContext(struct VL_CONTEXT *context)
{
	assert(context);

	/* Stage resources first, then the memory that holds their handles */
	(void)vlDestroy(context);
	free(context);

	return 0;
}
2241
2242 int vlBeginRender(struct VL_CONTEXT *context)
2243 {
2244 struct pipe_context *pipe;
2245
2246 assert(context);
2247
2248 pipe = context->pipe;
2249
2250 /* Frame buffer set in vlRender*Macroblock() */
2251 /* Shaders, samplers, textures set in vlRender*Macroblock() */
2252 pipe->set_vertex_buffers(pipe, 3, context->states.mc.vertex_bufs);
2253 pipe->set_vertex_elements(pipe, 3, context->states.mc.vertex_buf_elems);
2254 pipe->set_viewport_state(pipe, &context->states.mc.viewport);
2255 pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.mc.vs_const_buf);
2256 pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.mc.fs_const_buf);
2257
2258 return 0;
2259 }
2260
2261 int vlEndRender(struct VL_CONTEXT *context)
2262 {
2263 struct pipe_context *pipe;
2264
2265 assert(context);
2266
2267 pipe = context->pipe;
2268
2269 pipe->set_framebuffer_state(pipe, &context->states.csc.framebuffer);
2270 pipe->set_viewport_state(pipe, &context->states.csc.viewport);
2271 pipe->bind_sampler_states(pipe, 1, (void**)&context->states.csc.sampler);
2272 /* Source texture set in vlPutSurface() */
2273 pipe->bind_vs_state(pipe, context->states.csc.vertex_shader);
2274 pipe->bind_fs_state(pipe, context->states.csc.fragment_shader);
2275 pipe->set_vertex_buffers(pipe, 2, context->states.csc.vertex_bufs);
2276 pipe->set_vertex_elements(pipe, 2, context->states.csc.vertex_buf_elems);
2277 pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.csc.vs_const_buf);
2278 pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.csc.fs_const_buf);
2279
2280 return 0;
2281 }
2282