Merge branch 'mesa_7_6_branch'
[mesa.git] / src / gallium / state_trackers / g3dvl / vl_r16snorm_mc_buf_shaders.inc
1 static int vlCreateVertexShaderIMB
2 (
3 struct vlR16SnormBufferedMC *mc
4 )
5 {
6 const unsigned int max_tokens = 50;
7
8 struct pipe_context *pipe;
9 struct pipe_shader_state vs;
10 struct tgsi_token *tokens;
11 struct tgsi_header *header;
12
13 struct tgsi_full_declaration decl;
14 struct tgsi_full_instruction inst;
15
16 unsigned int ti;
17 unsigned int i;
18
19 assert(mc);
20
21 pipe = mc->pipe;
22 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
23
24 /* Version */
25 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
26 /* Header */
27 header = (struct tgsi_header*)&tokens[1];
28 *header = tgsi_build_header();
29 /* Processor */
30 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
31
32 ti = 3;
33
34 /*
35 * decl i0 ; Vertex pos
36 * decl i1 ; Luma texcoords
37 * decl i2 ; Chroma Cb texcoords
38 * decl i3 ; Chroma Cr texcoords
39 */
40 for (i = 0; i < 4; i++)
41 {
42 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
43 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
44 }
45
46 /*
47 * decl o0 ; Vertex pos
48 * decl o1 ; Luma texcoords
49 * decl o2 ; Chroma Cb texcoords
50 * decl o3 ; Chroma Cr texcoords
51 */
52 for (i = 0; i < 4; i++)
53 {
54 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
55 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
56 }
57
58 /*
59 * mov o0, i0 ; Move input vertex pos to output
60 * mov o1, i1 ; Move input luma texcoords to output
61 * mov o2, i2 ; Move input chroma Cb texcoords to output
62 * mov o3, i3 ; Move input chroma Cr texcoords to output
63 */
64 for (i = 0; i < 4; ++i)
65 {
66 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
67 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
68 }
69
70 /* end */
71 inst = vl_end();
72 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
73
74 vs.tokens = tokens;
75 mc->i_vs = pipe->create_vs_state(pipe, &vs);
76 free(tokens);
77
78 return 0;
79 }
80
81 static int vlCreateFragmentShaderIMB
82 (
83 struct vlR16SnormBufferedMC *mc
84 )
85 {
86 const unsigned int max_tokens = 100;
87
88 struct pipe_context *pipe;
89 struct pipe_shader_state fs;
90 struct tgsi_token *tokens;
91 struct tgsi_header *header;
92
93 struct tgsi_full_declaration decl;
94 struct tgsi_full_instruction inst;
95
96 unsigned int ti;
97 unsigned int i;
98
99 assert(mc);
100
101 pipe = mc->pipe;
102 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
103
104 /* Version */
105 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
106 /* Header */
107 header = (struct tgsi_header*)&tokens[1];
108 *header = tgsi_build_header();
109 /* Processor */
110 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
111
112 ti = 3;
113
114 /*
115 * decl i0 ; Luma texcoords
116 * decl i1 ; Chroma Cb texcoords
117 * decl i2 ; Chroma Cr texcoords
118 */
119 for (i = 0; i < 3; ++i)
120 {
121 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
122 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
123 }
124
125 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
126 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
127 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
128
129 /* decl o0 ; Fragment color */
130 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
131 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
132
133 /* decl t0, t1 */
134 decl = vl_decl_temps(0, 1);
135 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
136
137 /*
138 * decl s0 ; Sampler for luma texture
139 * decl s1 ; Sampler for chroma Cb texture
140 * decl s2 ; Sampler for chroma Cr texture
141 */
142 for (i = 0; i < 3; ++i)
143 {
144 decl = vl_decl_samplers(i, i);
145 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti);
146 }
147
148 /*
149 * tex2d t1, i0, s0 ; Read texel from luma texture
150 * mov t0.x, t1.x ; Move luma sample into .x component
151 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
152 * mov t0.y, t1.x ; Move Cb sample into .y component
153 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
154 * mov t0.z, t1.x ; Move Cr sample into .z component
155 */
156 for (i = 0; i < 3; ++i)
157 {
158 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
159 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
160
161 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
162 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
163 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
164 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
165 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
166 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
167 }
168
169 /* mul o0, t0, c0 ; Rescale texel to correct range */
170 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
171 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
172
173 /* end */
174 inst = vl_end();
175 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
176
177 fs.tokens = tokens;
178 mc->i_fs = pipe->create_fs_state(pipe, &fs);
179 free(tokens);
180
181 return 0;
182 }
183
184 static int vlCreateVertexShaderFramePMB
185 (
186 struct vlR16SnormBufferedMC *mc
187 )
188 {
189 const unsigned int max_tokens = 100;
190
191 struct pipe_context *pipe;
192 struct pipe_shader_state vs;
193 struct tgsi_token *tokens;
194 struct tgsi_header *header;
195
196 struct tgsi_full_declaration decl;
197 struct tgsi_full_instruction inst;
198
199 unsigned int ti;
200 unsigned int i;
201
202 assert(mc);
203
204 pipe = mc->pipe;
205 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
206
207 /* Version */
208 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
209 /* Header */
210 header = (struct tgsi_header*)&tokens[1];
211 *header = tgsi_build_header();
212 /* Processor */
213 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
214
215 ti = 3;
216
217 /*
218 * decl i0 ; Vertex pos
219 * decl i1 ; Luma texcoords
220 * decl i2 ; Chroma Cb texcoords
221 * decl i3 ; Chroma Cr texcoords
222 * decl i4 ; Ref surface top field texcoords
223 * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream)
224 */
225 for (i = 0; i < 6; i++)
226 {
227 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
228 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
229 }
230
231 /*
232 * decl o0 ; Vertex pos
233 * decl o1 ; Luma texcoords
234 * decl o2 ; Chroma Cb texcoords
235 * decl o3 ; Chroma Cr texcoords
236 * decl o4 ; Ref macroblock texcoords
237 */
238 for (i = 0; i < 5; i++)
239 {
240 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
241 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
242 }
243
244 /*
245 * mov o0, i0 ; Move input vertex pos to output
246 * mov o1, i1 ; Move input luma texcoords to output
247 * mov o2, i2 ; Move input chroma Cb texcoords to output
248 * mov o3, i3 ; Move input chroma Cr texcoords to output
249 */
250 for (i = 0; i < 4; ++i)
251 {
252 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
253 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
254 }
255
256 /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */
257 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
258 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
259
260 /* end */
261 inst = vl_end();
262 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
263
264 vs.tokens = tokens;
265 mc->p_vs[0] = pipe->create_vs_state(pipe, &vs);
266 free(tokens);
267
268 return 0;
269 }
270
271 static int vlCreateVertexShaderFieldPMB
272 (
273 struct vlR16SnormBufferedMC *mc
274 )
275 {
276 const unsigned int max_tokens = 100;
277
278 struct pipe_context *pipe;
279 struct pipe_shader_state vs;
280 struct tgsi_token *tokens;
281 struct tgsi_header *header;
282
283 struct tgsi_full_declaration decl;
284 struct tgsi_full_instruction inst;
285
286 unsigned int ti;
287 unsigned int i;
288
289 assert(mc);
290
291 pipe = mc->pipe;
292 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
293
294 /* Version */
295 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
296 /* Header */
297 header = (struct tgsi_header*)&tokens[1];
298 *header = tgsi_build_header();
299 /* Processor */
300 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
301
302 ti = 3;
303
304 /*
305 * decl i0 ; Vertex pos
306 * decl i1 ; Luma texcoords
307 * decl i2 ; Chroma Cb texcoords
308 * decl i3 ; Chroma Cr texcoords
309 * decl i4 ; Ref macroblock top field texcoords
310 * decl i5 ; Ref macroblock bottom field texcoords
311 */
312 for (i = 0; i < 6; i++)
313 {
314 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
315 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
316 }
317
318 /* decl c0 ; Render target dimensions */
319 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
320 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
321
322 /*
323 * decl o0 ; Vertex pos
324 * decl o1 ; Luma texcoords
325 * decl o2 ; Chroma Cb texcoords
326 * decl o3 ; Chroma Cr texcoords
327 * decl o4 ; Ref macroblock top field texcoords
328 * decl o5 ; Ref macroblock bottom field texcoords
329 * decl o6 ; Denormalized vertex pos
330 */
331 for (i = 0; i < 7; i++)
332 {
333 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
334 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
335 }
336
337 /*
338 * mov o0, i0 ; Move input vertex pos to output
339 * mov o1, i1 ; Move input luma texcoords to output
340 * mov o2, i2 ; Move input chroma Cb texcoords to output
341 * mov o3, i3 ; Move input chroma Cr texcoords to output
342 */
343 for (i = 0; i < 4; ++i)
344 {
345 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
346 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
347 }
348
349 /*
350 * add o4, i0, i4 ; Translate vertex pos by motion vec to form top field macroblock texcoords
351 * add o5, i0, i5 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords
352 */
353 for (i = 0; i < 2; ++i)
354 {
355 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
356 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
357 }
358
359 /* mul o6, i0, c0 ; Denorm vertex pos */
360 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
361 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
362
363 /* end */
364 inst = vl_end();
365 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
366
367 vs.tokens = tokens;
368 mc->p_vs[1] = pipe->create_vs_state(pipe, &vs);
369 free(tokens);
370
371 return 0;
372 }
373
374 static int vlCreateFragmentShaderFramePMB
375 (
376 struct vlR16SnormBufferedMC *mc
377 )
378 {
379 const unsigned int max_tokens = 100;
380
381 struct pipe_context *pipe;
382 struct pipe_shader_state fs;
383 struct tgsi_token *tokens;
384 struct tgsi_header *header;
385
386 struct tgsi_full_declaration decl;
387 struct tgsi_full_instruction inst;
388
389 unsigned int ti;
390 unsigned int i;
391
392 assert(mc);
393
394 pipe = mc->pipe;
395 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
396
397 /* Version */
398 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
399 /* Header */
400 header = (struct tgsi_header*)&tokens[1];
401 *header = tgsi_build_header();
402 /* Processor */
403 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
404
405 ti = 3;
406
407 /*
408 * decl i0 ; Luma texcoords
409 * decl i1 ; Chroma Cb texcoords
410 * decl i2 ; Chroma Cr texcoords
411 * decl i3 ; Ref macroblock texcoords
412 */
413 for (i = 0; i < 4; ++i)
414 {
415 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
416 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
417 }
418
419 /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
420 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
421 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
422
423 /* decl o0 ; Fragment color */
424 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
425 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
426
427 /* decl t0, t1 */
428 decl = vl_decl_temps(0, 1);
429 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
430
431 /*
432 * decl s0 ; Sampler for luma texture
433 * decl s1 ; Sampler for chroma Cb texture
434 * decl s2 ; Sampler for chroma Cr texture
435 * decl s3 ; Sampler for ref surface texture
436 */
437 for (i = 0; i < 4; ++i)
438 {
439 decl = vl_decl_samplers(i, i);
440 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
441 }
442
443 /*
444 * tex2d t1, i0, s0 ; Read texel from luma texture
445 * mov t0.x, t1.x ; Move luma sample into .x component
446 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
447 * mov t0.y, t1.x ; Move Cb sample into .y component
448 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
449 * mov t0.z, t1.x ; Move Cr sample into .z component
450 */
451 for (i = 0; i < 3; ++i)
452 {
453 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
454 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
455
456 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
457 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
458 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
459 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
460 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
461 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
462 }
463
464 /* mul t0, t0, c0 ; Rescale texel to correct range */
465 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
466 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
467
468 /* tex2d t1, i3, s3 ; Read texel from ref macroblock */
469 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
470 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
471
472 /* add o0, t0, t1 ; Add ref and differential to form final output */
473 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
474 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
475
476 /* end */
477 inst = vl_end();
478 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
479
480 fs.tokens = tokens;
481 mc->p_fs[0] = pipe->create_fs_state(pipe, &fs);
482 free(tokens);
483
484 return 0;
485 }
486
487 static int vlCreateFragmentShaderFieldPMB
488 (
489 struct vlR16SnormBufferedMC *mc
490 )
491 {
492 const unsigned int max_tokens = 200;
493
494 struct pipe_context *pipe;
495 struct pipe_shader_state fs;
496 struct tgsi_token *tokens;
497 struct tgsi_header *header;
498
499 struct tgsi_full_declaration decl;
500 struct tgsi_full_instruction inst;
501
502 unsigned int ti;
503 unsigned int i;
504
505 assert(mc);
506
507 pipe = mc->pipe;
508 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
509
510 /* Version */
511 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
512 /* Header */
513 header = (struct tgsi_header*)&tokens[1];
514 *header = tgsi_build_header();
515 /* Processor */
516 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
517
518 ti = 3;
519
520 /*
521 * decl i0 ; Luma texcoords
522 * decl i1 ; Chroma Cb texcoords
523 * decl i2 ; Chroma Cr texcoords
524 * decl i3 ; Ref macroblock top field texcoords
525 * decl i4 ; Ref macroblock bottom field texcoords
526 * decl i5 ; Denormalized vertex pos
527 */
528 for (i = 0; i < 6; ++i)
529 {
530 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
531 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
532 }
533
534 /*
535 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
536 * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection
537 */
538 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
539 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
540
541 /* decl o0 ; Fragment color */
542 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
543 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
544
545 /* decl t0-t4 */
546 decl = vl_decl_temps(0, 4);
547 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
548
549 /*
550 * decl s0 ; Sampler for luma texture
551 * decl s1 ; Sampler for chroma Cb texture
552 * decl s2 ; Sampler for chroma Cr texture
553 * decl s3 ; Sampler for ref surface texture
554 */
555 for (i = 0; i < 4; ++i)
556 {
557 decl = vl_decl_samplers(i, i);
558 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
559 }
560
561 /*
562 * tex2d t1, i0, s0 ; Read texel from luma texture
563 * mov t0.x, t1.x ; Move luma sample into .x component
564 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
565 * mov t0.y, t1.x ; Move Cb sample into .y component
566 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
567 * mov t0.z, t1.x ; Move Cr sample into .z component
568 */
569 for (i = 0; i < 3; ++i)
570 {
571 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
572 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
573
574 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
575 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
576 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
577 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
578 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
579 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
580 }
581
582 /* mul t0, t0, c0 ; Rescale texel to correct range */
583 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
584 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
585
586 /*
587 * tex2d t1, i3, s3 ; Read texel from ref macroblock top field
588 * tex2d t2, i4, s3 ; Read texel from ref macroblock bottom field
589 */
590 for (i = 0; i < 2; ++i)
591 {
592 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3);
593 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
594 }
595
596 /* XXX: Pos values off by 0.5? */
597 /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */
598 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1);
599 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
600 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
601 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
602 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
603 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
604 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
605 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
606 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
607 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
608
609 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
610 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
611 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
612 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
613 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
614 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
615 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
616
617 /* floor t3, t3 ; Get rid of fractional part */
618 inst = vl_inst2(TGSI_OPCODE_FLR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
619 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
620
621 /* mul t3, t3, c1.y ; Multiply by 2 */
622 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
623 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
624 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
625 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
626 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
627 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
628
629 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
630 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
631 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
632
633 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
634 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
635 inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
636 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
637
638 /* add o0, t0, t1 ; Add ref and differential to form final output */
639 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
640 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
641
642 /* end */
643 inst = vl_end();
644 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
645
646 fs.tokens = tokens;
647 mc->p_fs[1] = pipe->create_fs_state(pipe, &fs);
648 free(tokens);
649
650 return 0;
651 }
652
653 static int vlCreateVertexShaderFrameBMB
654 (
655 struct vlR16SnormBufferedMC *mc
656 )
657 {
658 const unsigned int max_tokens = 100;
659
660 struct pipe_context *pipe;
661 struct pipe_shader_state vs;
662 struct tgsi_token *tokens;
663 struct tgsi_header *header;
664
665 struct tgsi_full_declaration decl;
666 struct tgsi_full_instruction inst;
667
668 unsigned int ti;
669 unsigned int i;
670
671 assert(mc);
672
673 pipe = mc->pipe;
674 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
675
676 /* Version */
677 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
678 /* Header */
679 header = (struct tgsi_header*)&tokens[1];
680 *header = tgsi_build_header();
681 /* Processor */
682 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
683
684 ti = 3;
685
686 /*
687 * decl i0 ; Vertex pos
688 * decl i1 ; Luma texcoords
689 * decl i2 ; Chroma Cb texcoords
690 * decl i3 ; Chroma Cr texcoords
691 * decl i4 ; First ref macroblock top field texcoords
692 * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
693 * decl i6 ; Second ref macroblock top field texcoords
694 * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
695 */
696 for (i = 0; i < 8; i++)
697 {
698 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
699 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
700 }
701
702 /*
703 * decl o0 ; Vertex pos
704 * decl o1 ; Luma texcoords
705 * decl o2 ; Chroma Cb texcoords
706 * decl o3 ; Chroma Cr texcoords
707 * decl o4 ; First ref macroblock texcoords
708 * decl o5 ; Second ref macroblock texcoords
709 */
710 for (i = 0; i < 6; i++)
711 {
712 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
713 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
714 }
715
716 /*
717 * mov o0, i0 ; Move input vertex pos to output
718 * mov o1, i1 ; Move input luma texcoords to output
719 * mov o2, i2 ; Move input chroma Cb texcoords to output
720 * mov o3, i3 ; Move input chroma Cr texcoords to output
721 */
722 for (i = 0; i < 4; ++i)
723 {
724 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
725 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
726 }
727
728 /*
729 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords
730 * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords
731 */
732 for (i = 0; i < 2; ++i)
733 {
734 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
735 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
736 }
737
738 /* end */
739 inst = vl_end();
740 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
741
742 vs.tokens = tokens;
743 mc->b_vs[0] = pipe->create_vs_state(pipe, &vs);
744 free(tokens);
745
746 return 0;
747 }
748
749 static int vlCreateVertexShaderFieldBMB
750 (
751 struct vlR16SnormBufferedMC *mc
752 )
753 {
754 const unsigned int max_tokens = 100;
755
756 struct pipe_context *pipe;
757 struct pipe_shader_state vs;
758 struct tgsi_token *tokens;
759 struct tgsi_header *header;
760
761 struct tgsi_full_declaration decl;
762 struct tgsi_full_instruction inst;
763
764 unsigned int ti;
765 unsigned int i;
766
767 assert(mc);
768
769 pipe = mc->pipe;
770 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
771
772 /* Version */
773 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
774 /* Header */
775 header = (struct tgsi_header*)&tokens[1];
776 *header = tgsi_build_header();
777 /* Processor */
778 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
779
780 ti = 3;
781
782 /*
783 * decl i0 ; Vertex pos
784 * decl i1 ; Luma texcoords
785 * decl i2 ; Chroma Cb texcoords
786 * decl i3 ; Chroma Cr texcoords
787 * decl i4 ; First ref macroblock top field texcoords
788 * decl i5 ; First ref macroblock bottom field texcoords
789 * decl i6 ; Second ref macroblock top field texcoords
790 * decl i7 ; Second ref macroblock bottom field texcoords
791 */
792 for (i = 0; i < 8; i++)
793 {
794 decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
795 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
796 }
797
798 /* decl c0 ; Render target dimensions */
799 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
800 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
801
802 /*
803 * decl o0 ; Vertex pos
804 * decl o1 ; Luma texcoords
805 * decl o2 ; Chroma Cb texcoords
806 * decl o3 ; Chroma Cr texcoords
807 * decl o4 ; First ref macroblock top field texcoords
808 * decl o5 ; First ref macroblock Bottom field texcoords
809 * decl o6 ; Second ref macroblock top field texcoords
810 * decl o7 ; Second ref macroblock Bottom field texcoords
811 * decl o8 ; Denormalized vertex pos
812 */
813 for (i = 0; i < 9; i++)
814 {
815 decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
816 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
817 }
818
819 /* decl t0, t1 */
820 decl = vl_decl_temps(0, 1);
821 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
822
823 /*
824 * mov o0, i0 ; Move input vertex pos to output
825 * mov o1, i1 ; Move input luma texcoords to output
826 * mov o2, i2 ; Move input chroma Cb texcoords to output
827 * mov o3, i3 ; Move input chroma Cr texcoords to output
828 */
829 for (i = 0; i < 4; ++i)
830 {
831 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
832 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
833 }
834
835 /*
836 * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords
837 * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords
838 * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords
839 * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords
840 */
841 for (i = 0; i < 4; ++i)
842 {
843 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
844 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
845 }
846
847 /* mul o8, i0, c0 ; Denorm vertex pos */
848 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 8, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
849 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
850
851 /* end */
852 inst = vl_end();
853 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
854
855 vs.tokens = tokens;
856 mc->b_vs[1] = pipe->create_vs_state(pipe, &vs);
857 free(tokens);
858
859 return 0;
860 }
861
862 static int vlCreateFragmentShaderFrameBMB
863 (
864 struct vlR16SnormBufferedMC *mc
865 )
866 {
867 const unsigned int max_tokens = 100;
868
869 struct pipe_context *pipe;
870 struct pipe_shader_state fs;
871 struct tgsi_token *tokens;
872 struct tgsi_header *header;
873
874 struct tgsi_full_declaration decl;
875 struct tgsi_full_instruction inst;
876
877 unsigned int ti;
878 unsigned int i;
879
880 assert(mc);
881
882 pipe = mc->pipe;
883 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
884
885 /* Version */
886 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
887 /* Header */
888 header = (struct tgsi_header*)&tokens[1];
889 *header = tgsi_build_header();
890 /* Processor */
891 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
892
893 ti = 3;
894
895 /*
896 * decl i0 ; Luma texcoords
897 * decl i1 ; Chroma Cb texcoords
898 * decl i2 ; Chroma Cr texcoords
899 * decl i3 ; First ref macroblock texcoords
900 * decl i4 ; Second ref macroblock texcoords
901 */
902 for (i = 0; i < 5; ++i)
903 {
904 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
905 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
906 }
907
908 /*
909 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
910 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
911 */
912 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
913 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
914
915 /* decl o0 ; Fragment color */
916 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
917 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
918
919 /* decl t0-t2 */
920 decl = vl_decl_temps(0, 2);
921 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
922
923 /*
924 * decl s0 ; Sampler for luma texture
925 * decl s1 ; Sampler for chroma Cb texture
926 * decl s2 ; Sampler for chroma Cr texture
927 * decl s3 ; Sampler for first ref surface texture
928 * decl s4 ; Sampler for second ref surface texture
929 */
930 for (i = 0; i < 5; ++i)
931 {
932 decl = vl_decl_samplers(i, i);
933 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
934 }
935
936 /*
937 * tex2d t1, i0, s0 ; Read texel from luma texture
938 * mov t0.x, t1.x ; Move luma sample into .x component
939 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
940 * mov t0.y, t1.x ; Move Cb sample into .y component
941 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
942 * mov t0.z, t1.x ; Move Cr sample into .z component
943 */
944 for (i = 0; i < 3; ++i)
945 {
946 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
947 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
948
949 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
950 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
951 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
952 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
953 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
954 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
955 }
956
957 /* mul t0, t0, c0 ; Rescale texel to correct range */
958 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
959 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
960
961 /*
962 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
963 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
964 */
965 for (i = 0; i < 2; ++i)
966 {
967 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
968 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
969 }
970
971 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
972 inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
973 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
974 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
975 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
976 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
977 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
978
979 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
980 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
981 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
982
983 /* end */
984 inst = vl_end();
985 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
986
987 fs.tokens = tokens;
988 mc->b_fs[0] = pipe->create_fs_state(pipe, &fs);
989 free(tokens);
990
991 return 0;
992 }
993
994 static int vlCreateFragmentShaderFieldBMB
995 (
996 struct vlR16SnormBufferedMC *mc
997 )
998 {
999 const unsigned int max_tokens = 200;
1000
1001 struct pipe_context *pipe;
1002 struct pipe_shader_state fs;
1003 struct tgsi_token *tokens;
1004 struct tgsi_header *header;
1005
1006 struct tgsi_full_declaration decl;
1007 struct tgsi_full_instruction inst;
1008
1009 unsigned int ti;
1010 unsigned int i;
1011
1012 assert(mc);
1013
1014 pipe = mc->pipe;
1015 tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
1016
1017 /* Version */
1018 *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
1019 /* Header */
1020 header = (struct tgsi_header*)&tokens[1];
1021 *header = tgsi_build_header();
1022 /* Processor */
1023 *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
1024
1025 ti = 3;
1026
1027 /*
1028 * decl i0 ; Luma texcoords
1029 * decl i1 ; Chroma Cb texcoords
1030 * decl i2 ; Chroma Cr texcoords
1031 * decl i3 ; First ref macroblock top field texcoords
1032 * decl i4 ; First ref macroblock bottom field texcoords
1033 * decl i5 ; Second ref macroblock top field texcoords
1034 * decl i6 ; Second ref macroblock bottom field texcoords
1035 * decl i7 ; Denormalized vertex pos
1036 */
1037 for (i = 0; i < 8; ++i)
1038 {
1039 decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
1040 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1041 }
1042
1043 /*
1044 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
1045 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
1046 * ; and for Y-mod-2 top/bottom field selection
1047 */
1048 decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
1049 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1050
1051 /* decl o0 ; Fragment color */
1052 decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
1053 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1054
1055 /* decl t0-t5 */
1056 decl = vl_decl_temps(0, 5);
1057 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1058
1059 /*
1060 * decl s0 ; Sampler for luma texture
1061 * decl s1 ; Sampler for chroma Cb texture
1062 * decl s2 ; Sampler for chroma Cr texture
1063 * decl s3 ; Sampler for first ref surface texture
1064 * decl s4 ; Sampler for second ref surface texture
1065 */
1066 for (i = 0; i < 5; ++i)
1067 {
1068 decl = vl_decl_samplers(i, i);
1069 ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
1070 }
1071
1072 /*
1073 * tex2d t1, i0, s0 ; Read texel from luma texture
1074 * mov t0.x, t1.x ; Move luma sample into .x component
1075 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
1076 * mov t0.y, t1.x ; Move Cb sample into .y component
1077 * tex2d t1, i2, s2 ; Read texel from chroma Cr texture
1078 * mov t0.z, t1.x ; Move Cr sample into .z component
1079 */
1080 for (i = 0; i < 3; ++i)
1081 {
1082 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
1083 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1084
1085 inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1086 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1087 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1088 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1089 inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
1090 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1091 }
1092
1093 /* mul t0, t0, c0 ; Rescale texel to correct range */
1094 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
1095 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1096
1097 /* XXX: Pos values off by 0.5? */
1098 /* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */
1099 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 7, TGSI_FILE_CONSTANT, 1);
1100 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1101 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1102 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1103 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1104 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1105 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1106 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1107 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1108 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1109
1110 /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
1111 inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
1112 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1113 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1114 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1115 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1116 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1117
1118 /* floor t3, t3 ; Get rid of fractional part */
1119 inst = vl_inst2(TGSI_OPCODE_FLR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
1120 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1121
1122 /* mul t3, t3, c1.y ; Multiply by 2 */
1123 inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
1124 inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
1125 inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
1126 inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
1127 inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
1128 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1129
1130 /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
1131 inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
1132 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1133
1134 /*
1135 * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field
1136 * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field
1137 */
1138 for (i = 0; i < 2; ++i)
1139 {
1140 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3);
1141 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1142 }
1143
1144 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1145 /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
1146 inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1147 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1148
1149 /*
1150 * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field
1151 * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field
1152 */
1153 for (i = 0; i < 2; ++i)
1154 {
1155 inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 5, TGSI_FILE_SAMPLER, 4);
1156 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1157 }
1158
1159 /* TODO: Move to conditional tex fetch on t3 instead of lerp */
1160 /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
1161 inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
1162 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1163
1164 /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
1165 inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
1166 inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
1167 inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
1168 inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
1169 inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
1170 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1171
1172 /* add o0, t0, t1 ; Add past/future ref and differential to form final output */
1173 inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
1174 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1175
1176 /* end */
1177 inst = vl_end();
1178 ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
1179
1180 fs.tokens = tokens;
1181 mc->b_fs[1] = pipe->create_fs_state(pipe, &fs);
1182 free(tokens);
1183
1184 return 0;
1185 }