r300g: Use radeon compiler for fragment programs
[mesa.git] / src / mesa / state_tracker / st_mesa_to_tgsi.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * \author
30 * Michal Krol
31 */
32
33 #include "pipe/p_compiler.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "tgsi/tgsi_build.h"
37 #include "tgsi/tgsi_util.h"
38 #include "tgsi/tgsi_dump.h"
39 #include "tgsi/tgsi_sanity.h"
40 #include "st_mesa_to_tgsi.h"
41 #include "shader/prog_instruction.h"
42 #include "shader/prog_parameter.h"
43 #include "shader/prog_print.h"
44 #include "util/u_debug.h"
45
46 /*
47 * Map mesa register file to TGSI register file.
48 */
49 static GLuint
50 map_register_file(
51 gl_register_file file,
52 GLuint index,
53 const GLuint immediateMapping[],
54 GLboolean indirectAccess )
55 {
56 switch( file ) {
57 case PROGRAM_UNDEFINED:
58 return TGSI_FILE_NULL;
59 case PROGRAM_TEMPORARY:
60 return TGSI_FILE_TEMPORARY;
61 /*case PROGRAM_LOCAL_PARAM:*/
62 /*case PROGRAM_ENV_PARAM:*/
63
64 /* Because of the longstanding problem with mesa arb shaders
65 * where constants, immediates and state variables are all
66 * bundled together as PROGRAM_STATE_VAR, we can't tell from the
67 * mesa register file whether this is a CONSTANT or an
68 * IMMEDIATE, hence we need all the other information.
69 */
70 case PROGRAM_STATE_VAR:
71 case PROGRAM_NAMED_PARAM:
72 case PROGRAM_UNIFORM:
73 if (!indirectAccess && immediateMapping && immediateMapping[index] != ~0)
74 return TGSI_FILE_IMMEDIATE;
75 else
76 return TGSI_FILE_CONSTANT;
77 case PROGRAM_CONSTANT:
78 if (indirectAccess)
79 return TGSI_FILE_CONSTANT;
80 assert(immediateMapping[index] != ~0);
81 return TGSI_FILE_IMMEDIATE;
82 case PROGRAM_INPUT:
83 return TGSI_FILE_INPUT;
84 case PROGRAM_OUTPUT:
85 return TGSI_FILE_OUTPUT;
86 case PROGRAM_ADDRESS:
87 return TGSI_FILE_ADDRESS;
88 default:
89 assert( 0 );
90 return TGSI_FILE_NULL;
91 }
92 }
93
94 /**
95 * Map mesa register file index to TGSI index.
96 * Take special care when processing input and output indices.
97 * \param file one of TGSI_FILE_x
98 * \param index the mesa register file index
99 * \param inputMapping maps Mesa input indexes to TGSI input indexes
100 * \param outputMapping maps Mesa output indexes to TGSI output indexes
101 */
102 static GLuint
103 map_register_file_index(
104 GLuint procType,
105 GLuint file,
106 GLuint index,
107 GLuint *swizzle,
108 const GLuint inputMapping[],
109 const GLuint outputMapping[],
110 const GLuint immediateMapping[],
111 GLboolean indirectAccess )
112 {
113 switch( file ) {
114 case TGSI_FILE_INPUT:
115 if (procType == TGSI_PROCESSOR_FRAGMENT &&
116 index == FRAG_ATTRIB_FOGC) {
117 if (GET_SWZ(*swizzle, 0) == SWIZZLE_X) {
118 /* do nothing we're, ok */
119 } else if (GET_SWZ(*swizzle, 0) == SWIZZLE_Y) {
120 /* replace the swizzle with xxxx */
121 *swizzle = MAKE_SWIZZLE4(SWIZZLE_X,
122 SWIZZLE_X,
123 SWIZZLE_X,
124 SWIZZLE_X);
125 /* register after fog */
126 return inputMapping[index] + 1;
127 } else {
128 *swizzle = MAKE_SWIZZLE4(SWIZZLE_Z,
129 SWIZZLE_W,
130 SWIZZLE_Z,
131 SWIZZLE_W);
132 /* register after frontface */
133 return inputMapping[index] + 2;
134 }
135 }
136 /* inputs are mapped according to the user-defined map */
137 return inputMapping[index];
138
139 case TGSI_FILE_OUTPUT:
140 return outputMapping[index];
141
142 case TGSI_FILE_IMMEDIATE:
143 if (indirectAccess)
144 return index;
145 assert(immediateMapping[index] != ~0);
146 return immediateMapping[index];
147
148 default:
149 return index;
150 }
151 }
152
153 /*
154 * Map mesa texture target to TGSI texture target.
155 */
156 static GLuint
157 map_texture_target(
158 GLuint textarget,
159 GLboolean shadow )
160 {
161 switch( textarget ) {
162 case TEXTURE_1D_INDEX:
163 if (shadow)
164 return TGSI_TEXTURE_SHADOW1D;
165 else
166 return TGSI_TEXTURE_1D;
167 case TEXTURE_2D_INDEX:
168 if (shadow)
169 return TGSI_TEXTURE_SHADOW2D;
170 else
171 return TGSI_TEXTURE_2D;
172 case TEXTURE_3D_INDEX:
173 return TGSI_TEXTURE_3D;
174 case TEXTURE_CUBE_INDEX:
175 return TGSI_TEXTURE_CUBE;
176 case TEXTURE_RECT_INDEX:
177 if (shadow)
178 return TGSI_TEXTURE_SHADOWRECT;
179 else
180 return TGSI_TEXTURE_RECT;
181 default:
182 assert( 0 );
183 }
184
185 return TGSI_TEXTURE_1D;
186 }
187
188 static GLuint
189 convert_sat(
190 GLuint sat )
191 {
192 switch( sat ) {
193 case SATURATE_OFF:
194 return TGSI_SAT_NONE;
195 case SATURATE_ZERO_ONE:
196 return TGSI_SAT_ZERO_ONE;
197 case SATURATE_PLUS_MINUS_ONE:
198 return TGSI_SAT_MINUS_PLUS_ONE;
199 default:
200 assert( 0 );
201 return TGSI_SAT_NONE;
202 }
203 }
204
205 static GLuint
206 convert_writemask(
207 GLuint writemask )
208 {
209 assert( WRITEMASK_X == TGSI_WRITEMASK_X );
210 assert( WRITEMASK_Y == TGSI_WRITEMASK_Y );
211 assert( WRITEMASK_Z == TGSI_WRITEMASK_Z );
212 assert( WRITEMASK_W == TGSI_WRITEMASK_W );
213 assert( (writemask & ~TGSI_WRITEMASK_XYZW) == 0 );
214
215 return writemask;
216 }
217
218 static struct tgsi_full_immediate
219 make_immediate(const float *value, uint size)
220 {
221 struct tgsi_full_immediate imm;
222 unsigned i;
223
224 imm = tgsi_default_full_immediate();
225 imm.Immediate.NrTokens += size;
226 imm.Immediate.DataType = TGSI_IMM_FLOAT32;
227
228 for (i = 0; i < size; i++)
229 imm.u[i].Float = value[i];
230
231 return imm;
232 }
233
234 static void
235 compile_instruction(
236 const struct prog_instruction *inst,
237 struct tgsi_full_instruction *fullinst,
238 const GLuint inputMapping[],
239 const GLuint outputMapping[],
240 const GLuint immediateMapping[],
241 GLboolean indirectAccess,
242 GLuint preamble_size,
243 GLuint procType,
244 GLboolean *insideSubroutine,
245 GLint wposTemp)
246 {
247 GLuint i;
248 struct tgsi_full_dst_register *fulldst;
249 struct tgsi_full_src_register *fullsrc;
250
251 *fullinst = tgsi_default_full_instruction();
252
253 fullinst->Instruction.Saturate = convert_sat( inst->SaturateMode );
254 fullinst->Instruction.NumDstRegs = _mesa_num_inst_dst_regs( inst->Opcode );
255 fullinst->Instruction.NumSrcRegs = _mesa_num_inst_src_regs( inst->Opcode );
256
257 fulldst = &fullinst->FullDstRegisters[0];
258 fulldst->DstRegister.File = map_register_file( inst->DstReg.File, 0, NULL, GL_FALSE );
259 fulldst->DstRegister.Index = map_register_file_index(
260 procType,
261 fulldst->DstRegister.File,
262 inst->DstReg.Index,
263 NULL,
264 inputMapping,
265 outputMapping,
266 NULL,
267 GL_FALSE );
268 fulldst->DstRegister.WriteMask = convert_writemask( inst->DstReg.WriteMask );
269 if (inst->DstReg.RelAddr) {
270 fulldst->DstRegister.Indirect = 1;
271 fulldst->DstRegisterInd.File = TGSI_FILE_ADDRESS;
272 fulldst->DstRegisterInd.Index = 0;
273 }
274
275 for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
276 GLuint j;
277 GLuint swizzle = inst->SrcReg[i].Swizzle;
278
279 fullsrc = &fullinst->FullSrcRegisters[i];
280
281 if (procType == TGSI_PROCESSOR_FRAGMENT &&
282 inst->SrcReg[i].File == PROGRAM_INPUT &&
283 inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) {
284 /* special case of INPUT[WPOS] */
285 fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY;
286 fullsrc->SrcRegister.Index = wposTemp;
287 }
288 else {
289 /* any other src register */
290 fullsrc->SrcRegister.File = map_register_file(
291 inst->SrcReg[i].File,
292 inst->SrcReg[i].Index,
293 immediateMapping,
294 indirectAccess );
295 fullsrc->SrcRegister.Index = map_register_file_index(
296 procType,
297 fullsrc->SrcRegister.File,
298 inst->SrcReg[i].Index,
299 &swizzle,
300 inputMapping,
301 outputMapping,
302 immediateMapping,
303 indirectAccess );
304 }
305
306 /* swizzle (ext swizzle also depends on negation) */
307 {
308 GLuint swz[4];
309 GLboolean extended = (inst->SrcReg[i].Negate != NEGATE_NONE &&
310 inst->SrcReg[i].Negate != NEGATE_XYZW);
311 for( j = 0; j < 4; j++ ) {
312 swz[j] = GET_SWZ( swizzle, j );
313 if (swz[j] > SWIZZLE_W)
314 extended = GL_TRUE;
315 }
316 if (extended) {
317 for (j = 0; j < 4; j++) {
318 tgsi_util_set_src_register_extswizzle(&fullsrc->SrcRegisterExtSwz,
319 swz[j], j);
320 }
321 }
322 else {
323 for (j = 0; j < 4; j++) {
324 tgsi_util_set_src_register_swizzle(&fullsrc->SrcRegister,
325 swz[j], j);
326 }
327 }
328 }
329
330 if( inst->SrcReg[i].Negate == NEGATE_XYZW ) {
331 fullsrc->SrcRegister.Negate = 1;
332 }
333 else if( inst->SrcReg[i].Negate != NEGATE_NONE ) {
334 if( inst->SrcReg[i].Negate & NEGATE_X ) {
335 fullsrc->SrcRegisterExtSwz.NegateX = 1;
336 }
337 if( inst->SrcReg[i].Negate & NEGATE_Y ) {
338 fullsrc->SrcRegisterExtSwz.NegateY = 1;
339 }
340 if( inst->SrcReg[i].Negate & NEGATE_Z ) {
341 fullsrc->SrcRegisterExtSwz.NegateZ = 1;
342 }
343 if( inst->SrcReg[i].Negate & NEGATE_W ) {
344 fullsrc->SrcRegisterExtSwz.NegateW = 1;
345 }
346 }
347
348 if( inst->SrcReg[i].Abs ) {
349 fullsrc->SrcRegisterExtMod.Absolute = 1;
350 }
351
352 if( inst->SrcReg[i].RelAddr ) {
353 fullsrc->SrcRegister.Indirect = 1;
354
355 fullsrc->SrcRegisterInd.File = TGSI_FILE_ADDRESS;
356 fullsrc->SrcRegisterInd.Index = 0;
357 }
358 }
359
360 switch( inst->Opcode ) {
361 case OPCODE_ARL:
362 fullinst->Instruction.Opcode = TGSI_OPCODE_ARL;
363 break;
364 case OPCODE_ABS:
365 fullinst->Instruction.Opcode = TGSI_OPCODE_ABS;
366 break;
367 case OPCODE_ADD:
368 fullinst->Instruction.Opcode = TGSI_OPCODE_ADD;
369 break;
370 case OPCODE_BGNLOOP:
371 fullinst->Instruction.Opcode = TGSI_OPCODE_BGNLOOP2;
372 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
373 break;
374 case OPCODE_BGNSUB:
375 fullinst->Instruction.Opcode = TGSI_OPCODE_BGNSUB;
376 *insideSubroutine = GL_TRUE;
377 break;
378 case OPCODE_BRA:
379 fullinst->Instruction.Opcode = TGSI_OPCODE_BRA;
380 break;
381 case OPCODE_BRK:
382 fullinst->Instruction.Opcode = TGSI_OPCODE_BRK;
383 break;
384 case OPCODE_CAL:
385 fullinst->Instruction.Opcode = TGSI_OPCODE_CAL;
386 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
387 break;
388 case OPCODE_CMP:
389 fullinst->Instruction.Opcode = TGSI_OPCODE_CMP;
390 break;
391 case OPCODE_CONT:
392 fullinst->Instruction.Opcode = TGSI_OPCODE_CONT;
393 break;
394 case OPCODE_COS:
395 fullinst->Instruction.Opcode = TGSI_OPCODE_COS;
396 break;
397 case OPCODE_DDX:
398 fullinst->Instruction.Opcode = TGSI_OPCODE_DDX;
399 break;
400 case OPCODE_DDY:
401 fullinst->Instruction.Opcode = TGSI_OPCODE_DDY;
402 break;
403 case OPCODE_DP2:
404 fullinst->Instruction.Opcode = TGSI_OPCODE_DP2;
405 break;
406 case OPCODE_DP2A:
407 fullinst->Instruction.Opcode = TGSI_OPCODE_DP2A;
408 break;
409 case OPCODE_DP3:
410 fullinst->Instruction.Opcode = TGSI_OPCODE_DP3;
411 break;
412 case OPCODE_DP4:
413 fullinst->Instruction.Opcode = TGSI_OPCODE_DP4;
414 break;
415 case OPCODE_DPH:
416 fullinst->Instruction.Opcode = TGSI_OPCODE_DPH;
417 break;
418 case OPCODE_DST:
419 fullinst->Instruction.Opcode = TGSI_OPCODE_DST;
420 break;
421 case OPCODE_ELSE:
422 fullinst->Instruction.Opcode = TGSI_OPCODE_ELSE;
423 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
424 break;
425 case OPCODE_ENDIF:
426 fullinst->Instruction.Opcode = TGSI_OPCODE_ENDIF;
427 break;
428 case OPCODE_ENDLOOP:
429 fullinst->Instruction.Opcode = TGSI_OPCODE_ENDLOOP2;
430 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
431 break;
432 case OPCODE_ENDSUB:
433 fullinst->Instruction.Opcode = TGSI_OPCODE_ENDSUB;
434 *insideSubroutine = GL_FALSE;
435 break;
436 case OPCODE_EX2:
437 fullinst->Instruction.Opcode = TGSI_OPCODE_EX2;
438 break;
439 case OPCODE_EXP:
440 fullinst->Instruction.Opcode = TGSI_OPCODE_EXP;
441 break;
442 case OPCODE_FLR:
443 fullinst->Instruction.Opcode = TGSI_OPCODE_FLR;
444 break;
445 case OPCODE_FRC:
446 fullinst->Instruction.Opcode = TGSI_OPCODE_FRC;
447 break;
448 case OPCODE_IF:
449 fullinst->Instruction.Opcode = TGSI_OPCODE_IF;
450 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
451 break;
452 case OPCODE_TRUNC:
453 fullinst->Instruction.Opcode = TGSI_OPCODE_TRUNC;
454 break;
455 case OPCODE_KIL:
456 /* conditional */
457 fullinst->Instruction.Opcode = TGSI_OPCODE_KIL;
458 break;
459 case OPCODE_KIL_NV:
460 /* predicated */
461 assert(inst->DstReg.CondMask == COND_TR);
462 fullinst->Instruction.Opcode = TGSI_OPCODE_KILP;
463 break;
464 case OPCODE_LG2:
465 fullinst->Instruction.Opcode = TGSI_OPCODE_LG2;
466 break;
467 case OPCODE_LOG:
468 fullinst->Instruction.Opcode = TGSI_OPCODE_LOG;
469 break;
470 case OPCODE_LIT:
471 fullinst->Instruction.Opcode = TGSI_OPCODE_LIT;
472 break;
473 case OPCODE_LRP:
474 fullinst->Instruction.Opcode = TGSI_OPCODE_LRP;
475 break;
476 case OPCODE_MAD:
477 fullinst->Instruction.Opcode = TGSI_OPCODE_MAD;
478 break;
479 case OPCODE_MAX:
480 fullinst->Instruction.Opcode = TGSI_OPCODE_MAX;
481 break;
482 case OPCODE_MIN:
483 fullinst->Instruction.Opcode = TGSI_OPCODE_MIN;
484 break;
485 case OPCODE_MOV:
486 fullinst->Instruction.Opcode = TGSI_OPCODE_MOV;
487 break;
488 case OPCODE_MUL:
489 fullinst->Instruction.Opcode = TGSI_OPCODE_MUL;
490 break;
491 case OPCODE_NOISE1:
492 fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE1;
493 break;
494 case OPCODE_NOISE2:
495 fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE2;
496 break;
497 case OPCODE_NOISE3:
498 fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE3;
499 break;
500 case OPCODE_NOISE4:
501 fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE4;
502 break;
503 case OPCODE_NOP:
504 fullinst->Instruction.Opcode = TGSI_OPCODE_NOP;
505 break;
506 case OPCODE_NRM3:
507 fullinst->Instruction.Opcode = TGSI_OPCODE_NRM;
508 break;
509 case OPCODE_NRM4:
510 fullinst->Instruction.Opcode = TGSI_OPCODE_NRM4;
511 break;
512 case OPCODE_POW:
513 fullinst->Instruction.Opcode = TGSI_OPCODE_POW;
514 break;
515 case OPCODE_RCP:
516 fullinst->Instruction.Opcode = TGSI_OPCODE_RCP;
517 break;
518 case OPCODE_RET:
519 /* If RET is used inside main (not a real subroutine) we may want
520 * to execute END instead of RET. TBD...
521 */
522 if (1 /* *insideSubroutine */) {
523 fullinst->Instruction.Opcode = TGSI_OPCODE_RET;
524 }
525 else {
526 /* inside main() pseudo-function */
527 fullinst->Instruction.Opcode = TGSI_OPCODE_END;
528 }
529 break;
530 case OPCODE_RSQ:
531 fullinst->Instruction.Opcode = TGSI_OPCODE_RSQ;
532 break;
533 case OPCODE_SCS:
534 fullinst->Instruction.Opcode = TGSI_OPCODE_SCS;
535 fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XY;
536 break;
537 case OPCODE_SEQ:
538 fullinst->Instruction.Opcode = TGSI_OPCODE_SEQ;
539 break;
540 case OPCODE_SGE:
541 fullinst->Instruction.Opcode = TGSI_OPCODE_SGE;
542 break;
543 case OPCODE_SGT:
544 fullinst->Instruction.Opcode = TGSI_OPCODE_SGT;
545 break;
546 case OPCODE_SIN:
547 fullinst->Instruction.Opcode = TGSI_OPCODE_SIN;
548 break;
549 case OPCODE_SLE:
550 fullinst->Instruction.Opcode = TGSI_OPCODE_SLE;
551 break;
552 case OPCODE_SLT:
553 fullinst->Instruction.Opcode = TGSI_OPCODE_SLT;
554 break;
555 case OPCODE_SNE:
556 fullinst->Instruction.Opcode = TGSI_OPCODE_SNE;
557 break;
558 case OPCODE_SSG:
559 fullinst->Instruction.Opcode = TGSI_OPCODE_SSG;
560 break;
561 case OPCODE_SUB:
562 fullinst->Instruction.Opcode = TGSI_OPCODE_SUB;
563 break;
564 case OPCODE_SWZ:
565 fullinst->Instruction.Opcode = TGSI_OPCODE_SWZ;
566 break;
567 case OPCODE_TEX:
568 /* ordinary texture lookup */
569 fullinst->Instruction.Opcode = TGSI_OPCODE_TEX;
570 fullinst->Instruction.NumSrcRegs = 2;
571 fullinst->InstructionExtTexture.Texture =
572 map_texture_target( inst->TexSrcTarget, inst->TexShadow );
573 fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
574 fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
575 break;
576 case OPCODE_TXB:
577 /* texture lookup with LOD bias */
578 fullinst->Instruction.Opcode = TGSI_OPCODE_TXB;
579 fullinst->Instruction.NumSrcRegs = 2;
580 fullinst->InstructionExtTexture.Texture =
581 map_texture_target( inst->TexSrcTarget, inst->TexShadow );
582 fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
583 fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
584 break;
585 case OPCODE_TXD:
586 /* texture lookup with explicit partial derivatives */
587 fullinst->Instruction.Opcode = TGSI_OPCODE_TXD;
588 fullinst->Instruction.NumSrcRegs = 4;
589 fullinst->InstructionExtTexture.Texture =
590 map_texture_target( inst->TexSrcTarget, inst->TexShadow );
591 /* src[0] = coord, src[1] = d[strq]/dx, src[2] = d[strq]/dy */
592 fullinst->FullSrcRegisters[3].SrcRegister.File = TGSI_FILE_SAMPLER;
593 fullinst->FullSrcRegisters[3].SrcRegister.Index = inst->TexSrcUnit;
594 break;
595 case OPCODE_TXL:
596 /* texture lookup with explicit LOD */
597 fullinst->Instruction.Opcode = TGSI_OPCODE_TXL;
598 fullinst->Instruction.NumSrcRegs = 2;
599 fullinst->InstructionExtTexture.Texture =
600 map_texture_target( inst->TexSrcTarget, inst->TexShadow );
601 fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
602 fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
603 break;
604 case OPCODE_TXP:
605 /* texture lookup with divide by Q component */
606 /* convert to TEX w/ special flag for division */
607 fullinst->Instruction.Opcode = TGSI_OPCODE_TXP;
608 fullinst->Instruction.NumSrcRegs = 2;
609 fullinst->InstructionExtTexture.Texture =
610 map_texture_target( inst->TexSrcTarget, inst->TexShadow );
611 fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
612 fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
613 break;
614 case OPCODE_XPD:
615 fullinst->Instruction.Opcode = TGSI_OPCODE_XPD;
616 fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XYZ;
617 break;
618 case OPCODE_END:
619 fullinst->Instruction.Opcode = TGSI_OPCODE_END;
620 break;
621 default:
622 assert( 0 );
623 }
624 }
625
626 /**
627 * \param usage_mask bitfield of TGSI_WRITEMASK_{XYZW} tokens
628 */
629 static struct tgsi_full_declaration
630 make_input_decl(
631 GLuint index,
632 GLboolean interpolate_info,
633 GLuint interpolate,
634 GLuint usage_mask,
635 GLboolean semantic_info,
636 GLuint semantic_name,
637 GLbitfield semantic_index,
638 GLbitfield input_flags)
639 {
640 struct tgsi_full_declaration decl;
641
642 assert(semantic_name < TGSI_SEMANTIC_COUNT);
643
644 decl = tgsi_default_full_declaration();
645 decl.Declaration.File = TGSI_FILE_INPUT;
646 decl.Declaration.UsageMask = usage_mask;
647 decl.Declaration.Semantic = semantic_info;
648 decl.DeclarationRange.First = index;
649 decl.DeclarationRange.Last = index;
650 if (semantic_info) {
651 decl.Semantic.SemanticName = semantic_name;
652 decl.Semantic.SemanticIndex = semantic_index;
653 }
654 if (interpolate_info) {
655 decl.Declaration.Interpolate = interpolate;
656 }
657 if (input_flags & PROG_PARAM_BIT_CENTROID)
658 decl.Declaration.Centroid = 1;
659 if (input_flags & PROG_PARAM_BIT_INVARIANT)
660 decl.Declaration.Invariant = 1;
661
662 return decl;
663 }
664
665 /**
666 * \param usage_mask bitfield of TGSI_WRITEMASK_{XYZW} tokens
667 */
668 static struct tgsi_full_declaration
669 make_output_decl(
670 GLuint index,
671 GLuint semantic_name,
672 GLuint semantic_index,
673 GLuint usage_mask,
674 GLbitfield output_flags)
675 {
676 struct tgsi_full_declaration decl;
677
678 assert(semantic_name < TGSI_SEMANTIC_COUNT);
679
680 decl = tgsi_default_full_declaration();
681 decl.Declaration.File = TGSI_FILE_OUTPUT;
682 decl.Declaration.UsageMask = usage_mask;
683 decl.Declaration.Semantic = 1;
684 decl.DeclarationRange.First = index;
685 decl.DeclarationRange.Last = index;
686 decl.Semantic.SemanticName = semantic_name;
687 decl.Semantic.SemanticIndex = semantic_index;
688 if (output_flags & PROG_PARAM_BIT_CENTROID)
689 decl.Declaration.Centroid = 1;
690 if (output_flags & PROG_PARAM_BIT_INVARIANT)
691 decl.Declaration.Invariant = 1;
692
693 return decl;
694 }
695
696
697 static struct tgsi_full_declaration
698 make_temp_decl(
699 GLuint start_index,
700 GLuint end_index )
701 {
702 struct tgsi_full_declaration decl;
703 decl = tgsi_default_full_declaration();
704 decl.Declaration.File = TGSI_FILE_TEMPORARY;
705 decl.DeclarationRange.First = start_index;
706 decl.DeclarationRange.Last = end_index;
707 return decl;
708 }
709
710 static struct tgsi_full_declaration
711 make_addr_decl(
712 GLuint start_index,
713 GLuint end_index )
714 {
715 struct tgsi_full_declaration decl;
716
717 decl = tgsi_default_full_declaration();
718 decl.Declaration.File = TGSI_FILE_ADDRESS;
719 decl.DeclarationRange.First = start_index;
720 decl.DeclarationRange.Last = end_index;
721 return decl;
722 }
723
724 static struct tgsi_full_declaration
725 make_sampler_decl(GLuint index)
726 {
727 struct tgsi_full_declaration decl;
728 decl = tgsi_default_full_declaration();
729 decl.Declaration.File = TGSI_FILE_SAMPLER;
730 decl.DeclarationRange.First = index;
731 decl.DeclarationRange.Last = index;
732 return decl;
733 }
734
735 /** Reference into a constant buffer */
736 static struct tgsi_full_declaration
737 make_constant_decl(GLuint first, GLuint last)
738 {
739 struct tgsi_full_declaration decl;
740 decl = tgsi_default_full_declaration();
741 decl.Declaration.File = TGSI_FILE_CONSTANT;
742 decl.DeclarationRange.First = first;
743 decl.DeclarationRange.Last = last;
744 return decl;
745 }
746
747
748
749 /**
750 * Find the temporaries which are used in the given program.
751 */
752 static void
753 find_temporaries(const struct gl_program *program,
754 GLboolean tempsUsed[MAX_PROGRAM_TEMPS])
755 {
756 GLuint i, j;
757
758 for (i = 0; i < MAX_PROGRAM_TEMPS; i++)
759 tempsUsed[i] = GL_FALSE;
760
761 for (i = 0; i < program->NumInstructions; i++) {
762 const struct prog_instruction *inst = program->Instructions + i;
763 const GLuint n = _mesa_num_inst_src_regs( inst->Opcode );
764 for (j = 0; j < n; j++) {
765 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY)
766 tempsUsed[inst->SrcReg[j].Index] = GL_TRUE;
767 if (inst->DstReg.File == PROGRAM_TEMPORARY)
768 tempsUsed[inst->DstReg.Index] = GL_TRUE;
769 }
770 }
771 }
772
773
774 /**
775 * Find an unused temporary in the tempsUsed array.
776 */
777 static int
778 find_free_temporary(GLboolean tempsUsed[MAX_PROGRAM_TEMPS])
779 {
780 int i;
781 for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
782 if (!tempsUsed[i]) {
783 tempsUsed[i] = GL_TRUE;
784 return i;
785 }
786 }
787 return -1;
788 }
789
790
791 /** helper for building simple TGSI instruction, one src register */
792 static void
793 build_tgsi_instruction1(struct tgsi_full_instruction *inst,
794 int opcode,
795 int dstFile, int dstIndex, int writemask,
796 int srcFile1, int srcIndex1)
797 {
798 *inst = tgsi_default_full_instruction();
799
800 inst->Instruction.Opcode = opcode;
801
802 inst->Instruction.NumDstRegs = 1;
803 inst->FullDstRegisters[0].DstRegister.File = dstFile;
804 inst->FullDstRegisters[0].DstRegister.Index = dstIndex;
805 inst->FullDstRegisters[0].DstRegister.WriteMask = writemask;
806
807 inst->Instruction.NumSrcRegs = 1;
808 inst->FullSrcRegisters[0].SrcRegister.File = srcFile1;
809 inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1;
810 }
811
812
813 /** helper for building simple TGSI instruction, two src registers */
814 static void
815 build_tgsi_instruction2(struct tgsi_full_instruction *inst,
816 int opcode,
817 int dstFile, int dstIndex, int writemask,
818 int srcFile1, int srcIndex1,
819 int srcFile2, int srcIndex2)
820 {
821 *inst = tgsi_default_full_instruction();
822
823 inst->Instruction.Opcode = opcode;
824
825 inst->Instruction.NumDstRegs = 1;
826 inst->FullDstRegisters[0].DstRegister.File = dstFile;
827 inst->FullDstRegisters[0].DstRegister.Index = dstIndex;
828 inst->FullDstRegisters[0].DstRegister.WriteMask = writemask;
829
830 inst->Instruction.NumSrcRegs = 2;
831 inst->FullSrcRegisters[0].SrcRegister.File = srcFile1;
832 inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1;
833 inst->FullSrcRegisters[1].SrcRegister.File = srcFile2;
834 inst->FullSrcRegisters[1].SrcRegister.Index = srcIndex2;
835 }
836
837
838
839 /**
840 * Emit the TGSI instructions for inverting the WPOS y coordinate.
841 */
842 static int
843 emit_inverted_wpos(struct tgsi_token *tokens,
844 int wpos_temp,
845 int winsize_const,
846 int wpos_input,
847 struct tgsi_header *header, int maxTokens)
848 {
849 struct tgsi_full_instruction fullinst;
850 int ti = 0;
851
852 /* MOV wpos_temp.xzw, input[wpos]; */
853 build_tgsi_instruction1(&fullinst,
854 TGSI_OPCODE_MOV,
855 TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_XZW,
856 TGSI_FILE_INPUT, 0);
857
858 ti += tgsi_build_full_instruction(&fullinst,
859 &tokens[ti],
860 header,
861 maxTokens - ti);
862
863 /* SUB wpos_temp.y, const[winsize_const] - input[wpos_input]; */
864 build_tgsi_instruction2(&fullinst,
865 TGSI_OPCODE_SUB,
866 TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_Y,
867 TGSI_FILE_CONSTANT, winsize_const,
868 TGSI_FILE_INPUT, wpos_input);
869
870 ti += tgsi_build_full_instruction(&fullinst,
871 &tokens[ti],
872 header,
873 maxTokens - ti);
874
875 return ti;
876 }
877
878
879
880
881 /**
882 * Translate Mesa program to TGSI format.
883 * \param program the program to translate
884 * \param numInputs number of input registers used
885 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
886 * input indexes
887 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
888 * \param inputSemanticIndex the semantic index (ex: which texcoord) for each input
889 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
890
891 * \param numOutputs number of output registers used
892 * \param outputMapping maps Mesa fragment program outputs to TGSI
893 * generic outputs
894 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
895 * \param outputSemanticIndex the semantic index (ex: which texcoord) for each output
896 * \param tokens array to store translated tokens in
897 * \param maxTokens size of the tokens array
898 *
899 * \return number of tokens placed in 'tokens' buffer, or zero if error
900 */
901 GLuint
902 st_translate_mesa_program(
903 GLcontext *ctx,
904 uint procType,
905 const struct gl_program *program,
906 GLuint numInputs,
907 const GLuint inputMapping[],
908 const ubyte inputSemanticName[],
909 const ubyte inputSemanticIndex[],
910 const GLuint interpMode[],
911 const GLbitfield inputFlags[],
912 GLuint numOutputs,
913 const GLuint outputMapping[],
914 const ubyte outputSemanticName[],
915 const ubyte outputSemanticIndex[],
916 const GLbitfield outputFlags[],
917 struct tgsi_token *tokens,
918 GLuint maxTokens )
919 {
920 GLuint i;
921 GLuint ti; /* token index */
922 struct tgsi_header *header;
923 struct tgsi_processor *processor;
924 GLuint preamble_size = 0;
925 GLuint immediates[1000];
926 GLuint numImmediates = 0;
927 GLboolean insideSubroutine = GL_FALSE;
928 GLboolean indirectAccess = GL_FALSE;
929 GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1];
930 GLint wposTemp = -1, winHeightConst = -1;
931
932 assert(procType == TGSI_PROCESSOR_FRAGMENT ||
933 procType == TGSI_PROCESSOR_VERTEX);
934
935 find_temporaries(program, tempsUsed);
936
937 if (procType == TGSI_PROCESSOR_FRAGMENT) {
938 if (program->InputsRead & FRAG_BIT_WPOS) {
939 /* Fragment program uses fragment position input.
940 * Need to replace instances of INPUT[WPOS] with temp T
941 * where T = INPUT[WPOS] by y is inverted.
942 */
943 static const gl_state_index winSizeState[STATE_LENGTH]
944 = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
945 winHeightConst = _mesa_add_state_reference(program->Parameters,
946 winSizeState);
947 wposTemp = find_free_temporary(tempsUsed);
948 }
949 }
950
951
952 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
953
954 header = (struct tgsi_header *) &tokens[1];
955 *header = tgsi_build_header();
956
957 processor = (struct tgsi_processor *) &tokens[2];
958 *processor = tgsi_build_processor( procType, header );
959
960 ti = 3;
961
962 /*
963 * Declare input attributes.
964 */
965 if (procType == TGSI_PROCESSOR_FRAGMENT) {
966 for (i = 0; i < numInputs; i++) {
967 struct tgsi_full_declaration fulldecl;
968 fulldecl = make_input_decl(i,
969 GL_TRUE, interpMode[i],
970 TGSI_WRITEMASK_XYZW,
971 GL_TRUE, inputSemanticName[i],
972 inputSemanticIndex[i],
973 inputFlags[i]);
974 ti += tgsi_build_full_declaration(&fulldecl,
975 &tokens[ti],
976 header,
977 maxTokens - ti );
978 }
979 }
980 else {
981 /* vertex prog */
982 /* XXX: this could probaby be merged with the clause above.
983 * the only difference is the semantic tags.
984 */
985 for (i = 0; i < numInputs; i++) {
986 struct tgsi_full_declaration fulldecl;
987 fulldecl = make_input_decl(i,
988 GL_FALSE, 0,
989 TGSI_WRITEMASK_XYZW,
990 GL_FALSE, 0, 0,
991 inputFlags[i]);
992 ti += tgsi_build_full_declaration(&fulldecl,
993 &tokens[ti],
994 header,
995 maxTokens - ti );
996 }
997 }
998
999 /*
1000 * Declare output attributes.
1001 */
1002 if (procType == TGSI_PROCESSOR_FRAGMENT) {
1003 for (i = 0; i < numOutputs; i++) {
1004 struct tgsi_full_declaration fulldecl;
1005 switch (outputSemanticName[i]) {
1006 case TGSI_SEMANTIC_POSITION:
1007 fulldecl = make_output_decl(i,
1008 TGSI_SEMANTIC_POSITION, /* Z / Depth */
1009 outputSemanticIndex[i],
1010 TGSI_WRITEMASK_Z,
1011 outputFlags[i]);
1012 break;
1013 case TGSI_SEMANTIC_COLOR:
1014 fulldecl = make_output_decl(i,
1015 TGSI_SEMANTIC_COLOR,
1016 outputSemanticIndex[i],
1017 TGSI_WRITEMASK_XYZW,
1018 outputFlags[i]);
1019 break;
1020 default:
1021 assert(0);
1022 return 0;
1023 }
1024 ti += tgsi_build_full_declaration(&fulldecl,
1025 &tokens[ti],
1026 header,
1027 maxTokens - ti );
1028 }
1029 }
1030 else {
1031 /* vertex prog */
1032 for (i = 0; i < numOutputs; i++) {
1033 struct tgsi_full_declaration fulldecl;
1034 fulldecl = make_output_decl(i,
1035 outputSemanticName[i],
1036 outputSemanticIndex[i],
1037 TGSI_WRITEMASK_XYZW,
1038 outputFlags[i]);
1039 ti += tgsi_build_full_declaration(&fulldecl,
1040 &tokens[ti],
1041 header,
1042 maxTokens - ti );
1043 }
1044 }
1045
1046 /* temporary decls */
1047 {
1048 GLboolean inside_range = GL_FALSE;
1049 GLuint start_range = 0;
1050
1051 tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE;
1052 for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) {
1053 if (tempsUsed[i] && !inside_range) {
1054 inside_range = GL_TRUE;
1055 start_range = i;
1056 }
1057 else if (!tempsUsed[i] && inside_range) {
1058 struct tgsi_full_declaration fulldecl;
1059
1060 inside_range = GL_FALSE;
1061 fulldecl = make_temp_decl( start_range, i - 1 );
1062 ti += tgsi_build_full_declaration(
1063 &fulldecl,
1064 &tokens[ti],
1065 header,
1066 maxTokens - ti );
1067 }
1068 }
1069 }
1070
1071 /* Declare address register.
1072 */
1073 if (program->NumAddressRegs > 0) {
1074 struct tgsi_full_declaration fulldecl;
1075
1076 assert( program->NumAddressRegs == 1 );
1077
1078 fulldecl = make_addr_decl( 0, 0 );
1079 ti += tgsi_build_full_declaration(
1080 &fulldecl,
1081 &tokens[ti],
1082 header,
1083 maxTokens - ti );
1084
1085 indirectAccess = GL_TRUE;
1086 }
1087
1088 /* immediates/literals */
1089 memset(immediates, ~0, sizeof(immediates));
1090
1091 /* Emit immediates only when there is no address register in use.
1092 * FIXME: Be smarter and recognize param arrays -- indirect addressing is
1093 * only valid within the referenced array.
1094 */
1095 if (program->Parameters && !indirectAccess) {
1096 for (i = 0; i < program->Parameters->NumParameters; i++) {
1097 if (program->Parameters->Parameters[i].Type == PROGRAM_CONSTANT) {
1098 struct tgsi_full_immediate fullimm;
1099
1100 fullimm = make_immediate( program->Parameters->ParameterValues[i], 4 );
1101 ti += tgsi_build_full_immediate(
1102 &fullimm,
1103 &tokens[ti],
1104 header,
1105 maxTokens - ti );
1106 immediates[i] = numImmediates;
1107 numImmediates++;
1108 }
1109 }
1110 }
1111
1112 /* constant buffer refs */
1113 if (program->Parameters) {
1114 GLint start = -1, end = -1;
1115
1116 for (i = 0; i < program->Parameters->NumParameters; i++) {
1117 GLboolean emit = (i == program->Parameters->NumParameters - 1);
1118 GLboolean matches;
1119
1120 switch (program->Parameters->Parameters[i].Type) {
1121 case PROGRAM_ENV_PARAM:
1122 case PROGRAM_STATE_VAR:
1123 case PROGRAM_NAMED_PARAM:
1124 case PROGRAM_UNIFORM:
1125 matches = GL_TRUE;
1126 break;
1127 case PROGRAM_CONSTANT:
1128 matches = indirectAccess;
1129 break;
1130 default:
1131 matches = GL_FALSE;
1132 }
1133
1134 if (matches) {
1135 if (start == -1) {
1136 /* begin a sequence */
1137 start = i;
1138 end = i;
1139 }
1140 else {
1141 /* continue sequence */
1142 end = i;
1143 }
1144 }
1145 else {
1146 if (start != -1) {
1147 /* end of sequence */
1148 emit = GL_TRUE;
1149 }
1150 }
1151
1152 if (emit && start >= 0) {
1153 struct tgsi_full_declaration fulldecl;
1154
1155 fulldecl = make_constant_decl( start, end );
1156 ti += tgsi_build_full_declaration(
1157 &fulldecl,
1158 &tokens[ti],
1159 header,
1160 maxTokens - ti );
1161 start = end = -1;
1162 }
1163 }
1164 }
1165
1166 /* texture samplers */
1167 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
1168 if (program->SamplersUsed & (1 << i)) {
1169 struct tgsi_full_declaration fulldecl;
1170
1171 fulldecl = make_sampler_decl( i );
1172 ti += tgsi_build_full_declaration(
1173 &fulldecl,
1174 &tokens[ti],
1175 header,
1176 maxTokens - ti );
1177 }
1178 }
1179
1180 /* invert WPOS fragment input */
1181 if (wposTemp >= 0) {
1182 ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst,
1183 inputMapping[FRAG_ATTRIB_WPOS],
1184 header, maxTokens - ti);
1185 preamble_size = 2; /* two instructions added */
1186 }
1187
1188 for (i = 0; i < program->NumInstructions; i++) {
1189 struct tgsi_full_instruction fullinst;
1190
1191 compile_instruction(
1192 &program->Instructions[i],
1193 &fullinst,
1194 inputMapping,
1195 outputMapping,
1196 immediates,
1197 indirectAccess,
1198 preamble_size,
1199 procType,
1200 &insideSubroutine,
1201 wposTemp);
1202
1203 ti += tgsi_build_full_instruction(
1204 &fullinst,
1205 &tokens[ti],
1206 header,
1207 maxTokens - ti );
1208 }
1209
1210 #if DEBUG
1211 if(!tgsi_sanity_check(tokens)) {
1212 debug_printf("Due to sanity check failure(s) above the following shader program is invalid:\n");
1213 debug_printf("\nOriginal program:\n%s", program->String);
1214 debug_printf("\nMesa program:\n");
1215 _mesa_print_program(program);
1216 debug_printf("\nTGSI program:\n");
1217 tgsi_dump(tokens, 0);
1218 assert(0);
1219 }
1220 #endif
1221
1222 return ti;
1223 }