glsl: enable the new linear scan register allocator code
[mesa.git] / src / mesa / state_tracker / st_mesa_to_tgsi.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * \author
30 * Michal Krol
31 */
32
33 #include "pipe/p_compiler.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "tgsi/tgsi_build.h"
37 #include "tgsi/tgsi_util.h"
38 #include "tgsi/tgsi_dump.h"
39 #include "tgsi/tgsi_sanity.h"
40 #include "st_mesa_to_tgsi.h"
41 #include "shader/prog_instruction.h"
42 #include "shader/prog_parameter.h"
43 #include "shader/prog_print.h"
44 #include "util/u_debug.h"
45
46 /*
47 * Map mesa register file to TGSI register file.
48 */
49 static GLuint
50 map_register_file(
51 gl_register_file file,
52 GLuint index,
53 const GLuint immediateMapping[],
54 GLboolean indirectAccess )
55 {
56 switch( file ) {
57 case PROGRAM_UNDEFINED:
58 return TGSI_FILE_NULL;
59 case PROGRAM_TEMPORARY:
60 return TGSI_FILE_TEMPORARY;
61 /*case PROGRAM_LOCAL_PARAM:*/
62 /*case PROGRAM_ENV_PARAM:*/
63
64 /* Because of the longstanding problem with mesa arb shaders
65 * where constants, immediates and state variables are all
66 * bundled together as PROGRAM_STATE_VAR, we can't tell from the
67 * mesa register file whether this is a CONSTANT or an
68 * IMMEDIATE, hence we need all the other information.
69 */
70 case PROGRAM_STATE_VAR:
71 case PROGRAM_NAMED_PARAM:
72 case PROGRAM_UNIFORM:
73 if (!indirectAccess && immediateMapping && immediateMapping[index] != ~0)
74 return TGSI_FILE_IMMEDIATE;
75 else
76 return TGSI_FILE_CONSTANT;
77 case PROGRAM_CONSTANT:
78 if (indirectAccess)
79 return TGSI_FILE_CONSTANT;
80 assert(immediateMapping[index] != ~0);
81 return TGSI_FILE_IMMEDIATE;
82 case PROGRAM_INPUT:
83 return TGSI_FILE_INPUT;
84 case PROGRAM_OUTPUT:
85 return TGSI_FILE_OUTPUT;
86 case PROGRAM_ADDRESS:
87 return TGSI_FILE_ADDRESS;
88 default:
89 assert( 0 );
90 return TGSI_FILE_NULL;
91 }
92 }
93
94 /**
95 * Map mesa register file index to TGSI index.
96 * Take special care when processing input and output indices.
97 * \param file one of TGSI_FILE_x
98 * \param index the mesa register file index
99 * \param inputMapping maps Mesa input indexes to TGSI input indexes
100 * \param outputMapping maps Mesa output indexes to TGSI output indexes
101 */
102 static GLuint
103 map_register_file_index(
104 GLuint file,
105 GLuint index,
106 const GLuint inputMapping[],
107 const GLuint outputMapping[],
108 const GLuint immediateMapping[],
109 GLboolean indirectAccess )
110 {
111 switch( file ) {
112 case TGSI_FILE_INPUT:
113 /* inputs are mapped according to the user-defined map */
114 return inputMapping[index];
115
116 case TGSI_FILE_OUTPUT:
117 return outputMapping[index];
118
119 case TGSI_FILE_IMMEDIATE:
120 if (indirectAccess)
121 return index;
122 assert(immediateMapping[index] != ~0);
123 return immediateMapping[index];
124
125 default:
126 return index;
127 }
128 }
129
130 /*
131 * Map mesa texture target to TGSI texture target.
132 */
133 static GLuint
134 map_texture_target(
135 GLuint textarget,
136 GLboolean shadow )
137 {
138 #if 1
139 /* XXX remove this line after we've checked that the rest of gallium
140 * can handle the TGSI_TEXTURE_SHADOWx tokens.
141 */
142 shadow = GL_FALSE;
143 #endif
144 switch( textarget ) {
145 case TEXTURE_1D_INDEX:
146 if (shadow)
147 return TGSI_TEXTURE_SHADOW1D;
148 else
149 return TGSI_TEXTURE_1D;
150 case TEXTURE_2D_INDEX:
151 if (shadow)
152 return TGSI_TEXTURE_SHADOW2D;
153 else
154 return TGSI_TEXTURE_2D;
155 case TEXTURE_3D_INDEX:
156 return TGSI_TEXTURE_3D;
157 case TEXTURE_CUBE_INDEX:
158 return TGSI_TEXTURE_CUBE;
159 case TEXTURE_RECT_INDEX:
160 if (shadow)
161 return TGSI_TEXTURE_SHADOWRECT;
162 else
163 return TGSI_TEXTURE_RECT;
164 default:
165 assert( 0 );
166 }
167
168 return TGSI_TEXTURE_1D;
169 }
170
171 static GLuint
172 convert_sat(
173 GLuint sat )
174 {
175 switch( sat ) {
176 case SATURATE_OFF:
177 return TGSI_SAT_NONE;
178 case SATURATE_ZERO_ONE:
179 return TGSI_SAT_ZERO_ONE;
180 case SATURATE_PLUS_MINUS_ONE:
181 return TGSI_SAT_MINUS_PLUS_ONE;
182 default:
183 assert( 0 );
184 return TGSI_SAT_NONE;
185 }
186 }
187
188 static GLuint
189 convert_writemask(
190 GLuint writemask )
191 {
192 assert( WRITEMASK_X == TGSI_WRITEMASK_X );
193 assert( WRITEMASK_Y == TGSI_WRITEMASK_Y );
194 assert( WRITEMASK_Z == TGSI_WRITEMASK_Z );
195 assert( WRITEMASK_W == TGSI_WRITEMASK_W );
196 assert( (writemask & ~TGSI_WRITEMASK_XYZW) == 0 );
197
198 return writemask;
199 }
200
201 static struct tgsi_full_immediate
202 make_immediate(const float *value, uint size)
203 {
204 struct tgsi_full_immediate imm;
205
206 imm = tgsi_default_full_immediate();
207 imm.Immediate.NrTokens += size;
208 imm.Immediate.DataType = TGSI_IMM_FLOAT32;
209 imm.u.Pointer = value;
210 return imm;
211 }
212
213 static void
214 compile_instruction(
215 const struct prog_instruction *inst,
216 struct tgsi_full_instruction *fullinst,
217 const GLuint inputMapping[],
218 const GLuint outputMapping[],
219 const GLuint immediateMapping[],
220 GLboolean indirectAccess,
221 GLuint preamble_size,
222 GLuint procType,
223 GLboolean *insideSubroutine,
224 GLint wposTemp)
225 {
226 GLuint i;
227 struct tgsi_full_dst_register *fulldst;
228 struct tgsi_full_src_register *fullsrc;
229
230 *fullinst = tgsi_default_full_instruction();
231
232 fullinst->Instruction.Saturate = convert_sat( inst->SaturateMode );
233 fullinst->Instruction.NumDstRegs = _mesa_num_inst_dst_regs( inst->Opcode );
234 fullinst->Instruction.NumSrcRegs = _mesa_num_inst_src_regs( inst->Opcode );
235
236 fulldst = &fullinst->FullDstRegisters[0];
237 fulldst->DstRegister.File = map_register_file( inst->DstReg.File, 0, NULL, GL_FALSE );
238 fulldst->DstRegister.Index = map_register_file_index(
239 fulldst->DstRegister.File,
240 inst->DstReg.Index,
241 inputMapping,
242 outputMapping,
243 NULL,
244 GL_FALSE );
245 fulldst->DstRegister.WriteMask = convert_writemask( inst->DstReg.WriteMask );
246
247 for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
248 GLuint j;
249
250 fullsrc = &fullinst->FullSrcRegisters[i];
251
252 if (procType == TGSI_PROCESSOR_FRAGMENT &&
253 inst->SrcReg[i].File == PROGRAM_INPUT &&
254 inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) {
255 /* special case of INPUT[WPOS] */
256 fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY;
257 fullsrc->SrcRegister.Index = wposTemp;
258 }
259 else {
260 /* any other src register */
261 fullsrc->SrcRegister.File = map_register_file(
262 inst->SrcReg[i].File,
263 inst->SrcReg[i].Index,
264 immediateMapping,
265 indirectAccess );
266 fullsrc->SrcRegister.Index = map_register_file_index(
267 fullsrc->SrcRegister.File,
268 inst->SrcReg[i].Index,
269 inputMapping,
270 outputMapping,
271 immediateMapping,
272 indirectAccess );
273 }
274
275 /* swizzle (ext swizzle also depends on negation) */
276 {
277 GLuint swz[4];
278 GLboolean extended = (inst->SrcReg[i].NegateBase != NEGATE_NONE &&
279 inst->SrcReg[i].NegateBase != NEGATE_XYZW);
280 for( j = 0; j < 4; j++ ) {
281 swz[j] = GET_SWZ( inst->SrcReg[i].Swizzle, j );
282 if (swz[j] > SWIZZLE_W)
283 extended = GL_TRUE;
284 }
285 if (extended) {
286 for (j = 0; j < 4; j++) {
287 tgsi_util_set_src_register_extswizzle(&fullsrc->SrcRegisterExtSwz,
288 swz[j], j);
289 }
290 }
291 else {
292 for (j = 0; j < 4; j++) {
293 tgsi_util_set_src_register_swizzle(&fullsrc->SrcRegister,
294 swz[j], j);
295 }
296 }
297 }
298
299 if( inst->SrcReg[i].NegateBase == NEGATE_XYZW ) {
300 fullsrc->SrcRegister.Negate = 1;
301 }
302 else if( inst->SrcReg[i].NegateBase != NEGATE_NONE ) {
303 if( inst->SrcReg[i].NegateBase & NEGATE_X ) {
304 fullsrc->SrcRegisterExtSwz.NegateX = 1;
305 }
306 if( inst->SrcReg[i].NegateBase & NEGATE_Y ) {
307 fullsrc->SrcRegisterExtSwz.NegateY = 1;
308 }
309 if( inst->SrcReg[i].NegateBase & NEGATE_Z ) {
310 fullsrc->SrcRegisterExtSwz.NegateZ = 1;
311 }
312 if( inst->SrcReg[i].NegateBase & NEGATE_W ) {
313 fullsrc->SrcRegisterExtSwz.NegateW = 1;
314 }
315 }
316
317 if( inst->SrcReg[i].Abs ) {
318 fullsrc->SrcRegisterExtMod.Absolute = 1;
319 }
320
321 if( inst->SrcReg[i].NegateAbs ) {
322 fullsrc->SrcRegisterExtMod.Negate = 1;
323 }
324
325 if( inst->SrcReg[i].RelAddr ) {
326 fullsrc->SrcRegister.Indirect = 1;
327
328 fullsrc->SrcRegisterInd.File = TGSI_FILE_ADDRESS;
329 fullsrc->SrcRegisterInd.Index = 0;
330 }
331 }
332
333 switch( inst->Opcode ) {
334 case OPCODE_ARL:
335 fullinst->Instruction.Opcode = TGSI_OPCODE_ARL;
336 break;
337 case OPCODE_ABS:
338 fullinst->Instruction.Opcode = TGSI_OPCODE_ABS;
339 break;
340 case OPCODE_ADD:
341 fullinst->Instruction.Opcode = TGSI_OPCODE_ADD;
342 break;
343 case OPCODE_BGNLOOP:
344 fullinst->Instruction.Opcode = TGSI_OPCODE_BGNLOOP2;
345 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
346 break;
347 case OPCODE_BGNSUB:
348 fullinst->Instruction.Opcode = TGSI_OPCODE_BGNSUB;
349 *insideSubroutine = GL_TRUE;
350 break;
351 case OPCODE_BRA:
352 fullinst->Instruction.Opcode = TGSI_OPCODE_BRA;
353 break;
354 case OPCODE_BRK:
355 fullinst->Instruction.Opcode = TGSI_OPCODE_BRK;
356 break;
357 case OPCODE_CAL:
358 fullinst->Instruction.Opcode = TGSI_OPCODE_CAL;
359 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
360 break;
361 case OPCODE_CMP:
362 fullinst->Instruction.Opcode = TGSI_OPCODE_CMP;
363 break;
364 case OPCODE_CONT:
365 fullinst->Instruction.Opcode = TGSI_OPCODE_CONT;
366 break;
367 case OPCODE_COS:
368 fullinst->Instruction.Opcode = TGSI_OPCODE_COS;
369 break;
370 case OPCODE_DDX:
371 fullinst->Instruction.Opcode = TGSI_OPCODE_DDX;
372 break;
373 case OPCODE_DDY:
374 fullinst->Instruction.Opcode = TGSI_OPCODE_DDY;
375 break;
376 case OPCODE_DP2:
377 fullinst->Instruction.Opcode = TGSI_OPCODE_DP2;
378 break;
379 case OPCODE_DP2A:
380 fullinst->Instruction.Opcode = TGSI_OPCODE_DP2A;
381 break;
382 case OPCODE_DP3:
383 fullinst->Instruction.Opcode = TGSI_OPCODE_DP3;
384 break;
385 case OPCODE_DP4:
386 fullinst->Instruction.Opcode = TGSI_OPCODE_DP4;
387 break;
388 case OPCODE_DPH:
389 fullinst->Instruction.Opcode = TGSI_OPCODE_DPH;
390 break;
391 case OPCODE_DST:
392 fullinst->Instruction.Opcode = TGSI_OPCODE_DST;
393 break;
394 case OPCODE_ELSE:
395 fullinst->Instruction.Opcode = TGSI_OPCODE_ELSE;
396 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
397 break;
398 case OPCODE_ENDIF:
399 fullinst->Instruction.Opcode = TGSI_OPCODE_ENDIF;
400 break;
401 case OPCODE_ENDLOOP:
402 fullinst->Instruction.Opcode = TGSI_OPCODE_ENDLOOP2;
403 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
404 break;
405 case OPCODE_ENDSUB:
406 fullinst->Instruction.Opcode = TGSI_OPCODE_ENDSUB;
407 *insideSubroutine = GL_FALSE;
408 break;
409 case OPCODE_EX2:
410 fullinst->Instruction.Opcode = TGSI_OPCODE_EX2;
411 break;
412 case OPCODE_EXP:
413 fullinst->Instruction.Opcode = TGSI_OPCODE_EXP;
414 break;
415 case OPCODE_FLR:
416 fullinst->Instruction.Opcode = TGSI_OPCODE_FLR;
417 break;
418 case OPCODE_FRC:
419 fullinst->Instruction.Opcode = TGSI_OPCODE_FRC;
420 break;
421 case OPCODE_IF:
422 fullinst->Instruction.Opcode = TGSI_OPCODE_IF;
423 fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
424 break;
425 case OPCODE_TRUNC:
426 fullinst->Instruction.Opcode = TGSI_OPCODE_TRUNC;
427 break;
428 case OPCODE_KIL:
429 /* conditional */
430 fullinst->Instruction.Opcode = TGSI_OPCODE_KIL;
431 break;
432 case OPCODE_KIL_NV:
433 /* predicated */
434 assert(inst->DstReg.CondMask == COND_TR);
435 fullinst->Instruction.Opcode = TGSI_OPCODE_KILP;
436 break;
437 case OPCODE_LG2:
438 fullinst->Instruction.Opcode = TGSI_OPCODE_LG2;
439 break;
440 case OPCODE_LOG:
441 fullinst->Instruction.Opcode = TGSI_OPCODE_LOG;
442 break;
443 case OPCODE_LIT:
444 fullinst->Instruction.Opcode = TGSI_OPCODE_LIT;
445 break;
446 case OPCODE_LRP:
447 fullinst->Instruction.Opcode = TGSI_OPCODE_LRP;
448 break;
449 case OPCODE_MAD:
450 fullinst->Instruction.Opcode = TGSI_OPCODE_MAD;
451 break;
452 case OPCODE_MAX:
453 fullinst->Instruction.Opcode = TGSI_OPCODE_MAX;
454 break;
455 case OPCODE_MIN:
456 fullinst->Instruction.Opcode = TGSI_OPCODE_MIN;
457 break;
458 case OPCODE_MOV:
459 fullinst->Instruction.Opcode = TGSI_OPCODE_MOV;
460 break;
461 case OPCODE_MUL:
462 fullinst->Instruction.Opcode = TGSI_OPCODE_MUL;
463 break;
464 case OPCODE_NOISE1:
465 fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE1;
466 break;
467 case OPCODE_NOISE2:
468 fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE2;
469 break;
470 case OPCODE_NOISE3:
471 fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE3;
472 break;
473 case OPCODE_NOISE4:
474 fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE4;
475 break;
476 case OPCODE_NOP:
477 fullinst->Instruction.Opcode = TGSI_OPCODE_NOP;
478 break;
479 case OPCODE_NRM3:
480 fullinst->Instruction.Opcode = TGSI_OPCODE_NRM;
481 break;
482 case OPCODE_NRM4:
483 fullinst->Instruction.Opcode = TGSI_OPCODE_NRM4;
484 break;
485 case OPCODE_POW:
486 fullinst->Instruction.Opcode = TGSI_OPCODE_POW;
487 break;
488 case OPCODE_RCP:
489 fullinst->Instruction.Opcode = TGSI_OPCODE_RCP;
490 break;
491 case OPCODE_RET:
492 /* If RET is used inside main (not a real subroutine) we may want
493 * to execute END instead of RET. TBD...
494 */
495 if (1 /* *insideSubroutine */) {
496 fullinst->Instruction.Opcode = TGSI_OPCODE_RET;
497 }
498 else {
499 /* inside main() pseudo-function */
500 fullinst->Instruction.Opcode = TGSI_OPCODE_END;
501 }
502 break;
503 case OPCODE_RSQ:
504 fullinst->Instruction.Opcode = TGSI_OPCODE_RSQ;
505 break;
506 case OPCODE_SCS:
507 fullinst->Instruction.Opcode = TGSI_OPCODE_SCS;
508 fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XY;
509 break;
510 case OPCODE_SEQ:
511 fullinst->Instruction.Opcode = TGSI_OPCODE_SEQ;
512 break;
513 case OPCODE_SGE:
514 fullinst->Instruction.Opcode = TGSI_OPCODE_SGE;
515 break;
516 case OPCODE_SGT:
517 fullinst->Instruction.Opcode = TGSI_OPCODE_SGT;
518 break;
519 case OPCODE_SIN:
520 fullinst->Instruction.Opcode = TGSI_OPCODE_SIN;
521 break;
522 case OPCODE_SLE:
523 fullinst->Instruction.Opcode = TGSI_OPCODE_SLE;
524 break;
525 case OPCODE_SLT:
526 fullinst->Instruction.Opcode = TGSI_OPCODE_SLT;
527 break;
528 case OPCODE_SNE:
529 fullinst->Instruction.Opcode = TGSI_OPCODE_SNE;
530 break;
531 case OPCODE_SSG:
532 fullinst->Instruction.Opcode = TGSI_OPCODE_SSG;
533 break;
534 case OPCODE_SUB:
535 fullinst->Instruction.Opcode = TGSI_OPCODE_SUB;
536 break;
537 case OPCODE_SWZ:
538 fullinst->Instruction.Opcode = TGSI_OPCODE_SWZ;
539 break;
540 case OPCODE_TEX:
541 /* ordinary texture lookup */
542 fullinst->Instruction.Opcode = TGSI_OPCODE_TEX;
543 fullinst->Instruction.NumSrcRegs = 2;
544 fullinst->InstructionExtTexture.Texture =
545 map_texture_target( inst->TexSrcTarget, inst->TexShadow );
546 fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
547 fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
548 break;
549 case OPCODE_TXB:
550 /* texture lookup with LOD bias */
551 fullinst->Instruction.Opcode = TGSI_OPCODE_TXB;
552 fullinst->Instruction.NumSrcRegs = 2;
553 fullinst->InstructionExtTexture.Texture =
554 map_texture_target( inst->TexSrcTarget, inst->TexShadow );
555 fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
556 fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
557 break;
558 case OPCODE_TXD:
559 /* texture lookup with explicit partial derivatives */
560 fullinst->Instruction.Opcode = TGSI_OPCODE_TXD;
561 fullinst->Instruction.NumSrcRegs = 4;
562 fullinst->InstructionExtTexture.Texture =
563 map_texture_target( inst->TexSrcTarget, inst->TexShadow );
564 /* src[0] = coord, src[1] = d[strq]/dx, src[2] = d[strq]/dy */
565 fullinst->FullSrcRegisters[3].SrcRegister.File = TGSI_FILE_SAMPLER;
566 fullinst->FullSrcRegisters[3].SrcRegister.Index = inst->TexSrcUnit;
567 break;
568 case OPCODE_TXL:
569 /* texture lookup with explicit LOD */
570 fullinst->Instruction.Opcode = TGSI_OPCODE_TXL;
571 fullinst->Instruction.NumSrcRegs = 2;
572 fullinst->InstructionExtTexture.Texture =
573 map_texture_target( inst->TexSrcTarget, inst->TexShadow );
574 fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
575 fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
576 break;
577 case OPCODE_TXP:
578 /* texture lookup with divide by Q component */
579 /* convert to TEX w/ special flag for division */
580 fullinst->Instruction.Opcode = TGSI_OPCODE_TXP;
581 fullinst->Instruction.NumSrcRegs = 2;
582 fullinst->InstructionExtTexture.Texture =
583 map_texture_target( inst->TexSrcTarget, inst->TexShadow );
584 fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
585 fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
586 break;
587 case OPCODE_XPD:
588 fullinst->Instruction.Opcode = TGSI_OPCODE_XPD;
589 fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XYZ;
590 break;
591 case OPCODE_END:
592 fullinst->Instruction.Opcode = TGSI_OPCODE_END;
593 break;
594 default:
595 assert( 0 );
596 }
597 }
598
599 /**
600 * \param usage_mask bitfield of TGSI_WRITEMASK_{XYZW} tokens
601 */
602 static struct tgsi_full_declaration
603 make_input_decl(
604 GLuint index,
605 GLboolean interpolate_info,
606 GLuint interpolate,
607 GLuint usage_mask,
608 GLboolean semantic_info,
609 GLuint semantic_name,
610 GLbitfield semantic_index,
611 GLbitfield input_flags)
612 {
613 struct tgsi_full_declaration decl;
614
615 assert(semantic_name < TGSI_SEMANTIC_COUNT);
616
617 decl = tgsi_default_full_declaration();
618 decl.Declaration.File = TGSI_FILE_INPUT;
619 decl.Declaration.UsageMask = usage_mask;
620 decl.Declaration.Semantic = semantic_info;
621 decl.DeclarationRange.First = index;
622 decl.DeclarationRange.Last = index;
623 if (semantic_info) {
624 decl.Semantic.SemanticName = semantic_name;
625 decl.Semantic.SemanticIndex = semantic_index;
626 }
627 if (interpolate_info) {
628 decl.Declaration.Interpolate = interpolate;
629 }
630 if (input_flags & PROG_PARAM_BIT_CENTROID)
631 decl.Declaration.Centroid = 1;
632 if (input_flags & PROG_PARAM_BIT_INVARIANT)
633 decl.Declaration.Invariant = 1;
634
635 return decl;
636 }
637
638 /**
639 * \param usage_mask bitfield of TGSI_WRITEMASK_{XYZW} tokens
640 */
641 static struct tgsi_full_declaration
642 make_output_decl(
643 GLuint index,
644 GLuint semantic_name,
645 GLuint semantic_index,
646 GLuint usage_mask,
647 GLbitfield output_flags)
648 {
649 struct tgsi_full_declaration decl;
650
651 assert(semantic_name < TGSI_SEMANTIC_COUNT);
652
653 decl = tgsi_default_full_declaration();
654 decl.Declaration.File = TGSI_FILE_OUTPUT;
655 decl.Declaration.UsageMask = usage_mask;
656 decl.Declaration.Semantic = 1;
657 decl.DeclarationRange.First = index;
658 decl.DeclarationRange.Last = index;
659 decl.Semantic.SemanticName = semantic_name;
660 decl.Semantic.SemanticIndex = semantic_index;
661 if (output_flags & PROG_PARAM_BIT_CENTROID)
662 decl.Declaration.Centroid = 1;
663 if (output_flags & PROG_PARAM_BIT_INVARIANT)
664 decl.Declaration.Invariant = 1;
665
666 return decl;
667 }
668
669
670 static struct tgsi_full_declaration
671 make_temp_decl(
672 GLuint start_index,
673 GLuint end_index )
674 {
675 struct tgsi_full_declaration decl;
676 decl = tgsi_default_full_declaration();
677 decl.Declaration.File = TGSI_FILE_TEMPORARY;
678 decl.DeclarationRange.First = start_index;
679 decl.DeclarationRange.Last = end_index;
680 return decl;
681 }
682
683 static struct tgsi_full_declaration
684 make_addr_decl(
685 GLuint start_index,
686 GLuint end_index )
687 {
688 struct tgsi_full_declaration decl;
689
690 decl = tgsi_default_full_declaration();
691 decl.Declaration.File = TGSI_FILE_ADDRESS;
692 decl.DeclarationRange.First = start_index;
693 decl.DeclarationRange.Last = end_index;
694 return decl;
695 }
696
697 static struct tgsi_full_declaration
698 make_sampler_decl(GLuint index)
699 {
700 struct tgsi_full_declaration decl;
701 decl = tgsi_default_full_declaration();
702 decl.Declaration.File = TGSI_FILE_SAMPLER;
703 decl.DeclarationRange.First = index;
704 decl.DeclarationRange.Last = index;
705 return decl;
706 }
707
708 /** Reference into a constant buffer */
709 static struct tgsi_full_declaration
710 make_constant_decl(GLuint first, GLuint last)
711 {
712 struct tgsi_full_declaration decl;
713 decl = tgsi_default_full_declaration();
714 decl.Declaration.File = TGSI_FILE_CONSTANT;
715 decl.DeclarationRange.First = first;
716 decl.DeclarationRange.Last = last;
717 return decl;
718 }
719
720
721
722 /**
723 * Find the temporaries which are used in the given program.
724 */
725 static void
726 find_temporaries(const struct gl_program *program,
727 GLboolean tempsUsed[MAX_PROGRAM_TEMPS])
728 {
729 GLuint i, j;
730
731 for (i = 0; i < MAX_PROGRAM_TEMPS; i++)
732 tempsUsed[i] = GL_FALSE;
733
734 for (i = 0; i < program->NumInstructions; i++) {
735 const struct prog_instruction *inst = program->Instructions + i;
736 const GLuint n = _mesa_num_inst_src_regs( inst->Opcode );
737 for (j = 0; j < n; j++) {
738 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY)
739 tempsUsed[inst->SrcReg[j].Index] = GL_TRUE;
740 if (inst->DstReg.File == PROGRAM_TEMPORARY)
741 tempsUsed[inst->DstReg.Index] = GL_TRUE;
742 }
743 }
744 }
745
746
747 /**
748 * Find an unused temporary in the tempsUsed array.
749 */
750 static int
751 find_free_temporary(GLboolean tempsUsed[MAX_PROGRAM_TEMPS])
752 {
753 int i;
754 for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
755 if (!tempsUsed[i]) {
756 tempsUsed[i] = GL_TRUE;
757 return i;
758 }
759 }
760 return -1;
761 }
762
763
764 /** helper for building simple TGSI instruction, one src register */
765 static void
766 build_tgsi_instruction1(struct tgsi_full_instruction *inst,
767 int opcode,
768 int dstFile, int dstIndex, int writemask,
769 int srcFile1, int srcIndex1)
770 {
771 *inst = tgsi_default_full_instruction();
772
773 inst->Instruction.Opcode = opcode;
774
775 inst->Instruction.NumDstRegs = 1;
776 inst->FullDstRegisters[0].DstRegister.File = dstFile;
777 inst->FullDstRegisters[0].DstRegister.Index = dstIndex;
778 inst->FullDstRegisters[0].DstRegister.WriteMask = writemask;
779
780 inst->Instruction.NumSrcRegs = 1;
781 inst->FullSrcRegisters[0].SrcRegister.File = srcFile1;
782 inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1;
783 }
784
785
786 /** helper for building simple TGSI instruction, two src registers */
787 static void
788 build_tgsi_instruction2(struct tgsi_full_instruction *inst,
789 int opcode,
790 int dstFile, int dstIndex, int writemask,
791 int srcFile1, int srcIndex1,
792 int srcFile2, int srcIndex2)
793 {
794 *inst = tgsi_default_full_instruction();
795
796 inst->Instruction.Opcode = opcode;
797
798 inst->Instruction.NumDstRegs = 1;
799 inst->FullDstRegisters[0].DstRegister.File = dstFile;
800 inst->FullDstRegisters[0].DstRegister.Index = dstIndex;
801 inst->FullDstRegisters[0].DstRegister.WriteMask = writemask;
802
803 inst->Instruction.NumSrcRegs = 2;
804 inst->FullSrcRegisters[0].SrcRegister.File = srcFile1;
805 inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1;
806 inst->FullSrcRegisters[1].SrcRegister.File = srcFile2;
807 inst->FullSrcRegisters[1].SrcRegister.Index = srcIndex2;
808 }
809
810
811
812 /**
813 * Emit the TGSI instructions for inverting the WPOS y coordinate.
814 */
815 static int
816 emit_inverted_wpos(struct tgsi_token *tokens,
817 int wpos_temp,
818 int winsize_const,
819 int wpos_input,
820 struct tgsi_header *header, int maxTokens)
821 {
822 struct tgsi_full_instruction fullinst;
823 int ti = 0;
824
825 /* MOV wpos_temp.xzw, input[wpos]; */
826 build_tgsi_instruction1(&fullinst,
827 TGSI_OPCODE_MOV,
828 TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_XZW,
829 TGSI_FILE_INPUT, 0);
830
831 ti += tgsi_build_full_instruction(&fullinst,
832 &tokens[ti],
833 header,
834 maxTokens - ti);
835
836 /* SUB wpos_temp.y, const[winsize_const] - input[wpos_input]; */
837 build_tgsi_instruction2(&fullinst,
838 TGSI_OPCODE_SUB,
839 TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_Y,
840 TGSI_FILE_CONSTANT, winsize_const,
841 TGSI_FILE_INPUT, wpos_input);
842
843 ti += tgsi_build_full_instruction(&fullinst,
844 &tokens[ti],
845 header,
846 maxTokens - ti);
847
848 return ti;
849 }
850
851
852
853
854 /**
855 * Translate Mesa program to TGSI format.
856 * \param program the program to translate
857 * \param numInputs number of input registers used
858 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
859 * input indexes
860 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
861 * \param inputSemanticIndex the semantic index (ex: which texcoord) for each input
862 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
863
864 * \param numOutputs number of output registers used
865 * \param outputMapping maps Mesa fragment program outputs to TGSI
866 * generic outputs
867 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
868 * \param outputSemanticIndex the semantic index (ex: which texcoord) for each output
869 * \param tokens array to store translated tokens in
870 * \param maxTokens size of the tokens array
871 *
872 * \return number of tokens placed in 'tokens' buffer, or zero if error
873 */
874 GLuint
875 st_translate_mesa_program(
876 GLcontext *ctx,
877 uint procType,
878 const struct gl_program *program,
879 GLuint numInputs,
880 const GLuint inputMapping[],
881 const ubyte inputSemanticName[],
882 const ubyte inputSemanticIndex[],
883 const GLuint interpMode[],
884 const GLbitfield inputFlags[],
885 GLuint numOutputs,
886 const GLuint outputMapping[],
887 const ubyte outputSemanticName[],
888 const ubyte outputSemanticIndex[],
889 const GLbitfield outputFlags[],
890 struct tgsi_token *tokens,
891 GLuint maxTokens )
892 {
893 GLuint i;
894 GLuint ti; /* token index */
895 struct tgsi_header *header;
896 struct tgsi_processor *processor;
897 GLuint preamble_size = 0;
898 GLuint immediates[1000];
899 GLuint numImmediates = 0;
900 GLboolean insideSubroutine = GL_FALSE;
901 GLboolean indirectAccess = GL_FALSE;
902 GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1];
903 GLint wposTemp = -1, winHeightConst = -1;
904
905 assert(procType == TGSI_PROCESSOR_FRAGMENT ||
906 procType == TGSI_PROCESSOR_VERTEX);
907
908 find_temporaries(program, tempsUsed);
909
910 if (procType == TGSI_PROCESSOR_FRAGMENT) {
911 if (program->InputsRead & FRAG_BIT_WPOS) {
912 /* Fragment program uses fragment position input.
913 * Need to replace instances of INPUT[WPOS] with temp T
914 * where T = INPUT[WPOS] by y is inverted.
915 */
916 static const gl_state_index winSizeState[STATE_LENGTH]
917 = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
918 winHeightConst = _mesa_add_state_reference(program->Parameters,
919 winSizeState);
920 wposTemp = find_free_temporary(tempsUsed);
921 }
922 }
923
924
925 *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
926
927 header = (struct tgsi_header *) &tokens[1];
928 *header = tgsi_build_header();
929
930 processor = (struct tgsi_processor *) &tokens[2];
931 *processor = tgsi_build_processor( procType, header );
932
933 ti = 3;
934
935 /*
936 * Declare input attributes.
937 */
938 if (procType == TGSI_PROCESSOR_FRAGMENT) {
939 for (i = 0; i < numInputs; i++) {
940 struct tgsi_full_declaration fulldecl;
941 fulldecl = make_input_decl(i,
942 GL_TRUE, interpMode[i],
943 TGSI_WRITEMASK_XYZW,
944 GL_TRUE, inputSemanticName[i],
945 inputSemanticIndex[i],
946 inputFlags[i]);
947 ti += tgsi_build_full_declaration(&fulldecl,
948 &tokens[ti],
949 header,
950 maxTokens - ti );
951 }
952 }
953 else {
954 /* vertex prog */
955 /* XXX: this could probaby be merged with the clause above.
956 * the only difference is the semantic tags.
957 */
958 for (i = 0; i < numInputs; i++) {
959 struct tgsi_full_declaration fulldecl;
960 fulldecl = make_input_decl(i,
961 GL_FALSE, 0,
962 TGSI_WRITEMASK_XYZW,
963 GL_FALSE, 0, 0,
964 inputFlags[i]);
965 ti += tgsi_build_full_declaration(&fulldecl,
966 &tokens[ti],
967 header,
968 maxTokens - ti );
969 }
970 }
971
972 /*
973 * Declare output attributes.
974 */
975 if (procType == TGSI_PROCESSOR_FRAGMENT) {
976 for (i = 0; i < numOutputs; i++) {
977 struct tgsi_full_declaration fulldecl;
978 switch (outputSemanticName[i]) {
979 case TGSI_SEMANTIC_POSITION:
980 fulldecl = make_output_decl(i,
981 TGSI_SEMANTIC_POSITION, /* Z / Depth */
982 outputSemanticIndex[i],
983 TGSI_WRITEMASK_Z,
984 outputFlags[i]);
985 break;
986 case TGSI_SEMANTIC_COLOR:
987 fulldecl = make_output_decl(i,
988 TGSI_SEMANTIC_COLOR,
989 outputSemanticIndex[i],
990 TGSI_WRITEMASK_XYZW,
991 outputFlags[i]);
992 break;
993 default:
994 assert(0);
995 return 0;
996 }
997 ti += tgsi_build_full_declaration(&fulldecl,
998 &tokens[ti],
999 header,
1000 maxTokens - ti );
1001 }
1002 }
1003 else {
1004 /* vertex prog */
1005 for (i = 0; i < numOutputs; i++) {
1006 struct tgsi_full_declaration fulldecl;
1007 fulldecl = make_output_decl(i,
1008 outputSemanticName[i],
1009 outputSemanticIndex[i],
1010 TGSI_WRITEMASK_XYZW,
1011 outputFlags[i]);
1012 ti += tgsi_build_full_declaration(&fulldecl,
1013 &tokens[ti],
1014 header,
1015 maxTokens - ti );
1016 }
1017 }
1018
1019 /* temporary decls */
1020 {
1021 GLboolean inside_range = GL_FALSE;
1022 GLuint start_range = 0;
1023
1024 tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE;
1025 for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) {
1026 if (tempsUsed[i] && !inside_range) {
1027 inside_range = GL_TRUE;
1028 start_range = i;
1029 }
1030 else if (!tempsUsed[i] && inside_range) {
1031 struct tgsi_full_declaration fulldecl;
1032
1033 inside_range = GL_FALSE;
1034 fulldecl = make_temp_decl( start_range, i - 1 );
1035 ti += tgsi_build_full_declaration(
1036 &fulldecl,
1037 &tokens[ti],
1038 header,
1039 maxTokens - ti );
1040 }
1041 }
1042 }
1043
1044 /* Declare address register.
1045 */
1046 if (program->NumAddressRegs > 0) {
1047 struct tgsi_full_declaration fulldecl;
1048
1049 assert( program->NumAddressRegs == 1 );
1050
1051 fulldecl = make_addr_decl( 0, 0 );
1052 ti += tgsi_build_full_declaration(
1053 &fulldecl,
1054 &tokens[ti],
1055 header,
1056 maxTokens - ti );
1057
1058 indirectAccess = GL_TRUE;
1059 }
1060
1061 /* immediates/literals */
1062 memset(immediates, ~0, sizeof(immediates));
1063
1064 /* Emit immediates only when there is no address register in use.
1065 * FIXME: Be smarter and recognize param arrays -- indirect addressing is
1066 * only valid within the referenced array.
1067 */
1068 if (program->Parameters && !indirectAccess) {
1069 for (i = 0; i < program->Parameters->NumParameters; i++) {
1070 if (program->Parameters->Parameters[i].Type == PROGRAM_CONSTANT) {
1071 struct tgsi_full_immediate fullimm;
1072
1073 fullimm = make_immediate( program->Parameters->ParameterValues[i], 4 );
1074 ti += tgsi_build_full_immediate(
1075 &fullimm,
1076 &tokens[ti],
1077 header,
1078 maxTokens - ti );
1079 immediates[i] = numImmediates;
1080 numImmediates++;
1081 }
1082 }
1083 }
1084
1085 /* constant buffer refs */
1086 if (program->Parameters) {
1087 GLint start = -1, end = -1;
1088
1089 for (i = 0; i < program->Parameters->NumParameters; i++) {
1090 GLboolean emit = (i == program->Parameters->NumParameters - 1);
1091 GLboolean matches;
1092
1093 switch (program->Parameters->Parameters[i].Type) {
1094 case PROGRAM_ENV_PARAM:
1095 case PROGRAM_STATE_VAR:
1096 case PROGRAM_NAMED_PARAM:
1097 case PROGRAM_UNIFORM:
1098 matches = GL_TRUE;
1099 break;
1100 case PROGRAM_CONSTANT:
1101 matches = indirectAccess;
1102 break;
1103 default:
1104 matches = GL_FALSE;
1105 }
1106
1107 if (matches) {
1108 if (start == -1) {
1109 /* begin a sequence */
1110 start = i;
1111 end = i;
1112 }
1113 else {
1114 /* continue sequence */
1115 end = i;
1116 }
1117 }
1118 else {
1119 if (start != -1) {
1120 /* end of sequence */
1121 emit = GL_TRUE;
1122 }
1123 }
1124
1125 if (emit && start >= 0) {
1126 struct tgsi_full_declaration fulldecl;
1127
1128 fulldecl = make_constant_decl( start, end );
1129 ti += tgsi_build_full_declaration(
1130 &fulldecl,
1131 &tokens[ti],
1132 header,
1133 maxTokens - ti );
1134 start = end = -1;
1135 }
1136 }
1137 }
1138
1139 /* texture samplers */
1140 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
1141 if (program->SamplersUsed & (1 << i)) {
1142 struct tgsi_full_declaration fulldecl;
1143
1144 fulldecl = make_sampler_decl( i );
1145 ti += tgsi_build_full_declaration(
1146 &fulldecl,
1147 &tokens[ti],
1148 header,
1149 maxTokens - ti );
1150 }
1151 }
1152
1153 /* invert WPOS fragment input */
1154 if (wposTemp >= 0) {
1155 ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst,
1156 inputMapping[FRAG_ATTRIB_WPOS],
1157 header, maxTokens - ti);
1158 preamble_size = 2; /* two instructions added */
1159 }
1160
1161 for (i = 0; i < program->NumInstructions; i++) {
1162 struct tgsi_full_instruction fullinst;
1163
1164 compile_instruction(
1165 &program->Instructions[i],
1166 &fullinst,
1167 inputMapping,
1168 outputMapping,
1169 immediates,
1170 indirectAccess,
1171 preamble_size,
1172 procType,
1173 &insideSubroutine,
1174 wposTemp);
1175
1176 ti += tgsi_build_full_instruction(
1177 &fullinst,
1178 &tokens[ti],
1179 header,
1180 maxTokens - ti );
1181 }
1182
1183 #if DEBUG
1184 if(!tgsi_sanity_check(tokens)) {
1185 debug_printf("Due to sanity check failure(s) above the following shader program is invalid:\n");
1186 debug_printf("\nOriginal program:\n%s", program->String);
1187 debug_printf("\nMesa program:\n");
1188 _mesa_print_program(program);
1189 debug_printf("\nTGSI program:\n");
1190 tgsi_dump(tokens, 0);
1191 assert(0);
1192 }
1193 #endif
1194
1195 return ti;
1196 }