st/mesa: better handling of indirect addressing of temp/const register files
[mesa.git] / src / mesa / state_tracker / st_mesa_to_tgsi.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * \author
30 * Michal Krol,
31 * Keith Whitwell
32 */
33
34 #include "pipe/p_compiler.h"
35 #include "pipe/p_shader_tokens.h"
36 #include "pipe/p_state.h"
37 #include "pipe/p_context.h"
38 #include "tgsi/tgsi_ureg.h"
39 #include "st_mesa_to_tgsi.h"
40 #include "st_context.h"
41 #include "program/prog_instruction.h"
42 #include "program/prog_parameter.h"
43 #include "util/u_debug.h"
44 #include "util/u_math.h"
45 #include "util/u_memory.h"
46
47
/**
 * Bitmask with one bit per register file (bit position = gl_register_file
 * value) for every file that src_register() resolves through
 * st_translate::constants.
 */
#define PROGRAM_ANY_CONST            \
   ((1 << PROGRAM_LOCAL_PARAM) |     \
    (1 << PROGRAM_ENV_PARAM)   |     \
    (1 << PROGRAM_STATE_VAR)   |     \
    (1 << PROGRAM_NAMED_PARAM) |     \
    (1 << PROGRAM_CONSTANT)    |     \
    (1 << PROGRAM_UNIFORM))
54
55
/**
 * One pending branch: the Mesa instruction index that is branched to and
 * the TGSI label token that gets patched once that instruction's TGSI
 * position is known.
 */
struct label {
   unsigned branch_target;   /* Mesa instruction index, looked up in insn[] */
   unsigned token;           /* label token later given to ureg_fixup_label() */
};
60
61
62 /**
63 * Intermediate state used during shader translation.
64 */
65 struct st_translate {
66 struct ureg_program *ureg;
67
68 struct ureg_dst temps[MAX_PROGRAM_TEMPS];
69 struct ureg_src *constants;
70 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
71 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
72 struct ureg_dst address[1];
73 struct ureg_src samplers[PIPE_MAX_SAMPLERS];
74
75 /* Extra info for handling point size clamping in vertex shader */
76 struct ureg_dst pointSizeResult; /**< Actual point size output register */
77 struct ureg_src pointSizeConst; /**< Point size range constant register */
78 GLint pointSizeOutIndex; /**< Temp point size output register */
79 GLboolean prevInstWrotePointSize;
80
81 const GLuint *inputMapping;
82 const GLuint *outputMapping;
83
84 /* For every instruction that contains a label (eg CALL), keep
85 * details so that we can go back afterwards and emit the correct
86 * tgsi instruction number for each label.
87 */
88 struct label *labels;
89 unsigned labels_size;
90 unsigned labels_count;
91
92 /* Keep a record of the tgsi instruction number that each mesa
93 * instruction starts at, will be used to fix up labels after
94 * translation.
95 */
96 unsigned *insn;
97 unsigned insn_size;
98 unsigned insn_count;
99
100 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
101
102 boolean error;
103 };
104
105
106 /**
107 * Make note of a branch to a label in the TGSI code.
108 * After we've emitted all instructions, we'll go over the list
109 * of labels built here and patch the TGSI code with the actual
110 * location of each label.
111 */
112 static unsigned *get_label( struct st_translate *t,
113 unsigned branch_target )
114 {
115 unsigned i;
116
117 if (t->labels_count + 1 >= t->labels_size) {
118 unsigned old_size = t->labels_size;
119 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
120 t->labels = REALLOC( t->labels,
121 old_size * sizeof t->labels[0],
122 t->labels_size * sizeof t->labels[0] );
123 if (t->labels == NULL) {
124 static unsigned dummy;
125 t->error = TRUE;
126 return &dummy;
127 }
128 }
129
130 i = t->labels_count++;
131 t->labels[i].branch_target = branch_target;
132 return &t->labels[i].token;
133 }
134
135
136 /**
137 * Called prior to emitting the TGSI code for each Mesa instruction.
138 * Allocate additional space for instructions if needed.
139 * Update the insn[] array so the next Mesa instruction points to
140 * the next TGSI instruction.
141 */
142 static void set_insn_start( struct st_translate *t,
143 unsigned start )
144 {
145 if (t->insn_count + 1 >= t->insn_size) {
146 unsigned old_size = t->insn_size;
147 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
148 t->insn = REALLOC( t->insn,
149 old_size * sizeof t->insn[0],
150 t->insn_size * sizeof t->insn[0] );
151 if (t->insn == NULL) {
152 t->error = TRUE;
153 return;
154 }
155 }
156
157 t->insn[t->insn_count++] = start;
158 }
159
160
161 /**
162 * Map a Mesa dst register to a TGSI ureg_dst register.
163 */
164 static struct ureg_dst
165 dst_register( struct st_translate *t,
166 gl_register_file file,
167 GLuint index )
168 {
169 switch( file ) {
170 case PROGRAM_UNDEFINED:
171 return ureg_dst_undef();
172
173 case PROGRAM_TEMPORARY:
174 if (ureg_dst_is_undef(t->temps[index]))
175 t->temps[index] = ureg_DECL_temporary( t->ureg );
176
177 return t->temps[index];
178
179 case PROGRAM_OUTPUT:
180 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
181 t->prevInstWrotePointSize = GL_TRUE;
182
183 if (t->procType == TGSI_PROCESSOR_VERTEX)
184 assert(index < VERT_RESULT_MAX);
185 else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
186 assert(index < FRAG_RESULT_MAX);
187 else
188 assert(index < GEOM_RESULT_MAX);
189
190 assert(t->outputMapping[index] < Elements(t->outputs));
191
192 return t->outputs[t->outputMapping[index]];
193
194 case PROGRAM_ADDRESS:
195 return t->address[index];
196
197 default:
198 debug_assert( 0 );
199 return ureg_dst_undef();
200 }
201 }
202
203
204 /**
205 * Map a Mesa src register to a TGSI ureg_src register.
206 */
207 static struct ureg_src
208 src_register( struct st_translate *t,
209 gl_register_file file,
210 GLint index )
211 {
212 switch( file ) {
213 case PROGRAM_UNDEFINED:
214 return ureg_src_undef();
215
216 case PROGRAM_TEMPORARY:
217 ASSERT(index >= 0);
218 if (ureg_dst_is_undef(t->temps[index]))
219 t->temps[index] = ureg_DECL_temporary( t->ureg );
220 assert(index < Elements(t->temps));
221 return ureg_src(t->temps[index]);
222
223 case PROGRAM_NAMED_PARAM:
224 case PROGRAM_ENV_PARAM:
225 case PROGRAM_LOCAL_PARAM:
226 case PROGRAM_UNIFORM:
227 ASSERT(index >= 0);
228 return t->constants[index];
229 case PROGRAM_STATE_VAR:
230 case PROGRAM_CONSTANT: /* ie, immediate */
231 if (index < 0)
232 return ureg_DECL_constant( t->ureg, 0 );
233 else
234 return t->constants[index];
235
236 case PROGRAM_INPUT:
237 assert(t->inputMapping[index] < Elements(t->inputs));
238 return t->inputs[t->inputMapping[index]];
239
240 case PROGRAM_OUTPUT:
241 assert(t->outputMapping[index] < Elements(t->outputs));
242 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
243
244 case PROGRAM_ADDRESS:
245 return ureg_src(t->address[index]);
246
247 default:
248 debug_assert( 0 );
249 return ureg_src_undef();
250 }
251 }
252
253
254 /**
255 * Map mesa texture target to TGSI texture target.
256 */
257 static unsigned
258 translate_texture_target( GLuint textarget,
259 GLboolean shadow )
260 {
261 if (shadow) {
262 switch( textarget ) {
263 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_SHADOW1D;
264 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_SHADOW2D;
265 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT;
266 default: break;
267 }
268 }
269
270 switch( textarget ) {
271 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D;
272 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D;
273 case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D;
274 case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE;
275 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT;
276 default:
277 debug_assert( 0 );
278 return TGSI_TEXTURE_1D;
279 }
280 }
281
282
283 /**
284 * Create a TGSI ureg_dst register from a Mesa dest register.
285 */
286 static struct ureg_dst
287 translate_dst( struct st_translate *t,
288 const struct prog_dst_register *DstReg,
289 boolean saturate )
290 {
291 struct ureg_dst dst = dst_register( t,
292 DstReg->File,
293 DstReg->Index );
294
295 dst = ureg_writemask( dst,
296 DstReg->WriteMask );
297
298 if (saturate)
299 dst = ureg_saturate( dst );
300
301 if (DstReg->RelAddr)
302 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
303
304 return dst;
305 }
306
307
308 /**
309 * Create a TGSI ureg_src register from a Mesa src register.
310 */
311 static struct ureg_src
312 translate_src( struct st_translate *t,
313 const struct prog_src_register *SrcReg )
314 {
315 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
316
317 if (t->procType == TGSI_PROCESSOR_GEOMETRY && SrcReg->HasIndex2) {
318 src = src_register( t, SrcReg->File, SrcReg->Index2 );
319 if (SrcReg->RelAddr2)
320 src = ureg_src_dimension_indirect( src, ureg_src(t->address[0]),
321 SrcReg->Index);
322 else
323 src = ureg_src_dimension( src, SrcReg->Index);
324 }
325
326 src = ureg_swizzle( src,
327 GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3,
328 GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3,
329 GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3,
330 GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3);
331
332 if (SrcReg->Negate == NEGATE_XYZW)
333 src = ureg_negate(src);
334
335 if (SrcReg->Abs)
336 src = ureg_abs(src);
337
338 if (SrcReg->RelAddr) {
339 src = ureg_src_indirect( src, ureg_src(t->address[0]));
340 if (SrcReg->File != PROGRAM_INPUT &&
341 SrcReg->File != PROGRAM_OUTPUT) {
342 /* If SrcReg->Index was negative, it was set to zero in
343 * src_register(). Reassign it now. But don't do this
344 * for input/output regs since they get remapped while
345 * const buffers don't.
346 */
347 src.Index = SrcReg->Index;
348 }
349 }
350
351 return src;
352 }
353
354
355 static struct ureg_src swizzle_4v( struct ureg_src src,
356 const unsigned *swz )
357 {
358 return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] );
359 }
360
361
362 /**
363 * Translate a SWZ instruction into a MOV, MUL or MAD instruction. EG:
364 *
365 * SWZ dst, src.x-y10
366 *
367 * becomes:
368 *
369 * MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0}
370 */
371 static void emit_swz( struct st_translate *t,
372 struct ureg_dst dst,
373 const struct prog_src_register *SrcReg )
374 {
375 struct ureg_program *ureg = t->ureg;
376 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
377
378 unsigned negate_mask = SrcReg->Negate;
379
380 unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 |
381 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 |
382 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 |
383 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3);
384
385 unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 |
386 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 |
387 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 |
388 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3);
389
390 unsigned negative_one_mask = one_mask & negate_mask;
391 unsigned positive_one_mask = one_mask & ~negate_mask;
392
393 struct ureg_src imm;
394 unsigned i;
395 unsigned mul_swizzle[4] = {0,0,0,0};
396 unsigned add_swizzle[4] = {0,0,0,0};
397 unsigned src_swizzle[4] = {0,0,0,0};
398 boolean need_add = FALSE;
399 boolean need_mul = FALSE;
400
401 if (dst.WriteMask == 0)
402 return;
403
404 /* Is this just a MOV?
405 */
406 if (zero_mask == 0 &&
407 one_mask == 0 &&
408 (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW))
409 {
410 ureg_MOV( ureg, dst, translate_src( t, SrcReg ));
411 return;
412 }
413
414 #define IMM_ZERO 0
415 #define IMM_ONE 1
416 #define IMM_NEG_ONE 2
417
418 imm = ureg_imm3f( ureg, 0, 1, -1 );
419
420 for (i = 0; i < 4; i++) {
421 unsigned bit = 1 << i;
422
423 if (dst.WriteMask & bit) {
424 if (positive_one_mask & bit) {
425 mul_swizzle[i] = IMM_ZERO;
426 add_swizzle[i] = IMM_ONE;
427 need_add = TRUE;
428 }
429 else if (negative_one_mask & bit) {
430 mul_swizzle[i] = IMM_ZERO;
431 add_swizzle[i] = IMM_NEG_ONE;
432 need_add = TRUE;
433 }
434 else if (zero_mask & bit) {
435 mul_swizzle[i] = IMM_ZERO;
436 add_swizzle[i] = IMM_ZERO;
437 need_add = TRUE;
438 }
439 else {
440 add_swizzle[i] = IMM_ZERO;
441 src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
442 need_mul = TRUE;
443 if (negate_mask & bit) {
444 mul_swizzle[i] = IMM_NEG_ONE;
445 }
446 else {
447 mul_swizzle[i] = IMM_ONE;
448 }
449 }
450 }
451 }
452
453 if (need_mul && need_add) {
454 ureg_MAD( ureg,
455 dst,
456 swizzle_4v( src, src_swizzle ),
457 swizzle_4v( imm, mul_swizzle ),
458 swizzle_4v( imm, add_swizzle ) );
459 }
460 else if (need_mul) {
461 ureg_MUL( ureg,
462 dst,
463 swizzle_4v( src, src_swizzle ),
464 swizzle_4v( imm, mul_swizzle ) );
465 }
466 else if (need_add) {
467 ureg_MOV( ureg,
468 dst,
469 swizzle_4v( imm, add_swizzle ) );
470 }
471 else {
472 debug_assert(0);
473 }
474
475 #undef IMM_ZERO
476 #undef IMM_ONE
477 #undef IMM_NEG_ONE
478 }
479
480
481 /**
482 * Negate the value of DDY to match GL semantics where (0,0) is the
483 * lower-left corner of the window.
484 * Note that the GL_ARB_fragment_coord_conventions extension will
485 * effect this someday.
486 */
487 static void emit_ddy( struct st_translate *t,
488 struct ureg_dst dst,
489 const struct prog_src_register *SrcReg )
490 {
491 struct ureg_program *ureg = t->ureg;
492 struct ureg_src src = translate_src( t, SrcReg );
493 src = ureg_negate( src );
494 ureg_DDY( ureg, dst, src );
495 }
496
497
498
499 static unsigned
500 translate_opcode( unsigned op )
501 {
502 switch( op ) {
503 case OPCODE_ARL:
504 return TGSI_OPCODE_ARL;
505 case OPCODE_ABS:
506 return TGSI_OPCODE_ABS;
507 case OPCODE_ADD:
508 return TGSI_OPCODE_ADD;
509 case OPCODE_BGNLOOP:
510 return TGSI_OPCODE_BGNLOOP;
511 case OPCODE_BGNSUB:
512 return TGSI_OPCODE_BGNSUB;
513 case OPCODE_BRA:
514 return TGSI_OPCODE_BRA;
515 case OPCODE_BRK:
516 return TGSI_OPCODE_BRK;
517 case OPCODE_CAL:
518 return TGSI_OPCODE_CAL;
519 case OPCODE_CMP:
520 return TGSI_OPCODE_CMP;
521 case OPCODE_CONT:
522 return TGSI_OPCODE_CONT;
523 case OPCODE_COS:
524 return TGSI_OPCODE_COS;
525 case OPCODE_DDX:
526 return TGSI_OPCODE_DDX;
527 case OPCODE_DDY:
528 return TGSI_OPCODE_DDY;
529 case OPCODE_DP2:
530 return TGSI_OPCODE_DP2;
531 case OPCODE_DP2A:
532 return TGSI_OPCODE_DP2A;
533 case OPCODE_DP3:
534 return TGSI_OPCODE_DP3;
535 case OPCODE_DP4:
536 return TGSI_OPCODE_DP4;
537 case OPCODE_DPH:
538 return TGSI_OPCODE_DPH;
539 case OPCODE_DST:
540 return TGSI_OPCODE_DST;
541 case OPCODE_ELSE:
542 return TGSI_OPCODE_ELSE;
543 case OPCODE_EMIT_VERTEX:
544 return TGSI_OPCODE_EMIT;
545 case OPCODE_END_PRIMITIVE:
546 return TGSI_OPCODE_ENDPRIM;
547 case OPCODE_ENDIF:
548 return TGSI_OPCODE_ENDIF;
549 case OPCODE_ENDLOOP:
550 return TGSI_OPCODE_ENDLOOP;
551 case OPCODE_ENDSUB:
552 return TGSI_OPCODE_ENDSUB;
553 case OPCODE_EX2:
554 return TGSI_OPCODE_EX2;
555 case OPCODE_EXP:
556 return TGSI_OPCODE_EXP;
557 case OPCODE_FLR:
558 return TGSI_OPCODE_FLR;
559 case OPCODE_FRC:
560 return TGSI_OPCODE_FRC;
561 case OPCODE_IF:
562 return TGSI_OPCODE_IF;
563 case OPCODE_TRUNC:
564 return TGSI_OPCODE_TRUNC;
565 case OPCODE_KIL:
566 return TGSI_OPCODE_KIL;
567 case OPCODE_KIL_NV:
568 return TGSI_OPCODE_KILP;
569 case OPCODE_LG2:
570 return TGSI_OPCODE_LG2;
571 case OPCODE_LOG:
572 return TGSI_OPCODE_LOG;
573 case OPCODE_LIT:
574 return TGSI_OPCODE_LIT;
575 case OPCODE_LRP:
576 return TGSI_OPCODE_LRP;
577 case OPCODE_MAD:
578 return TGSI_OPCODE_MAD;
579 case OPCODE_MAX:
580 return TGSI_OPCODE_MAX;
581 case OPCODE_MIN:
582 return TGSI_OPCODE_MIN;
583 case OPCODE_MOV:
584 return TGSI_OPCODE_MOV;
585 case OPCODE_MUL:
586 return TGSI_OPCODE_MUL;
587 case OPCODE_NOP:
588 return TGSI_OPCODE_NOP;
589 case OPCODE_NRM3:
590 return TGSI_OPCODE_NRM;
591 case OPCODE_NRM4:
592 return TGSI_OPCODE_NRM4;
593 case OPCODE_POW:
594 return TGSI_OPCODE_POW;
595 case OPCODE_RCP:
596 return TGSI_OPCODE_RCP;
597 case OPCODE_RET:
598 return TGSI_OPCODE_RET;
599 case OPCODE_RSQ:
600 return TGSI_OPCODE_RSQ;
601 case OPCODE_SCS:
602 return TGSI_OPCODE_SCS;
603 case OPCODE_SEQ:
604 return TGSI_OPCODE_SEQ;
605 case OPCODE_SGE:
606 return TGSI_OPCODE_SGE;
607 case OPCODE_SGT:
608 return TGSI_OPCODE_SGT;
609 case OPCODE_SIN:
610 return TGSI_OPCODE_SIN;
611 case OPCODE_SLE:
612 return TGSI_OPCODE_SLE;
613 case OPCODE_SLT:
614 return TGSI_OPCODE_SLT;
615 case OPCODE_SNE:
616 return TGSI_OPCODE_SNE;
617 case OPCODE_SSG:
618 return TGSI_OPCODE_SSG;
619 case OPCODE_SUB:
620 return TGSI_OPCODE_SUB;
621 case OPCODE_TEX:
622 return TGSI_OPCODE_TEX;
623 case OPCODE_TXB:
624 return TGSI_OPCODE_TXB;
625 case OPCODE_TXD:
626 return TGSI_OPCODE_TXD;
627 case OPCODE_TXL:
628 return TGSI_OPCODE_TXL;
629 case OPCODE_TXP:
630 return TGSI_OPCODE_TXP;
631 case OPCODE_XPD:
632 return TGSI_OPCODE_XPD;
633 case OPCODE_END:
634 return TGSI_OPCODE_END;
635 default:
636 debug_assert( 0 );
637 return TGSI_OPCODE_NOP;
638 }
639 }
640
641
642 static void
643 compile_instruction(
644 struct st_translate *t,
645 const struct prog_instruction *inst )
646 {
647 struct ureg_program *ureg = t->ureg;
648 GLuint i;
649 struct ureg_dst dst[1];
650 struct ureg_src src[4];
651 unsigned num_dst;
652 unsigned num_src;
653
654 num_dst = _mesa_num_inst_dst_regs( inst->Opcode );
655 num_src = _mesa_num_inst_src_regs( inst->Opcode );
656
657 if (num_dst)
658 dst[0] = translate_dst( t,
659 &inst->DstReg,
660 inst->SaturateMode );
661
662 for (i = 0; i < num_src; i++)
663 src[i] = translate_src( t, &inst->SrcReg[i] );
664
665 switch( inst->Opcode ) {
666 case OPCODE_SWZ:
667 emit_swz( t, dst[0], &inst->SrcReg[0] );
668 return;
669
670 case OPCODE_BGNLOOP:
671 case OPCODE_CAL:
672 case OPCODE_ELSE:
673 case OPCODE_ENDLOOP:
674 case OPCODE_IF:
675 debug_assert(num_dst == 0);
676 ureg_label_insn( ureg,
677 translate_opcode( inst->Opcode ),
678 src, num_src,
679 get_label( t, inst->BranchTarget ));
680 return;
681
682 case OPCODE_TEX:
683 case OPCODE_TXB:
684 case OPCODE_TXD:
685 case OPCODE_TXL:
686 case OPCODE_TXP:
687 src[num_src++] = t->samplers[inst->TexSrcUnit];
688 ureg_tex_insn( ureg,
689 translate_opcode( inst->Opcode ),
690 dst, num_dst,
691 translate_texture_target( inst->TexSrcTarget,
692 inst->TexShadow ),
693 src, num_src );
694 return;
695
696 case OPCODE_SCS:
697 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
698 ureg_insn( ureg,
699 translate_opcode( inst->Opcode ),
700 dst, num_dst,
701 src, num_src );
702 break;
703
704 case OPCODE_XPD:
705 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
706 ureg_insn( ureg,
707 translate_opcode( inst->Opcode ),
708 dst, num_dst,
709 src, num_src );
710 break;
711
712 case OPCODE_NOISE1:
713 case OPCODE_NOISE2:
714 case OPCODE_NOISE3:
715 case OPCODE_NOISE4:
716 /* At some point, a motivated person could add a better
717 * implementation of noise. Currently not even the nvidia
718 * binary drivers do anything more than this. In any case, the
719 * place to do this is in the GL state tracker, not the poor
720 * driver.
721 */
722 ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
723 break;
724
725 case OPCODE_DDY:
726 emit_ddy( t, dst[0], &inst->SrcReg[0] );
727 break;
728
729 default:
730 ureg_insn( ureg,
731 translate_opcode( inst->Opcode ),
732 dst, num_dst,
733 src, num_src );
734 break;
735 }
736 }
737
738
739 /**
740 * Emit the TGSI instructions to adjust the WPOS pixel center convention
741 */
742 static void
743 emit_adjusted_wpos( struct st_translate *t,
744 const struct gl_program *program, GLfloat value)
745 {
746 struct ureg_program *ureg = t->ureg;
747 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
748 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
749
750 /* Note that we bias X and Y and pass Z and W through unchanged.
751 * The shader might also use gl_FragCoord.w and .z.
752 */
753 ureg_ADD(ureg, wpos_temp, wpos_input,
754 ureg_imm4f(ureg, value, value, 0.0f, 0.0f));
755
756 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
757 }
758
759
760 /**
761 * Emit the TGSI instructions for inverting the WPOS y coordinate.
762 */
763 static void
764 emit_inverted_wpos( struct st_translate *t,
765 const struct gl_program *program )
766 {
767 struct ureg_program *ureg = t->ureg;
768
769 /* Fragment program uses fragment position input.
770 * Need to replace instances of INPUT[WPOS] with temp T
771 * where T = INPUT[WPOS] by y is inverted.
772 */
773 static const gl_state_index winSizeState[STATE_LENGTH]
774 = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
775
776 /* XXX: note we are modifying the incoming shader here! Need to
777 * do this before emitting the constant decls below, or this
778 * will be missed:
779 */
780 unsigned winHeightConst = _mesa_add_state_reference(program->Parameters,
781 winSizeState);
782
783 struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst );
784 struct ureg_dst wpos_temp;
785 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
786
787 /* MOV wpos_temp, input[wpos]
788 */
789 if (wpos_input.File == TGSI_FILE_TEMPORARY)
790 wpos_temp = ureg_dst(wpos_input);
791 else {
792 wpos_temp = ureg_DECL_temporary( ureg );
793 ureg_MOV( ureg, wpos_temp, wpos_input );
794 }
795
796 /* SUB wpos_temp.y, winsize_const, wpos_input
797 */
798 ureg_SUB( ureg,
799 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
800 winsize,
801 wpos_input);
802
803 /* Use wpos_temp as position input from here on:
804 */
805 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
806 }
807
808
809 /**
810 * Emit fragment position/ooordinate code.
811 */
812 static void
813 emit_wpos(struct st_context *st,
814 struct st_translate *t,
815 const struct gl_program *program,
816 struct ureg_program *ureg)
817 {
818 const struct gl_fragment_program *fp =
819 (const struct gl_fragment_program *) program;
820 struct pipe_screen *pscreen = st->pipe->screen;
821 boolean invert = FALSE;
822
823 if (fp->OriginUpperLeft) {
824 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
825 }
826 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
827 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
828 invert = TRUE;
829 }
830 else
831 assert(0);
832 }
833 else {
834 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
835 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
836 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
837 invert = TRUE;
838 else
839 assert(0);
840 }
841
842 if (fp->PixelCenterInteger) {
843 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
844 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
845 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
846 emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f);
847 else
848 assert(0);
849 }
850 else {
851 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
852 }
853 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
854 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
855 emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f);
856 }
857 else
858 assert(0);
859 }
860
861 /* we invert after adjustment so that we avoid the MOV to temporary,
862 * and reuse the adjustment ADD instead */
863 if (invert)
864 emit_inverted_wpos(t, program);
865 }
866
867
868 /**
869 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
870 * TGSI uses +1 for front, -1 for back.
871 * This function converts the TGSI value to the GL value. Simply clamping/
872 * saturating the value to [0,1] does the job.
873 */
874 static void
875 emit_face_var( struct st_translate *t,
876 const struct gl_program *program )
877 {
878 struct ureg_program *ureg = t->ureg;
879 struct ureg_dst face_temp = ureg_DECL_temporary( ureg );
880 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
881
882 /* MOV_SAT face_temp, input[face]
883 */
884 face_temp = ureg_saturate( face_temp );
885 ureg_MOV( ureg, face_temp, face_input );
886
887 /* Use face_temp as face input from here on:
888 */
889 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
890 }
891
892
893 static void
894 emit_edgeflags( struct st_translate *t,
895 const struct gl_program *program )
896 {
897 struct ureg_program *ureg = t->ureg;
898 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
899 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
900
901 ureg_MOV( ureg, edge_dst, edge_src );
902 }
903
904
905 /**
906 * Translate Mesa program to TGSI format.
907 * \param program the program to translate
908 * \param numInputs number of input registers used
909 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
910 * input indexes
911 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
912 * \param inputSemanticIndex the semantic index (ex: which texcoord) for
913 * each input
914 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
915 * \param numOutputs number of output registers used
916 * \param outputMapping maps Mesa fragment program outputs to TGSI
917 * generic outputs
918 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
919 * \param outputSemanticIndex the semantic index (ex: which texcoord) for
920 * each output
921 *
922 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
923 */
924 enum pipe_error
925 st_translate_mesa_program(
926 GLcontext *ctx,
927 uint procType,
928 struct ureg_program *ureg,
929 const struct gl_program *program,
930 GLuint numInputs,
931 const GLuint inputMapping[],
932 const ubyte inputSemanticName[],
933 const ubyte inputSemanticIndex[],
934 const GLuint interpMode[],
935 GLuint numOutputs,
936 const GLuint outputMapping[],
937 const ubyte outputSemanticName[],
938 const ubyte outputSemanticIndex[],
939 boolean passthrough_edgeflags )
940 {
941 struct st_translate translate, *t;
942 unsigned i;
943 enum pipe_error ret = PIPE_OK;
944
945 assert(numInputs <= Elements(t->inputs));
946 assert(numOutputs <= Elements(t->outputs));
947
948 t = &translate;
949 memset(t, 0, sizeof *t);
950
951 t->procType = procType;
952 t->inputMapping = inputMapping;
953 t->outputMapping = outputMapping;
954 t->ureg = ureg;
955 t->pointSizeOutIndex = -1;
956 t->prevInstWrotePointSize = GL_FALSE;
957
958 /*_mesa_print_program(program);*/
959
960 /*
961 * Declare input attributes.
962 */
963 if (procType == TGSI_PROCESSOR_FRAGMENT) {
964 for (i = 0; i < numInputs; i++) {
965 if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) {
966 t->inputs[i] = ureg_DECL_fs_input_cyl(ureg,
967 inputSemanticName[i],
968 inputSemanticIndex[i],
969 interpMode[i],
970 TGSI_CYLINDRICAL_WRAP_X);
971 }
972 else {
973 t->inputs[i] = ureg_DECL_fs_input(ureg,
974 inputSemanticName[i],
975 inputSemanticIndex[i],
976 interpMode[i]);
977 }
978 }
979
980 if (program->InputsRead & FRAG_BIT_WPOS) {
981 /* Must do this after setting up t->inputs, and before
982 * emitting constant references, below:
983 */
984 emit_wpos(st_context(ctx), t, program, ureg);
985 }
986
987 if (program->InputsRead & FRAG_BIT_FACE) {
988 emit_face_var( t, program );
989 }
990
991 /*
992 * Declare output attributes.
993 */
994 for (i = 0; i < numOutputs; i++) {
995 switch (outputSemanticName[i]) {
996 case TGSI_SEMANTIC_POSITION:
997 t->outputs[i] = ureg_DECL_output( ureg,
998 TGSI_SEMANTIC_POSITION, /* Z / Depth */
999 outputSemanticIndex[i] );
1000
1001 t->outputs[i] = ureg_writemask( t->outputs[i],
1002 TGSI_WRITEMASK_Z );
1003 break;
1004 case TGSI_SEMANTIC_COLOR:
1005 t->outputs[i] = ureg_DECL_output( ureg,
1006 TGSI_SEMANTIC_COLOR,
1007 outputSemanticIndex[i] );
1008 break;
1009 default:
1010 debug_assert(0);
1011 return 0;
1012 }
1013 }
1014 }
1015 else if (procType == TGSI_PROCESSOR_GEOMETRY) {
1016 for (i = 0; i < numInputs; i++) {
1017 t->inputs[i] = ureg_DECL_gs_input(ureg,
1018 i,
1019 inputSemanticName[i],
1020 inputSemanticIndex[i]);
1021 }
1022
1023 for (i = 0; i < numOutputs; i++) {
1024 t->outputs[i] = ureg_DECL_output( ureg,
1025 outputSemanticName[i],
1026 outputSemanticIndex[i] );
1027 }
1028 }
1029 else {
1030 assert(procType == TGSI_PROCESSOR_VERTEX);
1031
1032 for (i = 0; i < numInputs; i++) {
1033 t->inputs[i] = ureg_DECL_vs_input(ureg, i);
1034 }
1035
1036 for (i = 0; i < numOutputs; i++) {
1037 t->outputs[i] = ureg_DECL_output( ureg,
1038 outputSemanticName[i],
1039 outputSemanticIndex[i] );
1040 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) {
1041 /* Writing to the point size result register requires special
1042 * handling to implement clamping.
1043 */
1044 static const gl_state_index pointSizeClampState[STATE_LENGTH]
1045 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 };
1046 /* XXX: note we are modifying the incoming shader here! Need to
1047 * do this before emitting the constant decls below, or this
1048 * will be missed:
1049 */
1050 unsigned pointSizeClampConst =
1051 _mesa_add_state_reference(program->Parameters,
1052 pointSizeClampState);
1053 struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
1054 t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
1055 t->pointSizeResult = t->outputs[i];
1056 t->pointSizeOutIndex = i;
1057 t->outputs[i] = psizregtemp;
1058 }
1059 }
1060 if (passthrough_edgeflags)
1061 emit_edgeflags( t, program );
1062 }
1063
1064 /* Declare address register.
1065 */
1066 if (program->NumAddressRegs > 0) {
1067 debug_assert( program->NumAddressRegs == 1 );
1068 t->address[0] = ureg_DECL_address( ureg );
1069 }
1070
1071 if (program->IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) {
1072 /* If temps are accessed with indirect addressing, declare temporaries
1073 * in sequential order. Else, we declare them on demand elsewhere.
1074 */
1075 for (i = 0; i < program->NumTemporaries; i++) {
1076 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
1077 t->temps[i] = ureg_DECL_temporary( t->ureg );
1078 }
1079 }
1080
1081 /* Emit constants and immediates. Mesa uses a single index space
1082 * for these, so we put all the translated regs in t->constants.
1083 */
1084 if (program->Parameters) {
1085 t->constants = CALLOC( program->Parameters->NumParameters,
1086 sizeof t->constants[0] );
1087 if (t->constants == NULL) {
1088 ret = PIPE_ERROR_OUT_OF_MEMORY;
1089 goto out;
1090 }
1091
1092 for (i = 0; i < program->Parameters->NumParameters; i++) {
1093 switch (program->Parameters->Parameters[i].Type) {
1094 case PROGRAM_ENV_PARAM:
1095 case PROGRAM_LOCAL_PARAM:
1096 case PROGRAM_STATE_VAR:
1097 case PROGRAM_NAMED_PARAM:
1098 case PROGRAM_UNIFORM:
1099 t->constants[i] = ureg_DECL_constant( ureg, i );
1100 break;
1101
1102 /* Emit immediates only when there's no indirect addressing of
1103 * the const buffer.
1104 * FIXME: Be smarter and recognize param arrays:
1105 * indirect addressing is only valid within the referenced
1106 * array.
1107 */
1108 case PROGRAM_CONSTANT:
1109 if (program->IndirectRegisterFiles & PROGRAM_ANY_CONST)
1110 t->constants[i] = ureg_DECL_constant( ureg, i );
1111 else
1112 t->constants[i] =
1113 ureg_DECL_immediate( ureg,
1114 program->Parameters->ParameterValues[i],
1115 4 );
1116 break;
1117 default:
1118 break;
1119 }
1120 }
1121 }
1122
1123 /* texture samplers */
1124 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
1125 if (program->SamplersUsed & (1 << i)) {
1126 t->samplers[i] = ureg_DECL_sampler( ureg, i );
1127 }
1128 }
1129
1130 /* Emit each instruction in turn:
1131 */
1132 for (i = 0; i < program->NumInstructions; i++) {
1133 set_insn_start( t, ureg_get_instruction_number( ureg ));
1134 compile_instruction( t, &program->Instructions[i] );
1135
1136 if (t->prevInstWrotePointSize && program->Id) {
1137 /* The previous instruction wrote to the (fake) vertex point size
1138 * result register. Now we need to clamp that value to the min/max
1139 * point size range, putting the result into the real point size
1140 * register.
1141 * Note that we can't do this easily at the end of program due to
1142 * possible early return.
1143 */
1144 set_insn_start( t, ureg_get_instruction_number( ureg ));
1145 ureg_MAX( t->ureg,
1146 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
1147 ureg_src(t->outputs[t->pointSizeOutIndex]),
1148 ureg_swizzle(t->pointSizeConst, 1,1,1,1));
1149 ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
1150 ureg_src(t->outputs[t->pointSizeOutIndex]),
1151 ureg_swizzle(t->pointSizeConst, 2,2,2,2));
1152 }
1153 t->prevInstWrotePointSize = GL_FALSE;
1154 }
1155
1156 /* Fix up all emitted labels:
1157 */
1158 for (i = 0; i < t->labels_count; i++) {
1159 ureg_fixup_label( ureg,
1160 t->labels[i].token,
1161 t->insn[t->labels[i].branch_target] );
1162 }
1163
1164 out:
1165 FREE(t->insn);
1166 FREE(t->labels);
1167 FREE(t->constants);
1168
1169 if (t->error) {
1170 debug_printf("%s: translate error flag set\n", __FUNCTION__);
1171 }
1172
1173 return ret;
1174 }
1175
1176
/**
 * Release a token array.
 * Tokens must not be freed with plain free(); otherwise the builtin
 * gallium malloc debugging will get confused.
 *
 * \param tokens  token array to release
 */
void
st_free_tokens(const struct tgsi_token *tokens)
{
   void *mem = (void *) tokens;   /* drop const for FREE() */

   FREE(mem);
}