Merge branch 'gallium-nopointsizeminmax'
[mesa.git] / src / mesa / state_tracker / st_mesa_to_tgsi.c
1 /**************************************************************************
2 *
3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * \author
30 * Michal Krol,
31 * Keith Whitwell
32 */
33
34 #include "pipe/p_compiler.h"
35 #include "pipe/p_shader_tokens.h"
36 #include "pipe/p_state.h"
37 #include "pipe/p_context.h"
38 #include "tgsi/tgsi_ureg.h"
39 #include "st_mesa_to_tgsi.h"
40 #include "st_context.h"
41 #include "shader/prog_instruction.h"
42 #include "shader/prog_parameter.h"
43 #include "util/u_debug.h"
44 #include "util/u_math.h"
45 #include "util/u_memory.h"
46
/**
 * One pending branch fix-up.
 *
 * Records which Mesa instruction a branching instruction targets, together
 * with the ureg label token that is patched once the whole program has been
 * translated.
 */
struct label {
   unsigned branch_target;   /* Mesa instruction index being branched to */
   unsigned token;           /* passed to ureg_label_insn() / ureg_fixup_label() */
};
51
52
53 /**
54 * Intermediate state used during shader translation.
55 */
56 struct st_translate {
57 struct ureg_program *ureg;
58
59 struct ureg_dst temps[MAX_PROGRAM_TEMPS];
60 struct ureg_src *constants;
61 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
62 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
63 struct ureg_dst address[1];
64 struct ureg_src samplers[PIPE_MAX_SAMPLERS];
65 struct ureg_dst psizregreal;
66 struct ureg_src pointSizeConst;
67 GLint psizoutindex;
68 GLboolean prevInstWrotePsiz;
69
70 const GLuint *inputMapping;
71 const GLuint *outputMapping;
72
73 /* For every instruction that contains a label (eg CALL), keep
74 * details so that we can go back afterwards and emit the correct
75 * tgsi instruction number for each label.
76 */
77 struct label *labels;
78 unsigned labels_size;
79 unsigned labels_count;
80
81 /* Keep a record of the tgsi instruction number that each mesa
82 * instruction starts at, will be used to fix up labels after
83 * translation.
84 */
85 unsigned *insn;
86 unsigned insn_size;
87 unsigned insn_count;
88
89 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
90
91 boolean error;
92 };
93
94
95 static unsigned *get_label( struct st_translate *t,
96 unsigned branch_target )
97 {
98 unsigned i;
99
100 if (t->labels_count + 1 >= t->labels_size) {
101 unsigned old_size = t->labels_size;
102 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
103 t->labels = REALLOC( t->labels,
104 old_size * sizeof t->labels[0],
105 t->labels_size * sizeof t->labels[0] );
106 if (t->labels == NULL) {
107 static unsigned dummy;
108 t->error = TRUE;
109 return &dummy;
110 }
111 }
112
113 i = t->labels_count++;
114 t->labels[i].branch_target = branch_target;
115 return &t->labels[i].token;
116 }
117
118
119 static void set_insn_start( struct st_translate *t,
120 unsigned start )
121 {
122 if (t->insn_count + 1 >= t->insn_size) {
123 unsigned old_size = t->insn_size;
124 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
125 t->insn = REALLOC( t->insn,
126 old_size * sizeof t->insn[0],
127 t->insn_size * sizeof t->insn[0] );
128 if (t->insn == NULL) {
129 t->error = TRUE;
130 return;
131 }
132 }
133
134 t->insn[t->insn_count++] = start;
135 }
136
137
138 /*
139 * Map mesa register file to TGSI register file.
140 */
141 static struct ureg_dst
142 dst_register( struct st_translate *t,
143 gl_register_file file,
144 GLuint index )
145 {
146 switch( file ) {
147 case PROGRAM_UNDEFINED:
148 return ureg_dst_undef();
149
150 case PROGRAM_TEMPORARY:
151 if (ureg_dst_is_undef(t->temps[index]))
152 t->temps[index] = ureg_DECL_temporary( t->ureg );
153
154 return t->temps[index];
155
156 case PROGRAM_OUTPUT:
157 if (index == t->psizoutindex)
158 t->prevInstWrotePsiz = GL_TRUE;
159 return t->outputs[t->outputMapping[index]];
160
161 case PROGRAM_ADDRESS:
162 return t->address[index];
163
164 default:
165 debug_assert( 0 );
166 return ureg_dst_undef();
167 }
168 }
169
170
171 static struct ureg_src
172 src_register( struct st_translate *t,
173 gl_register_file file,
174 GLint index )
175 {
176 switch( file ) {
177 case PROGRAM_UNDEFINED:
178 return ureg_src_undef();
179
180 case PROGRAM_TEMPORARY:
181 ASSERT(index >= 0);
182 if (ureg_dst_is_undef(t->temps[index]))
183 t->temps[index] = ureg_DECL_temporary( t->ureg );
184 return ureg_src(t->temps[index]);
185
186 case PROGRAM_NAMED_PARAM:
187 case PROGRAM_ENV_PARAM:
188 case PROGRAM_LOCAL_PARAM:
189 case PROGRAM_UNIFORM:
190 ASSERT(index >= 0);
191 return t->constants[index];
192 case PROGRAM_STATE_VAR:
193 case PROGRAM_CONSTANT: /* ie, immediate */
194 if (index < 0)
195 return ureg_DECL_constant( t->ureg, 0 );
196 else
197 return t->constants[index];
198
199 case PROGRAM_INPUT:
200 return t->inputs[t->inputMapping[index]];
201
202 case PROGRAM_OUTPUT:
203 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
204
205 case PROGRAM_ADDRESS:
206 return ureg_src(t->address[index]);
207
208 default:
209 debug_assert( 0 );
210 return ureg_src_undef();
211 }
212 }
213
214
215 /**
216 * Map mesa texture target to TGSI texture target.
217 */
218 static unsigned
219 translate_texture_target( GLuint textarget,
220 GLboolean shadow )
221 {
222 if (shadow) {
223 switch( textarget ) {
224 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_SHADOW1D;
225 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_SHADOW2D;
226 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT;
227 default: break;
228 }
229 }
230
231 switch( textarget ) {
232 case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D;
233 case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D;
234 case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D;
235 case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE;
236 case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT;
237 default:
238 debug_assert( 0 );
239 return TGSI_TEXTURE_1D;
240 }
241 }
242
243
244 static struct ureg_dst
245 translate_dst( struct st_translate *t,
246 const struct prog_dst_register *DstReg,
247 boolean saturate )
248 {
249 struct ureg_dst dst = dst_register( t,
250 DstReg->File,
251 DstReg->Index );
252
253 dst = ureg_writemask( dst,
254 DstReg->WriteMask );
255
256 if (saturate)
257 dst = ureg_saturate( dst );
258
259 if (DstReg->RelAddr)
260 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
261
262 return dst;
263 }
264
265
266 static struct ureg_src
267 translate_src( struct st_translate *t,
268 const struct prog_src_register *SrcReg )
269 {
270 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
271
272 src = ureg_swizzle( src,
273 GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3,
274 GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3,
275 GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3,
276 GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3);
277
278 if (SrcReg->Negate == NEGATE_XYZW)
279 src = ureg_negate(src);
280
281 if (SrcReg->Abs)
282 src = ureg_abs(src);
283
284 if (SrcReg->RelAddr) {
285 src = ureg_src_indirect( src, ureg_src(t->address[0]));
286 /* If SrcReg->Index was negative, it was set to zero in
287 * src_register(). Reassign it now.
288 */
289 src.Index = SrcReg->Index;
290 }
291
292 return src;
293 }
294
295
296 static struct ureg_src swizzle_4v( struct ureg_src src,
297 const unsigned *swz )
298 {
299 return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] );
300 }
301
302
303 /**
304 * Translate a SWZ instruction into a MOV, MUL or MAD instruction. EG:
305 *
306 * SWZ dst, src.x-y10
307 *
308 * becomes:
309 *
310 * MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0}
311 */
312 static void emit_swz( struct st_translate *t,
313 struct ureg_dst dst,
314 const struct prog_src_register *SrcReg )
315 {
316 struct ureg_program *ureg = t->ureg;
317 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );
318
319 unsigned negate_mask = SrcReg->Negate;
320
321 unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 |
322 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 |
323 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 |
324 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3);
325
326 unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 |
327 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 |
328 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 |
329 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3);
330
331 unsigned negative_one_mask = one_mask & negate_mask;
332 unsigned positive_one_mask = one_mask & ~negate_mask;
333
334 struct ureg_src imm;
335 unsigned i;
336 unsigned mul_swizzle[4] = {0,0,0,0};
337 unsigned add_swizzle[4] = {0,0,0,0};
338 unsigned src_swizzle[4] = {0,0,0,0};
339 boolean need_add = FALSE;
340 boolean need_mul = FALSE;
341
342 if (dst.WriteMask == 0)
343 return;
344
345 /* Is this just a MOV?
346 */
347 if (zero_mask == 0 &&
348 one_mask == 0 &&
349 (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW))
350 {
351 ureg_MOV( ureg, dst, translate_src( t, SrcReg ));
352 return;
353 }
354
355 #define IMM_ZERO 0
356 #define IMM_ONE 1
357 #define IMM_NEG_ONE 2
358
359 imm = ureg_imm3f( ureg, 0, 1, -1 );
360
361 for (i = 0; i < 4; i++) {
362 unsigned bit = 1 << i;
363
364 if (dst.WriteMask & bit) {
365 if (positive_one_mask & bit) {
366 mul_swizzle[i] = IMM_ZERO;
367 add_swizzle[i] = IMM_ONE;
368 need_add = TRUE;
369 }
370 else if (negative_one_mask & bit) {
371 mul_swizzle[i] = IMM_ZERO;
372 add_swizzle[i] = IMM_NEG_ONE;
373 need_add = TRUE;
374 }
375 else if (zero_mask & bit) {
376 mul_swizzle[i] = IMM_ZERO;
377 add_swizzle[i] = IMM_ZERO;
378 need_add = TRUE;
379 }
380 else {
381 add_swizzle[i] = IMM_ZERO;
382 src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
383 need_mul = TRUE;
384 if (negate_mask & bit) {
385 mul_swizzle[i] = IMM_NEG_ONE;
386 }
387 else {
388 mul_swizzle[i] = IMM_ONE;
389 }
390 }
391 }
392 }
393
394 if (need_mul && need_add) {
395 ureg_MAD( ureg,
396 dst,
397 swizzle_4v( src, src_swizzle ),
398 swizzle_4v( imm, mul_swizzle ),
399 swizzle_4v( imm, add_swizzle ) );
400 }
401 else if (need_mul) {
402 ureg_MUL( ureg,
403 dst,
404 swizzle_4v( src, src_swizzle ),
405 swizzle_4v( imm, mul_swizzle ) );
406 }
407 else if (need_add) {
408 ureg_MOV( ureg,
409 dst,
410 swizzle_4v( imm, add_swizzle ) );
411 }
412 else {
413 debug_assert(0);
414 }
415
416 #undef IMM_ZERO
417 #undef IMM_ONE
418 #undef IMM_NEG_ONE
419 }
420
421
422 /**
423 * Negate the value of DDY to match GL semantics where (0,0) is the
424 * lower-left corner of the window.
425 * Note that the GL_ARB_fragment_coord_conventions extension will
426 * effect this someday.
427 */
428 static void emit_ddy( struct st_translate *t,
429 struct ureg_dst dst,
430 const struct prog_src_register *SrcReg )
431 {
432 struct ureg_program *ureg = t->ureg;
433 struct ureg_src src = translate_src( t, SrcReg );
434 src = ureg_negate( src );
435 ureg_DDY( ureg, dst, src );
436 }
437
438
439
440 static unsigned
441 translate_opcode( unsigned op )
442 {
443 switch( op ) {
444 case OPCODE_ARL:
445 return TGSI_OPCODE_ARL;
446 case OPCODE_ABS:
447 return TGSI_OPCODE_ABS;
448 case OPCODE_ADD:
449 return TGSI_OPCODE_ADD;
450 case OPCODE_BGNLOOP:
451 return TGSI_OPCODE_BGNLOOP;
452 case OPCODE_BGNSUB:
453 return TGSI_OPCODE_BGNSUB;
454 case OPCODE_BRA:
455 return TGSI_OPCODE_BRA;
456 case OPCODE_BRK:
457 return TGSI_OPCODE_BRK;
458 case OPCODE_CAL:
459 return TGSI_OPCODE_CAL;
460 case OPCODE_CMP:
461 return TGSI_OPCODE_CMP;
462 case OPCODE_CONT:
463 return TGSI_OPCODE_CONT;
464 case OPCODE_COS:
465 return TGSI_OPCODE_COS;
466 case OPCODE_DDX:
467 return TGSI_OPCODE_DDX;
468 case OPCODE_DDY:
469 return TGSI_OPCODE_DDY;
470 case OPCODE_DP2:
471 return TGSI_OPCODE_DP2;
472 case OPCODE_DP2A:
473 return TGSI_OPCODE_DP2A;
474 case OPCODE_DP3:
475 return TGSI_OPCODE_DP3;
476 case OPCODE_DP4:
477 return TGSI_OPCODE_DP4;
478 case OPCODE_DPH:
479 return TGSI_OPCODE_DPH;
480 case OPCODE_DST:
481 return TGSI_OPCODE_DST;
482 case OPCODE_ELSE:
483 return TGSI_OPCODE_ELSE;
484 case OPCODE_ENDIF:
485 return TGSI_OPCODE_ENDIF;
486 case OPCODE_ENDLOOP:
487 return TGSI_OPCODE_ENDLOOP;
488 case OPCODE_ENDSUB:
489 return TGSI_OPCODE_ENDSUB;
490 case OPCODE_EX2:
491 return TGSI_OPCODE_EX2;
492 case OPCODE_EXP:
493 return TGSI_OPCODE_EXP;
494 case OPCODE_FLR:
495 return TGSI_OPCODE_FLR;
496 case OPCODE_FRC:
497 return TGSI_OPCODE_FRC;
498 case OPCODE_IF:
499 return TGSI_OPCODE_IF;
500 case OPCODE_TRUNC:
501 return TGSI_OPCODE_TRUNC;
502 case OPCODE_KIL:
503 return TGSI_OPCODE_KIL;
504 case OPCODE_KIL_NV:
505 return TGSI_OPCODE_KILP;
506 case OPCODE_LG2:
507 return TGSI_OPCODE_LG2;
508 case OPCODE_LOG:
509 return TGSI_OPCODE_LOG;
510 case OPCODE_LIT:
511 return TGSI_OPCODE_LIT;
512 case OPCODE_LRP:
513 return TGSI_OPCODE_LRP;
514 case OPCODE_MAD:
515 return TGSI_OPCODE_MAD;
516 case OPCODE_MAX:
517 return TGSI_OPCODE_MAX;
518 case OPCODE_MIN:
519 return TGSI_OPCODE_MIN;
520 case OPCODE_MOV:
521 return TGSI_OPCODE_MOV;
522 case OPCODE_MUL:
523 return TGSI_OPCODE_MUL;
524 case OPCODE_NOP:
525 return TGSI_OPCODE_NOP;
526 case OPCODE_NRM3:
527 return TGSI_OPCODE_NRM;
528 case OPCODE_NRM4:
529 return TGSI_OPCODE_NRM4;
530 case OPCODE_POW:
531 return TGSI_OPCODE_POW;
532 case OPCODE_RCP:
533 return TGSI_OPCODE_RCP;
534 case OPCODE_RET:
535 return TGSI_OPCODE_RET;
536 case OPCODE_RSQ:
537 return TGSI_OPCODE_RSQ;
538 case OPCODE_SCS:
539 return TGSI_OPCODE_SCS;
540 case OPCODE_SEQ:
541 return TGSI_OPCODE_SEQ;
542 case OPCODE_SGE:
543 return TGSI_OPCODE_SGE;
544 case OPCODE_SGT:
545 return TGSI_OPCODE_SGT;
546 case OPCODE_SIN:
547 return TGSI_OPCODE_SIN;
548 case OPCODE_SLE:
549 return TGSI_OPCODE_SLE;
550 case OPCODE_SLT:
551 return TGSI_OPCODE_SLT;
552 case OPCODE_SNE:
553 return TGSI_OPCODE_SNE;
554 case OPCODE_SSG:
555 return TGSI_OPCODE_SSG;
556 case OPCODE_SUB:
557 return TGSI_OPCODE_SUB;
558 case OPCODE_TEX:
559 return TGSI_OPCODE_TEX;
560 case OPCODE_TXB:
561 return TGSI_OPCODE_TXB;
562 case OPCODE_TXD:
563 return TGSI_OPCODE_TXD;
564 case OPCODE_TXL:
565 return TGSI_OPCODE_TXL;
566 case OPCODE_TXP:
567 return TGSI_OPCODE_TXP;
568 case OPCODE_XPD:
569 return TGSI_OPCODE_XPD;
570 case OPCODE_END:
571 return TGSI_OPCODE_END;
572 default:
573 debug_assert( 0 );
574 return TGSI_OPCODE_NOP;
575 }
576 }
577
578
579 static void
580 compile_instruction(
581 struct st_translate *t,
582 const struct prog_instruction *inst )
583 {
584 struct ureg_program *ureg = t->ureg;
585 GLuint i;
586 struct ureg_dst dst[1];
587 struct ureg_src src[4];
588 unsigned num_dst;
589 unsigned num_src;
590
591 num_dst = _mesa_num_inst_dst_regs( inst->Opcode );
592 num_src = _mesa_num_inst_src_regs( inst->Opcode );
593
594 if (num_dst)
595 dst[0] = translate_dst( t,
596 &inst->DstReg,
597 inst->SaturateMode );
598
599 for (i = 0; i < num_src; i++)
600 src[i] = translate_src( t, &inst->SrcReg[i] );
601
602 switch( inst->Opcode ) {
603 case OPCODE_SWZ:
604 emit_swz( t, dst[0], &inst->SrcReg[0] );
605 return;
606
607 case OPCODE_BGNLOOP:
608 case OPCODE_CAL:
609 case OPCODE_ELSE:
610 case OPCODE_ENDLOOP:
611 case OPCODE_IF:
612 debug_assert(num_dst == 0);
613 ureg_label_insn( ureg,
614 translate_opcode( inst->Opcode ),
615 src, num_src,
616 get_label( t, inst->BranchTarget ));
617 return;
618
619 case OPCODE_TEX:
620 case OPCODE_TXB:
621 case OPCODE_TXD:
622 case OPCODE_TXL:
623 case OPCODE_TXP:
624 src[num_src++] = t->samplers[inst->TexSrcUnit];
625 ureg_tex_insn( ureg,
626 translate_opcode( inst->Opcode ),
627 dst, num_dst,
628 translate_texture_target( inst->TexSrcTarget,
629 inst->TexShadow ),
630 src, num_src );
631 return;
632
633 case OPCODE_SCS:
634 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
635 ureg_insn( ureg,
636 translate_opcode( inst->Opcode ),
637 dst, num_dst,
638 src, num_src );
639 break;
640
641 case OPCODE_XPD:
642 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
643 ureg_insn( ureg,
644 translate_opcode( inst->Opcode ),
645 dst, num_dst,
646 src, num_src );
647 break;
648
649 case OPCODE_NOISE1:
650 case OPCODE_NOISE2:
651 case OPCODE_NOISE3:
652 case OPCODE_NOISE4:
653 /* At some point, a motivated person could add a better
654 * implementation of noise. Currently not even the nvidia
655 * binary drivers do anything more than this. In any case, the
656 * place to do this is in the GL state tracker, not the poor
657 * driver.
658 */
659 ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
660 break;
661
662 case OPCODE_DDY:
663 emit_ddy( t, dst[0], &inst->SrcReg[0] );
664 break;
665
666 default:
667 ureg_insn( ureg,
668 translate_opcode( inst->Opcode ),
669 dst, num_dst,
670 src, num_src );
671 break;
672 }
673 }
674
675 /**
676 * Emit the TGSI instructions to adjust the WPOS pixel center convention
677 */
678 static void
679 emit_adjusted_wpos( struct st_translate *t,
680 const struct gl_program *program, GLfloat value)
681 {
682 struct ureg_program *ureg = t->ureg;
683 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
684 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
685
686 ureg_ADD(ureg, ureg_writemask(wpos_temp, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y),
687 wpos_input, ureg_imm1f(ureg, value));
688
689 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
690 }
691
692 /**
693 * Emit the TGSI instructions for inverting the WPOS y coordinate.
694 */
695 static void
696 emit_inverted_wpos( struct st_translate *t,
697 const struct gl_program *program )
698 {
699 struct ureg_program *ureg = t->ureg;
700
701 /* Fragment program uses fragment position input.
702 * Need to replace instances of INPUT[WPOS] with temp T
703 * where T = INPUT[WPOS] by y is inverted.
704 */
705 static const gl_state_index winSizeState[STATE_LENGTH]
706 = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
707
708 /* XXX: note we are modifying the incoming shader here! Need to
709 * do this before emitting the constant decls below, or this
710 * will be missed:
711 */
712 unsigned winHeightConst = _mesa_add_state_reference(program->Parameters,
713 winSizeState);
714
715 struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst );
716 struct ureg_dst wpos_temp;
717 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
718
719 /* MOV wpos_temp, input[wpos]
720 */
721 if (wpos_input.File == TGSI_FILE_TEMPORARY)
722 wpos_temp = ureg_dst(wpos_input);
723 else {
724 wpos_temp = ureg_DECL_temporary( ureg );
725 ureg_MOV( ureg, wpos_temp, wpos_input );
726 }
727
728 /* SUB wpos_temp.y, winsize_const, wpos_input
729 */
730 ureg_SUB( ureg,
731 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
732 winsize,
733 wpos_input);
734
735 /* Use wpos_temp as position input from here on:
736 */
737 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
738 }
739
740
741 /**
742 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
743 * TGSI uses +1 for front, -1 for back.
744 * This function converts the TGSI value to the GL value. Simply clamping/
745 * saturating the value to [0,1] does the job.
746 */
747 static void
748 emit_face_var( struct st_translate *t,
749 const struct gl_program *program )
750 {
751 struct ureg_program *ureg = t->ureg;
752 struct ureg_dst face_temp = ureg_DECL_temporary( ureg );
753 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
754
755 /* MOV_SAT face_temp, input[face]
756 */
757 face_temp = ureg_saturate( face_temp );
758 ureg_MOV( ureg, face_temp, face_input );
759
760 /* Use face_temp as face input from here on:
761 */
762 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
763 }
764
765
766 static void
767 emit_edgeflags( struct st_translate *t,
768 const struct gl_program *program )
769 {
770 struct ureg_program *ureg = t->ureg;
771 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
772 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
773
774 ureg_MOV( ureg, edge_dst, edge_src );
775 }
776
777
778 /**
779 * Translate Mesa program to TGSI format.
780 * \param program the program to translate
781 * \param numInputs number of input registers used
782 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
783 * input indexes
784 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
785 * \param inputSemanticIndex the semantic index (ex: which texcoord) for
786 * each input
787 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
788 * \param numOutputs number of output registers used
789 * \param outputMapping maps Mesa fragment program outputs to TGSI
790 * generic outputs
791 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
792 * \param outputSemanticIndex the semantic index (ex: which texcoord) for
793 * each output
794 *
795 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
796 */
797 enum pipe_error
798 st_translate_mesa_program(
799 GLcontext *ctx,
800 uint procType,
801 struct ureg_program *ureg,
802 const struct gl_program *program,
803 GLuint numInputs,
804 const GLuint inputMapping[],
805 const ubyte inputSemanticName[],
806 const ubyte inputSemanticIndex[],
807 const GLuint interpMode[],
808 GLuint numOutputs,
809 const GLuint outputMapping[],
810 const ubyte outputSemanticName[],
811 const ubyte outputSemanticIndex[],
812 boolean passthrough_edgeflags )
813 {
814 struct st_translate translate, *t;
815 unsigned i;
816 enum pipe_error ret = PIPE_OK;
817
818 t = &translate;
819 memset(t, 0, sizeof *t);
820
821 t->procType = procType;
822 t->inputMapping = inputMapping;
823 t->outputMapping = outputMapping;
824 t->ureg = ureg;
825 t->psizoutindex = -1;
826 t->prevInstWrotePsiz = GL_FALSE;
827
828 /*_mesa_print_program(program);*/
829
830 /*
831 * Declare input attributes.
832 */
833 if (procType == TGSI_PROCESSOR_FRAGMENT) {
834 struct gl_fragment_program* fp = (struct gl_fragment_program*)program;
835 for (i = 0; i < numInputs; i++) {
836 if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) {
837 t->inputs[i] = ureg_DECL_fs_input_cyl(ureg,
838 inputSemanticName[i],
839 inputSemanticIndex[i],
840 interpMode[i],
841 TGSI_CYLINDRICAL_WRAP_X);
842 }
843 else {
844 t->inputs[i] = ureg_DECL_fs_input(ureg,
845 inputSemanticName[i],
846 inputSemanticIndex[i],
847 interpMode[i]);
848 }
849 }
850
851 if (program->InputsRead & FRAG_BIT_WPOS) {
852 /* Must do this after setting up t->inputs, and before
853 * emitting constant references, below:
854 */
855 struct pipe_screen* pscreen = st_context(ctx)->pipe->screen;
856 boolean invert = FALSE;
857
858 if (fp->OriginUpperLeft) {
859 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
860 }
861 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
862 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
863 invert = TRUE;
864 }
865 else
866 assert(0);
867 }
868 else {
869 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
870 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
871 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
872 invert = TRUE;
873 else
874 assert(0);
875 }
876
877 if (fp->PixelCenterInteger) {
878 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
879 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
880 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
881 emit_adjusted_wpos(t, program, invert ? 0.5f : -0.5f);
882 else
883 assert(0);
884 }
885 else {
886 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
887 }
888 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
889 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
890 emit_adjusted_wpos(t, program, invert ? -0.5f : 0.5f);
891 }
892 else
893 assert(0);
894 }
895
896 /* we invert after adjustment so that we avoid the MOV to temporary,
897 * and reuse the adjustment ADD instead */
898 if (invert)
899 emit_inverted_wpos(t, program);
900 }
901
902 if (program->InputsRead & FRAG_BIT_FACE) {
903 emit_face_var( t, program );
904 }
905
906 /*
907 * Declare output attributes.
908 */
909 for (i = 0; i < numOutputs; i++) {
910 switch (outputSemanticName[i]) {
911 case TGSI_SEMANTIC_POSITION:
912 t->outputs[i] = ureg_DECL_output( ureg,
913 TGSI_SEMANTIC_POSITION, /* Z / Depth */
914 outputSemanticIndex[i] );
915
916 t->outputs[i] = ureg_writemask( t->outputs[i],
917 TGSI_WRITEMASK_Z );
918 break;
919 case TGSI_SEMANTIC_COLOR:
920 t->outputs[i] = ureg_DECL_output( ureg,
921 TGSI_SEMANTIC_COLOR,
922 outputSemanticIndex[i] );
923 break;
924 default:
925 debug_assert(0);
926 return 0;
927 }
928 }
929 }
930 else {
931 for (i = 0; i < numInputs; i++) {
932 t->inputs[i] = ureg_DECL_vs_input(ureg, i);
933 }
934
935 for (i = 0; i < numOutputs; i++) {
936 t->outputs[i] = ureg_DECL_output( ureg,
937 outputSemanticName[i],
938 outputSemanticIndex[i] );
939 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) {
940 static const gl_state_index pointSizeClampState[STATE_LENGTH]
941 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 };
942 /* XXX: note we are modifying the incoming shader here! Need to
943 * do this before emitting the constant decls below, or this
944 * will be missed:
945 */
946 unsigned pointSizeClampConst = _mesa_add_state_reference(program->Parameters,
947 pointSizeClampState);
948 struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
949 t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
950 t->psizregreal = t->outputs[i];
951 t->psizoutindex = i;
952 t->outputs[i] = psizregtemp;
953 }
954 }
955 if (passthrough_edgeflags)
956 emit_edgeflags( t, program );
957 }
958
959 /* Declare address register.
960 */
961 if (program->NumAddressRegs > 0) {
962 debug_assert( program->NumAddressRegs == 1 );
963 t->address[0] = ureg_DECL_address( ureg );
964 }
965
966
967 /* Emit constants and immediates. Mesa uses a single index space
968 * for these, so we put all the translated regs in t->constants.
969 */
970 if (program->Parameters) {
971
972 t->constants = CALLOC( program->Parameters->NumParameters,
973 sizeof t->constants[0] );
974 if (t->constants == NULL) {
975 ret = PIPE_ERROR_OUT_OF_MEMORY;
976 goto out;
977 }
978
979 for (i = 0; i < program->Parameters->NumParameters; i++) {
980 switch (program->Parameters->Parameters[i].Type) {
981 case PROGRAM_ENV_PARAM:
982 case PROGRAM_LOCAL_PARAM:
983 case PROGRAM_STATE_VAR:
984 case PROGRAM_NAMED_PARAM:
985 case PROGRAM_UNIFORM:
986 t->constants[i] = ureg_DECL_constant( ureg, i );
987 break;
988
989 /* Emit immediates only when there is no address register
990 * in use. FIXME: Be smarter and recognize param arrays:
991 * indirect addressing is only valid within the referenced
992 * array.
993 */
994 case PROGRAM_CONSTANT:
995 if (program->NumAddressRegs > 0)
996 t->constants[i] = ureg_DECL_constant( ureg, i );
997 else
998 t->constants[i] =
999 ureg_DECL_immediate( ureg,
1000 program->Parameters->ParameterValues[i],
1001 4 );
1002 break;
1003 default:
1004 break;
1005 }
1006 }
1007 }
1008
1009 /* texture samplers */
1010 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
1011 if (program->SamplersUsed & (1 << i)) {
1012 t->samplers[i] = ureg_DECL_sampler( ureg, i );
1013 }
1014 }
1015
1016 /* Emit each instruction in turn:
1017 */
1018 for (i = 0; i < program->NumInstructions; i++) {
1019 set_insn_start( t, ureg_get_instruction_number( ureg ));
1020 compile_instruction( t, &program->Instructions[i] );
1021
1022 /* note can't do that easily at the end of prog due to
1023 possible early return */
1024 if (t->prevInstWrotePsiz && program->Id) {
1025 set_insn_start( t, ureg_get_instruction_number( ureg ));
1026 ureg_MAX( t->ureg, ureg_writemask(t->outputs[t->psizoutindex], WRITEMASK_X),
1027 ureg_src(t->outputs[t->psizoutindex]),
1028 ureg_swizzle(t->pointSizeConst, 1,1,1,1));
1029 ureg_MIN( t->ureg, ureg_writemask(t->psizregreal, WRITEMASK_X),
1030 ureg_src(t->outputs[t->psizoutindex]),
1031 ureg_swizzle(t->pointSizeConst, 2,2,2,2));
1032 }
1033 t->prevInstWrotePsiz = GL_FALSE;
1034 }
1035
1036 /* Fix up all emitted labels:
1037 */
1038 for (i = 0; i < t->labels_count; i++) {
1039 ureg_fixup_label( ureg,
1040 t->labels[i].token,
1041 t->insn[t->labels[i].branch_target] );
1042 }
1043
1044 out:
1045 FREE(t->insn);
1046 FREE(t->labels);
1047 FREE(t->constants);
1048
1049 if (t->error) {
1050 debug_printf("%s: translate error flag set\n", __FUNCTION__);
1051 }
1052
1053 return ret;
1054 }
1055
1056
/**
 * Release a TGSI token array.
 *
 * Tokens cannot be freed with _mesa_free, otherwise the builtin gallium
 * malloc debugging will get confused.
 */
void
st_free_tokens(const struct tgsi_token *tokens)
{
   /* FREE() takes a non-const pointer; drop the qualifier explicitly. */
   void *mem = (void *)tokens;

   FREE(mem);
}