svga: formatting fixes in svga_tgsi_insn.c
[mesa.git] / src / gallium / drivers / svga / svga_tgsi_insn.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "util/u_memory.h"
31 #include "util/u_math.h"
32
33 #include "svga_tgsi_emit.h"
34 #include "svga_context.h"
35
36
37 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
38 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
39
40
41 static unsigned
42 translate_opcode(uint opcode)
43 {
44 switch (opcode) {
45 case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
46 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
47 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
48 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
49 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
50 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
51 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
52 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
53 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
54 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
55 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
56 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
57 case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM;
58 default:
59 debug_printf("Unknown opcode %u\n", opcode);
60 assert( 0 );
61 return SVGA3DOP_LAST_INST;
62 }
63 }
64
65
66 static unsigned
67 translate_file(unsigned file)
68 {
69 switch (file) {
70 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
71 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
72 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
73 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
74 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
75 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
76 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
77 default:
78 assert( 0 );
79 return SVGA3DREG_TEMP;
80 }
81 }
82
83
84 static SVGA3dShaderDestToken
85 translate_dst_register( struct svga_shader_emitter *emit,
86 const struct tgsi_full_instruction *insn,
87 unsigned idx )
88 {
89 const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
90 SVGA3dShaderDestToken dest;
91
92 switch (reg->Register.File) {
93 case TGSI_FILE_OUTPUT:
94 /* Output registers encode semantic information in their name.
95 * Need to look up a table built at decl time:
96 */
97 dest = emit->output_map[reg->Register.Index];
98 break;
99
100 default:
101 {
102 unsigned index = reg->Register.Index;
103 assert(index < SVGA3D_TEMPREG_MAX);
104 index = MIN2(index, SVGA3D_TEMPREG_MAX - 1);
105 dest = dst_register(translate_file(reg->Register.File), index);
106 }
107 break;
108 }
109
110 dest.mask = reg->Register.WriteMask;
111 assert(dest.mask);
112
113 if (insn->Instruction.Saturate)
114 dest.dstMod = SVGA3DDSTMOD_SATURATE;
115
116 return dest;
117 }
118
119
120 static struct src_register
121 swizzle( struct src_register src,
122 int x,
123 int y,
124 int z,
125 int w )
126 {
127 x = (src.base.swizzle >> (x * 2)) & 0x3;
128 y = (src.base.swizzle >> (y * 2)) & 0x3;
129 z = (src.base.swizzle >> (z * 2)) & 0x3;
130 w = (src.base.swizzle >> (w * 2)) & 0x3;
131
132 src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w);
133
134 return src;
135 }
136
137
138 static struct src_register
139 scalar( struct src_register src,
140 int comp )
141 {
142 return swizzle( src, comp, comp, comp, comp );
143 }
144
145
146 static boolean
147 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
148 {
149 int i;
150
151 for (i = 0; i < emit->num_arl_consts; ++i) {
152 if (emit->arl_consts[i].arl_num == emit->current_arl)
153 return TRUE;
154 }
155 return FALSE;
156 }
157
158
159 static int
160 svga_arl_adjustment( const struct svga_shader_emitter *emit )
161 {
162 int i;
163
164 for (i = 0; i < emit->num_arl_consts; ++i) {
165 if (emit->arl_consts[i].arl_num == emit->current_arl)
166 return emit->arl_consts[i].number;
167 }
168 return 0;
169 }
170
171
172 static struct src_register
173 translate_src_register( const struct svga_shader_emitter *emit,
174 const struct tgsi_full_src_register *reg )
175 {
176 struct src_register src;
177
178 switch (reg->Register.File) {
179 case TGSI_FILE_INPUT:
180 /* Input registers are referred to by their semantic name rather
181 * than by index. Use the mapping built up from the decls:
182 */
183 src = emit->input_map[reg->Register.Index];
184 break;
185
186 case TGSI_FILE_IMMEDIATE:
187 /* Immediates are appended after TGSI constants in the D3D
188 * constant buffer.
189 */
190 src = src_register( translate_file( reg->Register.File ),
191 reg->Register.Index + emit->imm_start );
192 break;
193
194 default:
195 src = src_register( translate_file( reg->Register.File ),
196 reg->Register.Index );
197 break;
198 }
199
200 /* Indirect addressing.
201 */
202 if (reg->Register.Indirect) {
203 if (emit->unit == PIPE_SHADER_FRAGMENT) {
204 /* Pixel shaders have only loop registers for relative
205 * addressing into inputs. Ignore the redundant address
206 * register, the contents of aL should be in sync with it.
207 */
208 if (reg->Register.File == TGSI_FILE_INPUT) {
209 src.base.relAddr = 1;
210 src.indirect = src_token(SVGA3DREG_LOOP, 0);
211 }
212 }
213 else {
214 /* Constant buffers only.
215 */
216 if (reg->Register.File == TGSI_FILE_CONSTANT) {
217 /* we shift the offset towards the minimum */
218 if (svga_arl_needs_adjustment( emit )) {
219 src.base.num -= svga_arl_adjustment( emit );
220 }
221 src.base.relAddr = 1;
222
223 /* Not really sure what should go in the second token:
224 */
225 src.indirect = src_token( SVGA3DREG_ADDR,
226 reg->Indirect.Index );
227
228 src.indirect.swizzle = SWIZZLE_XXXX;
229 }
230 }
231 }
232
233 src = swizzle( src,
234 reg->Register.SwizzleX,
235 reg->Register.SwizzleY,
236 reg->Register.SwizzleZ,
237 reg->Register.SwizzleW );
238
239 /* src.base.srcMod isn't a bitfield, unfortunately:
240 * See tgsi_util_get_full_src_register_sign_mode for implementation details.
241 */
242 if (reg->Register.Absolute) {
243 if (reg->Register.Negate)
244 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
245 else
246 src.base.srcMod = SVGA3DSRCMOD_ABS;
247 }
248 else {
249 if (reg->Register.Negate)
250 src.base.srcMod = SVGA3DSRCMOD_NEG;
251 else
252 src.base.srcMod = SVGA3DSRCMOD_NONE;
253 }
254
255 return src;
256 }
257
258
259 /*
260 * Get a temporary register.
261 * Note: if we exceed the temporary register limit we just use
262 * register SVGA3D_TEMPREG_MAX - 1.
263 */
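/* Internal temporaries are allocated immediately after the shader's
 * declared temporaries (emit->nr_hw_temp) and handed out sequentially;
 * they are all reclaimed at once by reset_temp_regs() below.
 */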
264 static SVGA3dShaderDestToken
265 get_temp( struct svga_shader_emitter *emit )
266 {
267 int i = emit->nr_hw_temp + emit->internal_temp_count++;
268 assert(i < SVGA3D_TEMPREG_MAX);
269 i = MIN2(i, SVGA3D_TEMPREG_MAX - 1);
270 return dst_register( SVGA3DREG_TEMP, i );
271 }
272
273
274 /**
275 * Release a single temp. Currently only effective if it was the last
276 * allocated temp, otherwise release will be delayed until the next
277 * call to reset_temp_regs().
278 */
279 static void
280 release_temp( struct svga_shader_emitter *emit,
281 SVGA3dShaderDestToken temp )
282 {
283 if (temp.num == emit->internal_temp_count - 1)
284 emit->internal_temp_count--;
285 }
286
287
288 static void
289 reset_temp_regs(struct svga_shader_emitter *emit)
290 {
291 emit->internal_temp_count = 0;
292 }
293
294
295 /* Replace the src with the temporary specified in the dst, but copying
296 * only the necessary channels, and preserving the original swizzle (which is
297 * important given that several opcodes have constraints in the allowed
298 * swizzles).
299 */
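/* For example, if *src0 is c1.yyyy, only the Y channel of the temporary
 * is written (MOV tN.y, c1) and the returned source becomes tN.yyyy, so
 * the consuming instruction still sees the original swizzle.
 */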
300 static boolean
301 emit_repl(struct svga_shader_emitter *emit,
302 SVGA3dShaderDestToken dst,
303 struct src_register *src0)
304 {
305 unsigned src0_swizzle;
306 unsigned chan;
307
308 assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP);
309
310 src0_swizzle = src0->base.swizzle;
311
312 dst.mask = 0;
313 for (chan = 0; chan < 4; ++chan) {
314 unsigned swizzle = (src0_swizzle >> (chan * 2)) & 0x3;
315 dst.mask |= 1 << swizzle;
316 }
317 assert(dst.mask);
318
319 src0->base.swizzle = SVGA3DSWIZZLE_NONE;
320
321 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 ))
322 return FALSE;
323
324 *src0 = src( dst );
325 src0->base.swizzle = src0_swizzle;
326
327 return TRUE;
328 }
329
330
331 static boolean
332 submit_op0(struct svga_shader_emitter *emit,
333 SVGA3dShaderInstToken inst,
334 SVGA3dShaderDestToken dest)
335 {
336 return (emit_instruction( emit, inst ) &&
337 emit_dst( emit, dest ));
338 }
339
340
341 static boolean
342 submit_op1(struct svga_shader_emitter *emit,
343 SVGA3dShaderInstToken inst,
344 SVGA3dShaderDestToken dest,
345 struct src_register src0)
346 {
347 return emit_op1( emit, inst, dest, src0 );
348 }
349
350
351 /**
352 * SVGA shaders may not refer to >1 constant register in a single
353 * instruction. This function checks for that usage and inserts a
354 * move to temporary if detected.
355 *
356 * The same applies to input registers -- at most a single input
357 * register may be read by any instruction.
358 */
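/* For example, an instruction reading two distinct constant registers,
 * such as ADD dst, c0, c1, is rewritten roughly as:
 *    MOV tN, c0
 *    ADD dst, tN, c1
 * The same rewrite is applied when both sources are distinct inputs.
 */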
359 static boolean
360 submit_op2(struct svga_shader_emitter *emit,
361 SVGA3dShaderInstToken inst,
362 SVGA3dShaderDestToken dest,
363 struct src_register src0,
364 struct src_register src1)
365 {
366 SVGA3dShaderDestToken temp;
367 SVGA3dShaderRegType type0, type1;
368 boolean need_temp = FALSE;
369
370 temp.value = 0;
371 type0 = SVGA3dShaderGetRegType( src0.base.value );
372 type1 = SVGA3dShaderGetRegType( src1.base.value );
373
374 if (type0 == SVGA3DREG_CONST &&
375 type1 == SVGA3DREG_CONST &&
376 src0.base.num != src1.base.num)
377 need_temp = TRUE;
378
379 if (type0 == SVGA3DREG_INPUT &&
380 type1 == SVGA3DREG_INPUT &&
381 src0.base.num != src1.base.num)
382 need_temp = TRUE;
383
384 if (need_temp) {
385 temp = get_temp( emit );
386
387 if (!emit_repl( emit, temp, &src0 ))
388 return FALSE;
389 }
390
391 if (!emit_op2( emit, inst, dest, src0, src1 ))
392 return FALSE;
393
394 if (need_temp)
395 release_temp( emit, temp );
396
397 return TRUE;
398 }
399
400
401 /**
402 * SVGA shaders may not refer to >1 constant register in a single
403 * instruction. This function checks for that usage and inserts a
404 * move to temporary if detected.
405 */
406 static boolean
407 submit_op3(struct svga_shader_emitter *emit,
408 SVGA3dShaderInstToken inst,
409 SVGA3dShaderDestToken dest,
410 struct src_register src0,
411 struct src_register src1,
412 struct src_register src2)
413 {
414 SVGA3dShaderDestToken temp0;
415 SVGA3dShaderDestToken temp1;
416 boolean need_temp0 = FALSE;
417 boolean need_temp1 = FALSE;
418 SVGA3dShaderRegType type0, type1, type2;
419
420 temp0.value = 0;
421 temp1.value = 0;
422 type0 = SVGA3dShaderGetRegType( src0.base.value );
423 type1 = SVGA3dShaderGetRegType( src1.base.value );
424 type2 = SVGA3dShaderGetRegType( src2.base.value );
425
426 if (inst.op != SVGA3DOP_SINCOS) {
427 if (type0 == SVGA3DREG_CONST &&
428 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
429 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
430 need_temp0 = TRUE;
431
432 if (type1 == SVGA3DREG_CONST &&
433 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
434 need_temp1 = TRUE;
435 }
436
437 if (type0 == SVGA3DREG_INPUT &&
438 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
439 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
440 need_temp0 = TRUE;
441
442 if (type1 == SVGA3DREG_INPUT &&
443 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
444 need_temp1 = TRUE;
445
446 if (need_temp0) {
447 temp0 = get_temp( emit );
448
449 if (!emit_repl( emit, temp0, &src0 ))
450 return FALSE;
451 }
452
453 if (need_temp1) {
454 temp1 = get_temp( emit );
455
456 if (!emit_repl( emit, temp1, &src1 ))
457 return FALSE;
458 }
459
460 if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
461 return FALSE;
462
463 if (need_temp1)
464 release_temp( emit, temp1 );
465 if (need_temp0)
466 release_temp( emit, temp0 );
467 return TRUE;
468 }
469
470
471 /**
472 * SVGA shaders may not refer to >1 constant register in a single
473 * instruction. This function checks for that usage and inserts a
474 * move to temporary if detected.
475 */
476 static boolean
477 submit_op4(struct svga_shader_emitter *emit,
478 SVGA3dShaderInstToken inst,
479 SVGA3dShaderDestToken dest,
480 struct src_register src0,
481 struct src_register src1,
482 struct src_register src2,
483 struct src_register src3)
484 {
485 SVGA3dShaderDestToken temp0;
486 SVGA3dShaderDestToken temp3;
487 boolean need_temp0 = FALSE;
488 boolean need_temp3 = FALSE;
489 SVGA3dShaderRegType type0, type1, type2, type3;
490
491 temp0.value = 0;
492 temp3.value = 0;
493 type0 = SVGA3dShaderGetRegType( src0.base.value );
494 type1 = SVGA3dShaderGetRegType( src1.base.value );
495 type2 = SVGA3dShaderGetRegType( src2.base.value );
496 type3 = SVGA3dShaderGetRegType( src3.base.value );
497
498 /* Make life a little easier: this is only used by the TXD
499 * instruction, which is guaranteed not to have a constant/input reg
500 * in at least one slot (src1 is always a sampler):
501 */
502 assert(type1 == SVGA3DREG_SAMPLER);
503
504 if (type0 == SVGA3DREG_CONST &&
505 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
506 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
507 need_temp0 = TRUE;
508
509 if (type3 == SVGA3DREG_CONST &&
510 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
511 need_temp3 = TRUE;
512
513 if (type0 == SVGA3DREG_INPUT &&
514 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
515 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
516 need_temp0 = TRUE;
517
518 if (type3 == SVGA3DREG_INPUT &&
519 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
520 need_temp3 = TRUE;
521
522 if (need_temp0) {
523 temp0 = get_temp( emit );
524
525 if (!emit_repl( emit, temp0, &src0 ))
526 return FALSE;
527 }
528
529 if (need_temp3) {
530 temp3 = get_temp( emit );
531
532 if (!emit_repl( emit, temp3, &src3 ))
533 return FALSE;
534 }
535
536 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
537 return FALSE;
538
539 if (need_temp3)
540 release_temp( emit, temp3 );
541 if (need_temp0)
542 release_temp( emit, temp0 );
543 return TRUE;
544 }
545
546
547 /**
548 * Do the src and dest registers refer to the same register?
549 */
550 static boolean
551 alias_src_dst(struct src_register src,
552 SVGA3dShaderDestToken dst)
553 {
554 if (src.base.num != dst.num)
555 return FALSE;
556
557 if (SVGA3dShaderGetRegType(dst.value) !=
558 SVGA3dShaderGetRegType(src.base.value))
559 return FALSE;
560
561 return TRUE;
562 }
563
564
565 static boolean
566 submit_lrp(struct svga_shader_emitter *emit,
567 SVGA3dShaderDestToken dst,
568 struct src_register src0,
569 struct src_register src1,
570 struct src_register src2)
571 {
572 SVGA3dShaderDestToken tmp;
573 boolean need_dst_tmp = FALSE;
574
575 /* The dst reg must be a temporary, and not be the same as src0 or src2 */
576 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
577 alias_src_dst(src0, dst) ||
578 alias_src_dst(src2, dst))
579 need_dst_tmp = TRUE;
580
581 if (need_dst_tmp) {
582 tmp = get_temp( emit );
583 tmp.mask = dst.mask;
584 }
585 else {
586 tmp = dst;
587 }
588
589 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
590 return FALSE;
591
592 if (need_dst_tmp) {
593 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
594 return FALSE;
595 }
596
597 return TRUE;
598 }
599
600
601 static boolean
602 emit_def_const(struct svga_shader_emitter *emit,
603 SVGA3dShaderConstType type,
604 unsigned idx, float a, float b, float c, float d)
605 {
606 SVGA3DOpDefArgs def;
607 SVGA3dShaderInstToken opcode;
608
609 switch (type) {
610 case SVGA3D_CONST_TYPE_FLOAT:
611 opcode = inst_token( SVGA3DOP_DEF );
612 def.dst = dst_register( SVGA3DREG_CONST, idx );
613 def.constValues[0] = a;
614 def.constValues[1] = b;
615 def.constValues[2] = c;
616 def.constValues[3] = d;
617 break;
618 case SVGA3D_CONST_TYPE_INT:
619 opcode = inst_token( SVGA3DOP_DEFI );
620 def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
621 def.constIValues[0] = (int)a;
622 def.constIValues[1] = (int)b;
623 def.constIValues[2] = (int)c;
624 def.constIValues[3] = (int)d;
625 break;
626 default:
627 assert(0);
628 opcode = inst_token( SVGA3DOP_NOP );
629 break;
630 }
631
632 if (!emit_instruction(emit, opcode) ||
633 !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
634 return FALSE;
635
636 return TRUE;
637 }
638
639
640 static boolean
641 create_zero_immediate( struct svga_shader_emitter *emit )
642 {
643 unsigned idx = emit->nr_hw_float_const++;
644
645 /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate
646 * other useful vectors.
647 */
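/* The swizzled views of this constant used elsewhere in this file are:
 *   get_zero_immediate()        -> (0, 0, 0, 1)
 *   get_pos_neg_one_immediate() -> (1, 1, 1, -1)
 *   get_half_immediate()        -> (0.5, 0.5, 0.5, 0.5)
 */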
648 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
649 idx, 0, 0.5, -1, 1 ))
650 return FALSE;
651
652 emit->zero_immediate_idx = idx;
653 emit->created_zero_immediate = TRUE;
654
655 return TRUE;
656 }
657
658
659 static boolean
660 create_loop_const( struct svga_shader_emitter *emit )
661 {
662 unsigned idx = emit->nr_hw_int_const++;
663
664 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
665 255, /* iteration count */
666 0, /* initial value */
667 1, /* step size */
668 0 /* not used, must be 0 */))
669 return FALSE;
670
671 emit->loop_const_idx = idx;
672 emit->created_loop_const = TRUE;
673
674 return TRUE;
675 }
676
677 static boolean
678 create_arl_consts( struct svga_shader_emitter *emit )
679 {
680 int i;
681
682 for (i = 0; i < emit->num_arl_consts; i += 4) {
683 int j;
684 unsigned idx = emit->nr_hw_float_const++;
685 float vals[4];
686 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
687 vals[j] = (float) emit->arl_consts[i + j].number;
688 emit->arl_consts[i + j].idx = idx;
689 switch (j) {
690 case 0:
691 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
692 break;
693 case 1:
694 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y;
695 break;
696 case 2:
697 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z;
698 break;
699 case 3:
700 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W;
701 break;
702 }
703 }
704 while (j < 4)
705 vals[j++] = 0;
706
707 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
708 vals[0], vals[1],
709 vals[2], vals[3]))
710 return FALSE;
711 }
712
713 return TRUE;
714 }
715
716
717 /**
718 * Return the register which holds the pixel shader's front/back-
719 * facing value.
720 */
721 static struct src_register
722 get_vface( struct svga_shader_emitter *emit )
723 {
724 assert(emit->emitted_vface);
725 return src_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE);
726 }
727
728
729 /**
730 * returns {0, 0, 0, 1} immediate
731 */
732 static struct src_register
733 get_zero_immediate( struct svga_shader_emitter *emit )
734 {
735 assert(emit->created_zero_immediate);
736 assert(emit->zero_immediate_idx >= 0);
737 return swizzle(src_register( SVGA3DREG_CONST,
738 emit->zero_immediate_idx),
739 0, 0, 0, 3);
740 }
741
742
743 /**
744 * returns {1, 1, 1, -1} immediate
745 */
746 static struct src_register
747 get_pos_neg_one_immediate( struct svga_shader_emitter *emit )
748 {
749 assert(emit->created_zero_immediate);
750 assert(emit->zero_immediate_idx >= 0);
751 return swizzle(src_register( SVGA3DREG_CONST,
752 emit->zero_immediate_idx),
753 3, 3, 3, 2);
754 }
755
756
757 /**
758 * returns {0.5, 0.5, 0.5, 0.5} immediate
759 */
760 static struct src_register
761 get_half_immediate( struct svga_shader_emitter *emit )
762 {
763 assert(emit->created_zero_immediate);
764 assert(emit->zero_immediate_idx >= 0);
765 return swizzle(src_register(SVGA3DREG_CONST, emit->zero_immediate_idx),
766 1, 1, 1, 1);
767 }
768
769
770 /**
771 * returns the loop const
772 */
773 static struct src_register
774 get_loop_const( struct svga_shader_emitter *emit )
775 {
776 assert(emit->created_loop_const);
777 assert(emit->loop_const_idx >= 0);
778 return src_register( SVGA3DREG_CONSTINT,
779 emit->loop_const_idx );
780 }
781
782
783 static struct src_register
784 get_fake_arl_const( struct svga_shader_emitter *emit )
785 {
786 struct src_register reg;
787 int idx = 0, swizzle = 0, i;
788
789 for (i = 0; i < emit->num_arl_consts; ++ i) {
790 if (emit->arl_consts[i].arl_num == emit->current_arl) {
791 idx = emit->arl_consts[i].idx;
792 swizzle = emit->arl_consts[i].swizzle;
793 }
794 }
795
796 reg = src_register( SVGA3DREG_CONST, idx );
797 return scalar(reg, swizzle);
798 }
799
800
801 /**
802 * Return the register which holds the current dimensions of the
803 * texture bound to the given sampler.
804 */
805 static struct src_register
806 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
807 {
808 int idx;
809 struct src_register reg;
810
811 /* the width/height indexes start right after constants */
812 idx = emit->key.fkey.tex[sampler_num].width_height_idx +
813 emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
814
815 reg = src_register( SVGA3DREG_CONST, idx );
816 return reg;
817 }
818
819
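/**
 * Emulate ARL when the address offsets have been shifted: add back the
 * constant offset that translate_src_register() subtracted from
 * relatively-addressed constant registers (see svga_arl_adjustment),
 * then issue MOVA, so the effective constant index is unchanged.
 */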
820 static boolean
821 emit_fake_arl(struct svga_shader_emitter *emit,
822 const struct tgsi_full_instruction *insn)
823 {
824 const struct src_register src0 =
825 translate_src_register(emit, &insn->Src[0] );
826 struct src_register src1 = get_fake_arl_const( emit );
827 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
828 SVGA3dShaderDestToken tmp = get_temp( emit );
829
830 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
831 return FALSE;
832
833 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
834 src1))
835 return FALSE;
836
837 /* replicate the original swizzle */
838 src1 = src(tmp);
839 src1.base.swizzle = src0.base.swizzle;
840
841 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
842 dst, src1 );
843 }
844
845
846 static boolean
847 emit_if(struct svga_shader_emitter *emit,
848 const struct tgsi_full_instruction *insn)
849 {
850 struct src_register src0 =
851 translate_src_register(emit, &insn->Src[0]);
852 struct src_register zero = get_zero_immediate( emit );
853 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
854
855 if_token.control = SVGA3DOPCOMPC_NE;
856 zero = scalar(zero, TGSI_SWIZZLE_X);
857
858 if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) {
859 /*
860 * Max different constant registers readable per IFC instruction is 1.
861 */
862 SVGA3dShaderDestToken tmp = get_temp( emit );
863
864 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
865 return FALSE;
866
867 src0 = scalar(src( tmp ), TGSI_SWIZZLE_X);
868 }
869
870 emit->dynamic_branching_level++;
871
872 return (emit_instruction( emit, if_token ) &&
873 emit_src( emit, src0 ) &&
874 emit_src( emit, zero ) );
875 }
876
877
878 static boolean
879 emit_endif(struct svga_shader_emitter *emit,
880 const struct tgsi_full_instruction *insn)
881 {
882 emit->dynamic_branching_level--;
883
884 return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF));
885 }
886
887
888 static boolean
889 emit_else(struct svga_shader_emitter *emit,
890 const struct tgsi_full_instruction *insn)
891 {
892 return emit_instruction(emit, inst_token(SVGA3DOP_ELSE));
893 }
894
895
896 /**
897 * Translate the following TGSI FLR instruction.
898 * FLR DST, SRC
899 * To the following SVGA3D instruction sequence.
900 * FRC TMP, SRC
901 * SUB DST, SRC, TMP
902 */
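/* The SUB in the sequence above is realized below as an ADD with a
 * negate source modifier on TMP.
 */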
903 static boolean
904 emit_floor(struct svga_shader_emitter *emit,
905 const struct tgsi_full_instruction *insn )
906 {
907 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
908 const struct src_register src0 =
909 translate_src_register(emit, &insn->Src[0] );
910 SVGA3dShaderDestToken temp = get_temp( emit );
911
912 /* FRC TMP, SRC */
913 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
914 return FALSE;
915
916 /* SUB DST, SRC, TMP */
917 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
918 negate( src( temp ) ) ))
919 return FALSE;
920
921 return TRUE;
922 }
923
924
925 /**
926 * Translate the following TGSI CEIL instruction.
927 * CEIL DST, SRC
928 * To the following SVGA3D instruction sequence.
929 * FRC TMP, -SRC
930 * ADD DST, SRC, TMP
931 */
932 static boolean
933 emit_ceil(struct svga_shader_emitter *emit,
934 const struct tgsi_full_instruction *insn)
935 {
936 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
937 const struct src_register src0 =
938 translate_src_register(emit, &insn->Src[0]);
939 SVGA3dShaderDestToken temp = get_temp(emit);
940
941 /* FRC TMP, -SRC */
942 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0)))
943 return FALSE;
944
945 /* ADD DST, SRC, TMP */
946 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp)))
947 return FALSE;
948
949 return TRUE;
950 }
951
952
953 /**
954 * Translate the following TGSI DIV instruction.
955 * DIV DST.xy, SRC0, SRC1
956 * To the following SVGA3D instruction sequence.
957 * RCP TMP.x, SRC1.xxxx
958 * RCP TMP.y, SRC1.yyyy
959 * MUL DST.xy, SRC0, TMP
960 */
961 static boolean
962 emit_div(struct svga_shader_emitter *emit,
963 const struct tgsi_full_instruction *insn )
964 {
965 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
966 const struct src_register src0 =
967 translate_src_register(emit, &insn->Src[0] );
968 const struct src_register src1 =
969 translate_src_register(emit, &insn->Src[1] );
970 SVGA3dShaderDestToken temp = get_temp( emit );
971 int i;
972
973 /* For each enabled element, perform a RCP instruction. Note that
974 * RCP is scalar in SVGA3D:
975 */
976 for (i = 0; i < 4; i++) {
977 unsigned channel = 1 << i;
978 if (dst.mask & channel) {
979 /* RCP TMP.?, SRC1.???? */
980 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
981 writemask(temp, channel),
982 scalar(src1, i) ))
983 return FALSE;
984 }
985 }
986
987 /* Vector mul:
988 * MUL DST, SRC0, TMP
989 */
990 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
991 src( temp ) ))
992 return FALSE;
993
994 return TRUE;
995 }
996
997
998 /**
999 * Translate the following TGSI DP2 instruction.
1000 * DP2 DST, SRC1, SRC2
1001 * To the following SVGA3D instruction sequence.
1002 * MUL TMP, SRC1, SRC2
1003 * ADD DST, TMP.xxxx, TMP.yyyy
1004 */
1005 static boolean
1006 emit_dp2(struct svga_shader_emitter *emit,
1007 const struct tgsi_full_instruction *insn )
1008 {
1009 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1010 const struct src_register src0 =
1011 translate_src_register(emit, &insn->Src[0]);
1012 const struct src_register src1 =
1013 translate_src_register(emit, &insn->Src[1]);
1014 SVGA3dShaderDestToken temp = get_temp( emit );
1015 struct src_register temp_src0, temp_src1;
1016
1017 /* MUL TMP, SRC1, SRC2 */
1018 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
1019 return FALSE;
1020
1021 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1022 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1023
1024 /* ADD DST, TMP.xxxx, TMP.yyyy */
1025 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1026 temp_src0, temp_src1 ))
1027 return FALSE;
1028
1029 return TRUE;
1030 }
1031
1032
1033 /**
1034 * Translate the following TGSI DPH instruction.
1035 * DPH DST, SRC1, SRC2
1036 * To the following SVGA3D instruction sequence.
1037 * DP3 TMP, SRC1, SRC2
1038 * ADD DST, TMP, SRC2.wwww
1039 */
1040 static boolean
1041 emit_dph(struct svga_shader_emitter *emit,
1042 const struct tgsi_full_instruction *insn )
1043 {
1044 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1045 const struct src_register src0 = translate_src_register(
1046 emit, &insn->Src[0] );
1047 struct src_register src1 =
1048 translate_src_register(emit, &insn->Src[1]);
1049 SVGA3dShaderDestToken temp = get_temp( emit );
1050
1051 /* DP3 TMP, SRC1, SRC2 */
1052 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
1053 return FALSE;
1054
1055 src1 = scalar(src1, TGSI_SWIZZLE_W);
1056
1057 /* ADD DST, TMP, SRC2.wwww */
1058 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1059 src( temp ), src1 ))
1060 return FALSE;
1061
1062 return TRUE;
1063 }
1064
1065
1066 /**
1067 * Translate the following TGSI NRM instruction.
1068 * NRM DST, SRC
1069 * To the following SVGA3D instruction sequence.
1070 * DP3 TMP, SRC, SRC
1071 * RSQ TMP, TMP
1072 * MUL DST, SRC, TMP
1073 */
1074 static boolean
1075 emit_nrm(struct svga_shader_emitter *emit,
1076 const struct tgsi_full_instruction *insn)
1077 {
1078 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1079 const struct src_register src0 =
1080 translate_src_register(emit, &insn->Src[0]);
1081 SVGA3dShaderDestToken temp = get_temp( emit );
1082
1083 /* DP3 TMP, SRC, SRC */
1084 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 ))
1085 return FALSE;
1086
1087 /* RSQ TMP, TMP */
1088 if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp )))
1089 return FALSE;
1090
1091 /* MUL DST, SRC, TMP */
1092 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst,
1093 src0, src( temp )))
1094 return FALSE;
1095
1096 return TRUE;
1097 }
1098
1099
1100 static boolean
1101 do_emit_sincos(struct svga_shader_emitter *emit,
1102 SVGA3dShaderDestToken dst,
1103 struct src_register src0)
1104 {
1105 src0 = scalar(src0, TGSI_SWIZZLE_X);
1106 return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0);
1107 }
1108
1109
1110 static boolean
1111 emit_sincos(struct svga_shader_emitter *emit,
1112 const struct tgsi_full_instruction *insn)
1113 {
1114 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1115 struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
1116 SVGA3dShaderDestToken temp = get_temp( emit );
1117
1118 /* SCS TMP SRC */
1119 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
1120 return FALSE;
1121
1122 /* MOV DST TMP */
1123 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
1124 return FALSE;
1125
1126 return TRUE;
1127 }
1128
1129
1130 /**
1131 * SCS TMP SRC
1132 * MOV DST TMP.yyyy
1133 */
1134 static boolean
1135 emit_sin(struct svga_shader_emitter *emit,
1136 const struct tgsi_full_instruction *insn )
1137 {
1138 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1139 struct src_register src0 =
1140 translate_src_register(emit, &insn->Src[0] );
1141 SVGA3dShaderDestToken temp = get_temp( emit );
1142
1143 /* SCS TMP SRC */
1144 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1145 return FALSE;
1146
1147 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1148
1149 /* MOV DST TMP.yyyy */
1150 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1151 return FALSE;
1152
1153 return TRUE;
1154 }
1155
1156 /**
1157 * SCS TMP SRC
1158 * MOV DST TMP.xxxx
1159 */
1160 static boolean
1161 emit_cos(struct svga_shader_emitter *emit,
1162 const struct tgsi_full_instruction *insn)
1163 {
1164 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1165 struct src_register src0 =
1166 translate_src_register(emit, &insn->Src[0] );
1167 SVGA3dShaderDestToken temp = get_temp( emit );
1168
1169 /* SCS TMP SRC */
1170 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1171 return FALSE;
1172
1173 src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1174
1175 /* MOV DST TMP.xxxx */
1176 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1177 return FALSE;
1178
1179 return TRUE;
1180 }
1181
1182
1183 static boolean
1184 emit_ssg(struct svga_shader_emitter *emit,
1185 const struct tgsi_full_instruction *insn)
1186 {
1187 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1188 struct src_register src0 =
1189 translate_src_register(emit, &insn->Src[0] );
1190 SVGA3dShaderDestToken temp0 = get_temp( emit );
1191 SVGA3dShaderDestToken temp1 = get_temp( emit );
1192 struct src_register zero, one;
1193
1194 if (emit->unit == PIPE_SHADER_VERTEX) {
1195 /* SGN DST, SRC0, TMP0, TMP1 */
1196 return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0,
1197 src( temp0 ), src( temp1 ) );
1198 }
1199
1200 zero = get_zero_immediate( emit );
1201 one = scalar( zero, TGSI_SWIZZLE_W );
1202 zero = scalar( zero, TGSI_SWIZZLE_X );
1203
1204 /* CMP TMP0, SRC0, one, zero */
1205 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1206 writemask( temp0, dst.mask ), src0, one, zero ))
1207 return FALSE;
1208
1209 /* CMP TMP1, negate(SRC0), negate(one), zero */
1210 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1211 writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
1212 zero ))
1213 return FALSE;
1214
1215 /* ADD DST, TMP0, TMP1 */
1216 return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
1217 src( temp1 ) );
1218 }
1219
1220
1221 /**
1222 * ADD DST, SRC0, negate(SRC1)
1223 */
1224 static boolean
1225 emit_sub(struct svga_shader_emitter *emit,
1226 const struct tgsi_full_instruction *insn)
1227 {
1228 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1229 struct src_register src0 = translate_src_register(
1230 emit, &insn->Src[0] );
1231 struct src_register src1 = translate_src_register(
1232 emit, &insn->Src[1] );
1233
1234 src1 = negate(src1);
1235
1236 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1237 src0, src1 ))
1238 return FALSE;
1239
1240 return TRUE;
1241 }
1242
1243
1244 static boolean
1245 emit_kil(struct svga_shader_emitter *emit,
1246 const struct tgsi_full_instruction *insn)
1247 {
1248 const struct tgsi_full_src_register *reg = &insn->Src[0];
1249 struct src_register src0, srcIn;
1250 /* is the W component tested in another position? */
1251 const boolean w_tested = (reg->Register.SwizzleW == reg->Register.SwizzleX ||
1252 reg->Register.SwizzleW == reg->Register.SwizzleY ||
1253 reg->Register.SwizzleW == reg->Register.SwizzleZ);
1254 const boolean special = (reg->Register.Absolute ||
1255 reg->Register.Negate ||
1256 reg->Register.Indirect ||
1257 reg->Register.SwizzleX != 0 ||
1258 reg->Register.SwizzleY != 1 ||
1259 reg->Register.SwizzleZ != 2 ||
1260 reg->Register.File != TGSI_FILE_TEMPORARY);
1261 SVGA3dShaderDestToken temp;
1262
1263 src0 = srcIn = translate_src_register( emit, reg );
1264
1265 if (special || !w_tested) {
1266 /* need a temp reg */
1267 temp = get_temp( emit );
1268 }
1269
1270 if (special) {
1271 /* move the source into a temp register */
1272 submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1273 writemask( temp, TGSI_WRITEMASK_XYZ ),
1274 src0 );
1275
1276 src0 = src( temp );
1277 }
1278
1279 /* do the texkill (on the xyz components) */
1280 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) ))
1281 return FALSE;
1282
1283 if (!w_tested) {
1284 /* need to emit a second texkill to test the W component */
1285 /* put src.wwww into temp register */
1286 if (!submit_op1(emit,
1287 inst_token( SVGA3DOP_MOV ),
1288 writemask( temp, TGSI_WRITEMASK_XYZ ),
1289 scalar(srcIn, TGSI_SWIZZLE_W)))
1290 return FALSE;
1291
1292 /* second texkill */
1293 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), temp ))
1294 return FALSE;
1295 }
1296
1297 return TRUE;
1298 }
1299
1300
1301 /**
1302 * The Mesa state tracker always emits KILP as an unconditional KIL.
1303 */
1304 static boolean
1305 emit_kilp(struct svga_shader_emitter *emit,
1306 const struct tgsi_full_instruction *insn)
1307 {
1308 SVGA3dShaderDestToken temp;
1309 struct src_register one = scalar( get_zero_immediate( emit ),
1310 TGSI_SWIZZLE_W );
1311 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL );
1312
1313 /* texkill doesn't allow negation on the operand, so let's move
1314 * negation of {1} to a temp register */
1315 temp = get_temp( emit );
1316 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1317 negate( one ) ))
1318 return FALSE;
1319
1320 return submit_op0( emit, inst, temp );
1321 }
1322
1323
1324 /**
1325 * Test if r1 and r2 are the same register.
1326 */
1327 static boolean
1328 same_register(struct src_register r1, struct src_register r2)
1329 {
1330 return (r1.base.num == r2.base.num &&
1331 r1.base.type_upper == r2.base.type_upper &&
1332 r1.base.type_lower == r2.base.type_lower);
1333 }
1334
1335
1336
1337 /* Implement conditionals by initializing destination reg to 'fail',
1338 * then set the predicate reg with SETP, then move 'pass' to dest
1339 * based on predicate reg.
1340 *
1341 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1342 * MOV dst, fail
1343 * MOV dst, pass, p0
1344 */
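/* As an illustration, emit_select() below uses this with pass = 1 and
 * fail = 0, so a less-than select is emitted roughly as:
 *    SETP_LT p0, src0, src1
 *    MOV dst, 0
 *    (p0) MOV dst, 1
 */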
1345 static boolean
1346 emit_conditional(struct svga_shader_emitter *emit,
1347 unsigned compare_func,
1348 SVGA3dShaderDestToken dst,
1349 struct src_register src0,
1350 struct src_register src1,
1351 struct src_register pass,
1352 struct src_register fail)
1353 {
1354 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1355 SVGA3dShaderInstToken setp_token, mov_token;
1356 setp_token = inst_token( SVGA3DOP_SETP );
1357
1358 switch (compare_func) {
1359 case PIPE_FUNC_NEVER:
1360 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1361 dst, fail );
1362 break;
1363 case PIPE_FUNC_LESS:
1364 setp_token.control = SVGA3DOPCOMP_LT;
1365 break;
1366 case PIPE_FUNC_EQUAL:
1367 setp_token.control = SVGA3DOPCOMP_EQ;
1368 break;
1369 case PIPE_FUNC_LEQUAL:
1370 setp_token.control = SVGA3DOPCOMP_LE;
1371 break;
1372 case PIPE_FUNC_GREATER:
1373 setp_token.control = SVGA3DOPCOMP_GT;
1374 break;
1375 case PIPE_FUNC_NOTEQUAL:
1376 setp_token.control = SVGA3DOPCOMPC_NE;
1377 break;
1378 case PIPE_FUNC_GEQUAL:
1379 setp_token.control = SVGA3DOPCOMP_GE;
1380 break;
1381 case PIPE_FUNC_ALWAYS:
1382 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1383 dst, pass );
1384 break;
1385 }
1386
1387 if (same_register(src(dst), pass)) {
1388 /* We'll get bad results if the dst and pass registers are the same
1389 * so use a temp register containing pass.
1390 */
1391 SVGA3dShaderDestToken temp = get_temp(emit);
1392 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, pass))
1393 return FALSE;
1394 pass = src(temp);
1395 }
1396
1397 /* SETP src0, COMPOP, src1 */
1398 if (!submit_op2( emit, setp_token, pred_reg,
1399 src0, src1 ))
1400 return FALSE;
1401
1402 mov_token = inst_token( SVGA3DOP_MOV );
1403
1404 /* MOV dst, fail */
1405 if (!submit_op1( emit, mov_token, dst,
1406 fail ))
1407 return FALSE;
1408
1409 /* MOV dst, pass (predicated)
1410 *
1411 * Note that the predicate reg (and possible modifiers) is passed
1412 * as the first source argument.
1413 */
1414 mov_token.predicated = 1;
1415 if (!submit_op2( emit, mov_token, dst,
1416 src( pred_reg ), pass ))
1417 return FALSE;
1418
1419 return TRUE;
1420 }
1421
1422
1423 static boolean
1424 emit_select(struct svga_shader_emitter *emit,
1425 unsigned compare_func,
1426 SVGA3dShaderDestToken dst,
1427 struct src_register src0,
1428 struct src_register src1 )
1429 {
1430 /* There are some SVGA instructions which implement some selects
1431 * directly, but they are only available in the vertex shader.
1432 */
1433 if (emit->unit == PIPE_SHADER_VERTEX) {
1434 switch (compare_func) {
1435 case PIPE_FUNC_GEQUAL:
1436 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1437 case PIPE_FUNC_LEQUAL:
1438 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1439 case PIPE_FUNC_GREATER:
1440 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1441 case PIPE_FUNC_LESS:
1442 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1443 default:
1444 break;
1445 }
1446 }
1447
1448 /* Otherwise, need to use the setp approach:
1449 */
1450 {
1451 struct src_register one, zero;
1452 /* zero immediate is 0,0,0,1 */
1453 zero = get_zero_immediate( emit );
1454 one = scalar( zero, TGSI_SWIZZLE_W );
1455 zero = scalar( zero, TGSI_SWIZZLE_X );
1456
1457 return emit_conditional(
1458 emit,
1459 compare_func,
1460 dst,
1461 src0,
1462 src1,
1463 one, zero);
1464 }
1465 }
1466
1467
1468 static boolean
1469 emit_select_op(struct svga_shader_emitter *emit,
1470 unsigned compare,
1471 const struct tgsi_full_instruction *insn)
1472 {
1473 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1474 struct src_register src0 = translate_src_register(
1475 emit, &insn->Src[0] );
1476 struct src_register src1 = translate_src_register(
1477 emit, &insn->Src[1] );
1478
1479 return emit_select( emit, compare, dst, src0, src1 );
1480 }
1481
1482
1483 /**
1484 * Translate TGSI CMP instruction.
1485 */
1486 static boolean
1487 emit_cmp(struct svga_shader_emitter *emit,
1488 const struct tgsi_full_instruction *insn)
1489 {
1490 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1491 const struct src_register src0 =
1492 translate_src_register(emit, &insn->Src[0] );
1493 const struct src_register src1 =
1494 translate_src_register(emit, &insn->Src[1] );
1495 const struct src_register src2 =
1496 translate_src_register(emit, &insn->Src[2] );
1497
1498 if (emit->unit == PIPE_SHADER_VERTEX) {
1499 struct src_register zero =
1500 scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X);
1501 /* We used to simulate CMP with SLT+LRP. But that didn't work when
1502 * src1 or src2 was Inf/NaN. In particular, GLSL sqrt(0) failed
1503 * because it involves a CMP to handle the 0 case.
1504 * Use a conditional expression instead.
1505 */
1506 return emit_conditional(emit, PIPE_FUNC_LESS, dst,
1507 src0, zero, src1, src2);
1508 }
1509 else {
1510 assert(emit->unit == PIPE_SHADER_FRAGMENT);
1511
1512 /* CMP DST, SRC0, SRC2, SRC1 */
1513 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst,
1514 src0, src2, src1);
1515 }
1516 }
1517
1518
1519 /**
1520 * Translate 2-operand texture instructions (TEX, TXP, TXB, TXL) to SVGA3D representation.
1521 */
1522 static boolean
1523 emit_tex2(struct svga_shader_emitter *emit,
1524 const struct tgsi_full_instruction *insn,
1525 SVGA3dShaderDestToken dst)
1526 {
1527 SVGA3dShaderInstToken inst;
1528 struct src_register texcoord;
1529 struct src_register sampler;
1530 SVGA3dShaderDestToken tmp;
1531
1532 inst.value = 0;
1533
1534 switch (insn->Instruction.Opcode) {
1535 case TGSI_OPCODE_TEX:
1536 inst.op = SVGA3DOP_TEX;
1537 break;
1538 case TGSI_OPCODE_TXP:
1539 inst.op = SVGA3DOP_TEX;
1540 inst.control = SVGA3DOPCONT_PROJECT;
1541 break;
1542 case TGSI_OPCODE_TXB:
1543 inst.op = SVGA3DOP_TEX;
1544 inst.control = SVGA3DOPCONT_BIAS;
1545 break;
1546 case TGSI_OPCODE_TXL:
1547 inst.op = SVGA3DOP_TEXLDL;
1548 break;
1549 default:
1550 assert(0);
1551 return FALSE;
1552 }
1553
1554 texcoord = translate_src_register( emit, &insn->Src[0] );
1555 sampler = translate_src_register( emit, &insn->Src[1] );
1556
1557 if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
1558 emit->dynamic_branching_level > 0)
1559 tmp = get_temp( emit );
1560
1561 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1562 * zero in that case.
1563 */
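/* TEX derives its LOD from screen-space derivatives, which are not well
 * defined under divergent flow control, so TEXLDL with an explicit LOD
 * of zero is used instead.
 */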
1564 if (emit->dynamic_branching_level > 0 &&
1565 inst.op == SVGA3DOP_TEX &&
1566 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1567 struct src_register zero = get_zero_immediate( emit );
1568
1569 /* MOV tmp, texcoord */
1570 if (!submit_op1( emit,
1571 inst_token( SVGA3DOP_MOV ),
1572 tmp,
1573 texcoord ))
1574 return FALSE;
1575
1576 /* MOV tmp.w, zero */
1577 if (!submit_op1( emit,
1578 inst_token( SVGA3DOP_MOV ),
1579 writemask( tmp, TGSI_WRITEMASK_W ),
1580 scalar( zero, TGSI_SWIZZLE_X )))
1581 return FALSE;
1582
1583 texcoord = src( tmp );
1584 inst.op = SVGA3DOP_TEXLDL;
1585 }
1586
1587 /* Explicit normalization of texcoords:
1588 */
1589 if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
1590 struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1591
1592 /* MUL tmp, SRC0, WH */
1593 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1594 tmp, texcoord, wh ))
1595 return FALSE;
1596
1597 texcoord = src( tmp );
1598 }
1599
1600 return submit_op2( emit, inst, dst, texcoord, sampler );
1601 }
1602
1603
1604 /**
1605 * Translate the 4-operand TXD texture instruction to SVGA3D representation.
1606 */
1607 static boolean
1608 emit_tex4(struct svga_shader_emitter *emit,
1609 const struct tgsi_full_instruction *insn,
1610 SVGA3dShaderDestToken dst )
1611 {
1612 SVGA3dShaderInstToken inst;
1613 struct src_register texcoord;
1614 struct src_register ddx;
1615 struct src_register ddy;
1616 struct src_register sampler;
1617
1618 texcoord = translate_src_register( emit, &insn->Src[0] );
1619 ddx = translate_src_register( emit, &insn->Src[1] );
1620 ddy = translate_src_register( emit, &insn->Src[2] );
1621 sampler = translate_src_register( emit, &insn->Src[3] );
1622
1623 inst.value = 0;
1624
1625 switch (insn->Instruction.Opcode) {
1626 case TGSI_OPCODE_TXD:
1627 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1628 break;
1629 default:
1630 assert(0);
1631 return FALSE;
1632 }
1633
1634 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1635 }
1636
1637
1638 /**
1639 * Emit texture swizzle code.
1640 */
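/* For example, a luminance-style swizzle (X, X, X, ONE) is emitted
 * roughly as:
 *    MOV dst.xyz, src.xxxx
 *    MOV dst.w, 1
 * where the 1 comes from the zero/one immediate defined above.
 */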
1641 static boolean
1642 emit_tex_swizzle(struct svga_shader_emitter *emit,
1643 SVGA3dShaderDestToken dst,
1644 struct src_register src,
1645 unsigned swizzle_x,
1646 unsigned swizzle_y,
1647 unsigned swizzle_z,
1648 unsigned swizzle_w)
1649 {
1650 const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
1651 unsigned srcSwizzle[4];
1652 unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
1653 int i;
1654
1655 /* build writemasks and srcSwizzle terms */
1656 for (i = 0; i < 4; i++) {
1657 if (swizzleIn[i] == PIPE_SWIZZLE_ZERO) {
1658 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1659 zeroWritemask |= (1 << i);
1660 }
1661 else if (swizzleIn[i] == PIPE_SWIZZLE_ONE) {
1662 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1663 oneWritemask |= (1 << i);
1664 }
1665 else {
1666 srcSwizzle[i] = swizzleIn[i];
1667 srcWritemask |= (1 << i);
1668 }
1669 }
1670
1671 /* write x/y/z/w comps */
1672 if (dst.mask & srcWritemask) {
1673 if (!submit_op1(emit,
1674 inst_token(SVGA3DOP_MOV),
1675 writemask(dst, srcWritemask),
1676 swizzle(src,
1677 srcSwizzle[0],
1678 srcSwizzle[1],
1679 srcSwizzle[2],
1680 srcSwizzle[3])))
1681 return FALSE;
1682 }
1683
1684 /* write 0 comps */
1685 if (dst.mask & zeroWritemask) {
1686 if (!submit_op1(emit,
1687 inst_token(SVGA3DOP_MOV),
1688 writemask(dst, zeroWritemask),
1689 scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X)))
1690 return FALSE;
1691 }
1692
1693 /* write 1 comps */
1694 if (dst.mask & oneWritemask) {
1695 if (!submit_op1(emit,
1696 inst_token(SVGA3DOP_MOV),
1697 writemask(dst, oneWritemask),
1698 scalar(get_zero_immediate(emit), TGSI_SWIZZLE_W)))
1699 return FALSE;
1700 }
1701
1702 return TRUE;
1703 }
1704
1705
1706 static boolean
1707 emit_tex(struct svga_shader_emitter *emit,
1708 const struct tgsi_full_instruction *insn)
1709 {
1710 SVGA3dShaderDestToken dst =
1711 translate_dst_register( emit, insn, 0 );
1712 struct src_register src0 =
1713 translate_src_register( emit, &insn->Src[0] );
1714 struct src_register src1 =
1715 translate_src_register( emit, &insn->Src[1] );
1716
1717 SVGA3dShaderDestToken tex_result;
1718 const unsigned unit = src1.base.num;
1719
1720 /* check for shadow samplers */
1721 boolean compare = (emit->key.fkey.tex[unit].compare_mode ==
1722 PIPE_TEX_COMPARE_R_TO_TEXTURE);
1723
1724 /* texture swizzle */
1725 boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
1726 emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
1727 emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
1728 emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
1729
1730 boolean saturate = insn->Instruction.Saturate != TGSI_SAT_NONE;
1731
1732 /* If doing compare processing or tex swizzle or saturation, we need to put
1733 * the fetched color into a temporary so it can be used as a source later on.
1734 */
1735 if (compare || swizzle || saturate) {
1736 tex_result = get_temp( emit );
1737 }
1738 else {
1739 tex_result = dst;
1740 }
1741
1742 switch(insn->Instruction.Opcode) {
1743 case TGSI_OPCODE_TEX:
1744 case TGSI_OPCODE_TXB:
1745 case TGSI_OPCODE_TXP:
1746 case TGSI_OPCODE_TXL:
1747 if (!emit_tex2( emit, insn, tex_result ))
1748 return FALSE;
1749 break;
1750 case TGSI_OPCODE_TXD:
1751 if (!emit_tex4( emit, insn, tex_result ))
1752 return FALSE;
1753 break;
1754 default:
1755 assert(0);
1756 }
1757
1758 if (compare) {
1759 SVGA3dShaderDestToken dst2;
1760
1761 if (swizzle || saturate)
1762 dst2 = tex_result;
1763 else
1764 dst2 = dst;
1765
1766 if (dst.mask & TGSI_WRITEMASK_XYZ) {
1767 SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1768 /* When sampling a depth texture, the result of the comparison is in
1769 * the Y component.
1770 */
1771 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1772 struct src_register r_coord;
1773
1774 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
1775 /* Divide texcoord R by Q */
1776 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1777 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1778 scalar(src0, TGSI_SWIZZLE_W) ))
1779 return FALSE;
1780
1781 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1782 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1783 scalar(src0, TGSI_SWIZZLE_Z),
1784 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1785 return FALSE;
1786
1787 r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X);
1788 }
1789 else {
1790 r_coord = scalar(src0, TGSI_SWIZZLE_Z);
1791 }
1792
1793 /* Compare texture sample value against R component of texcoord */
1794 if (!emit_select(emit,
1795 emit->key.fkey.tex[unit].compare_func,
1796 writemask( dst2, TGSI_WRITEMASK_XYZ ),
1797 r_coord,
1798 tex_src_x))
1799 return FALSE;
1800 }
1801
1802 if (dst.mask & TGSI_WRITEMASK_W) {
1803 struct src_register one =
1804 scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W );
1805
1806 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1807 writemask( dst2, TGSI_WRITEMASK_W ),
1808 one ))
1809 return FALSE;
1810 }
1811 }
1812
1813 if (saturate && !swizzle) {
1814 /* MOV_SAT dst, tex_result */
1815 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1816 return FALSE;
1817 }
1818 else if (swizzle) {
1819 /* swizzle from tex_result to dst (handles saturation too, if any) */
1820 return emit_tex_swizzle(emit,
1821 dst, src(tex_result),
1822 emit->key.fkey.tex[unit].swizzle_r,
1823 emit->key.fkey.tex[unit].swizzle_g,
1824 emit->key.fkey.tex[unit].swizzle_b,
1825 emit->key.fkey.tex[unit].swizzle_a);
1826 }
1827
1828 return TRUE;
1829 }
1830
1831
1832 static boolean
1833 emit_bgnloop2(struct svga_shader_emitter *emit,
1834 const struct tgsi_full_instruction *insn)
1835 {
1836 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
1837 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
1838 struct src_register const_int = get_loop_const( emit );
1839
1840 emit->dynamic_branching_level++;
1841
1842 return (emit_instruction( emit, inst ) &&
1843 emit_src( emit, loop_reg ) &&
1844 emit_src( emit, const_int ) );
1845 }
1846
1847
1848 static boolean
1849 emit_endloop2(struct svga_shader_emitter *emit,
1850 const struct tgsi_full_instruction *insn)
1851 {
1852 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
1853
1854 emit->dynamic_branching_level--;
1855
1856 return emit_instruction( emit, inst );
1857 }
1858
1859
1860 static boolean
1861 emit_brk(struct svga_shader_emitter *emit,
1862 const struct tgsi_full_instruction *insn)
1863 {
1864 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
1865 return emit_instruction( emit, inst );
1866 }
1867
1868
1869 static boolean
1870 emit_scalar_op1(struct svga_shader_emitter *emit,
1871 unsigned opcode,
1872 const struct tgsi_full_instruction *insn)
1873 {
1874 SVGA3dShaderInstToken inst;
1875 SVGA3dShaderDestToken dst;
1876 struct src_register src;
1877
1878 inst = inst_token( opcode );
1879 dst = translate_dst_register( emit, insn, 0 );
1880 src = translate_src_register( emit, &insn->Src[0] );
1881 src = scalar( src, TGSI_SWIZZLE_X );
1882
1883 return submit_op1( emit, inst, dst, src );
1884 }
1885
1886
1887 static boolean
1888 emit_simple_instruction(struct svga_shader_emitter *emit,
1889 unsigned opcode,
1890 const struct tgsi_full_instruction *insn)
1891 {
1892 const struct tgsi_full_src_register *src = insn->Src;
1893 SVGA3dShaderInstToken inst;
1894 SVGA3dShaderDestToken dst;
1895
1896 inst = inst_token( opcode );
1897 dst = translate_dst_register( emit, insn, 0 );
1898
1899 switch (insn->Instruction.NumSrcRegs) {
1900 case 0:
1901 return submit_op0( emit, inst, dst );
1902 case 1:
1903 return submit_op1( emit, inst, dst,
1904 translate_src_register( emit, &src[0] ));
1905 case 2:
1906 return submit_op2( emit, inst, dst,
1907 translate_src_register( emit, &src[0] ),
1908 translate_src_register( emit, &src[1] ) );
1909 case 3:
1910 return submit_op3( emit, inst, dst,
1911 translate_src_register( emit, &src[0] ),
1912 translate_src_register( emit, &src[1] ),
1913 translate_src_register( emit, &src[2] ) );
1914 default:
1915 assert(0);
1916 return FALSE;
1917 }
1918 }
1919
1920
1921 static boolean
1922 emit_deriv(struct svga_shader_emitter *emit,
1923 const struct tgsi_full_instruction *insn )
1924 {
1925 if (emit->dynamic_branching_level > 0 &&
1926 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
1927 {
1928 struct src_register zero = get_zero_immediate( emit );
1929 SVGA3dShaderDestToken dst =
1930 translate_dst_register( emit, insn, 0 );
1931
1932 /* Deriv opcodes not valid inside dynamic branching, workaround
1933 * by zeroing out the destination.
1934 */
1935 if (!submit_op1(emit,
1936 inst_token( SVGA3DOP_MOV ),
1937 dst,
1938 scalar(zero, TGSI_SWIZZLE_X)))
1939 return FALSE;
1940
1941 return TRUE;
1942 }
1943 else {
1944 unsigned opcode;
1945 const struct tgsi_full_src_register *reg = &insn->Src[0];
1946 SVGA3dShaderInstToken inst;
1947 SVGA3dShaderDestToken dst;
1948 struct src_register src0;
1949
1950 switch (insn->Instruction.Opcode) {
1951 case TGSI_OPCODE_DDX:
1952 opcode = SVGA3DOP_DSX;
1953 break;
1954 case TGSI_OPCODE_DDY:
1955 opcode = SVGA3DOP_DSY;
1956 break;
1957 default:
1958 return FALSE;
1959 }
1960
1961 inst = inst_token( opcode );
1962 dst = translate_dst_register( emit, insn, 0 );
1963 src0 = translate_src_register( emit, reg );
1964
1965 /* We cannot use negate or abs on the source to the dsx/dsy instructions.
1966 */
1967 if (reg->Register.Absolute ||
1968 reg->Register.Negate) {
1969 SVGA3dShaderDestToken temp = get_temp( emit );
1970
1971 if (!emit_repl( emit, temp, &src0 ))
1972 return FALSE;
1973 }
1974
1975 return submit_op1( emit, inst, dst, src0 );
1976 }
1977 }
1978
1979
1980 static boolean
1981 emit_arl(struct svga_shader_emitter *emit,
1982 const struct tgsi_full_instruction *insn)
1983 {
1984 ++emit->current_arl;
1985 if (emit->unit == PIPE_SHADER_FRAGMENT) {
1986 /* MOVA not present in pixel shader instruction set.
1987 * Ignore this instruction altogether since it is
1988 * only used for loop counters -- and for that
1989 * we reference aL directly.
1990 */
1991 return TRUE;
1992 }
1993 if (svga_arl_needs_adjustment( emit )) {
1994 return emit_fake_arl( emit, insn );
1995 } else {
1996 /* no need to adjust, just emit straight arl */
1997 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
1998 }
1999 }
2000
2001
2002 static boolean
2003 emit_pow(struct svga_shader_emitter *emit,
2004 const struct tgsi_full_instruction *insn)
2005 {
2006 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2007 struct src_register src0 = translate_src_register(
2008 emit, &insn->Src[0] );
2009 struct src_register src1 = translate_src_register(
2010 emit, &insn->Src[1] );
2011 boolean need_tmp = FALSE;
2012
2013 /* POW can only output to a temporary */
2014 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
2015 need_tmp = TRUE;
2016
2017 /* POW src1 must not be the same register as dst */
2018 if (alias_src_dst( src1, dst ))
2019 need_tmp = TRUE;
2020
2021 /* it's a scalar op */
2022 src0 = scalar( src0, TGSI_SWIZZLE_X );
2023 src1 = scalar( src1, TGSI_SWIZZLE_X );
2024
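   /* When need_tmp is set, the code below effectively emits:
    *    POW tmp.x, src0.x, src1.x
    *    MOV dst, tmp.xxxx
    * Otherwise a single POW writes dst directly.
    */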
2025 if (need_tmp) {
2026 SVGA3dShaderDestToken tmp =
2027 writemask(get_temp( emit ), TGSI_WRITEMASK_X );
2028
2029 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
2030 return FALSE;
2031
2032 return submit_op1(emit, inst_token( SVGA3DOP_MOV ),
2033 dst, scalar(src(tmp), 0) );
2034 }
2035 else {
2036 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
2037 }
2038 }
2039
2040
2041 static boolean
2042 emit_xpd(struct svga_shader_emitter *emit,
2043 const struct tgsi_full_instruction *insn)
2044 {
2045 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2046 const struct src_register src0 = translate_src_register(
2047 emit, &insn->Src[0] );
2048 const struct src_register src1 = translate_src_register(
2049 emit, &insn->Src[1] );
2050 boolean need_dst_tmp = FALSE;
2051
2052 /* XPD can only output to a temporary */
2053 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
2054 need_dst_tmp = TRUE;
2055
2056    /* The dst reg must not be the same as src0 or src1 */
2057 if (alias_src_dst(src0, dst) ||
2058 alias_src_dst(src1, dst))
2059 need_dst_tmp = TRUE;
2060
2061 if (need_dst_tmp) {
2062 SVGA3dShaderDestToken tmp = get_temp( emit );
2063
2064 /* Obey DX9 restrictions on mask:
2065 */
2066 tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
2067
2068 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
2069 return FALSE;
2070
2071 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
2072 return FALSE;
2073 }
2074 else {
2075 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
2076 return FALSE;
2077 }
2078
2079 /* Need to emit 1.0 to dst.w?
2080 */
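   /* The 'zero' immediate is (0, 0, 0, 1), so an unswizzled MOV of it puts
    * 1.0 into dst.w, which is XPD's defined w result.
    */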
2081 if (dst.mask & TGSI_WRITEMASK_W) {
2082 struct src_register zero = get_zero_immediate( emit );
2083
2084 if (!submit_op1(emit,
2085 inst_token( SVGA3DOP_MOV ),
2086 writemask(dst, TGSI_WRITEMASK_W),
2087 zero))
2088 return FALSE;
2089 }
2090
2091 return TRUE;
2092 }
2093
2094
2095 static boolean
2096 emit_lrp(struct svga_shader_emitter *emit,
2097 const struct tgsi_full_instruction *insn)
2098 {
2099 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2100 const struct src_register src0 = translate_src_register(
2101 emit, &insn->Src[0] );
2102 const struct src_register src1 = translate_src_register(
2103 emit, &insn->Src[1] );
2104 const struct src_register src2 = translate_src_register(
2105 emit, &insn->Src[2] );
2106
2107 return submit_lrp(emit, dst, src0, src1, src2);
2108 }
2109
2110
2111 static boolean
2112 emit_dst_insn(struct svga_shader_emitter *emit,
2113 const struct tgsi_full_instruction *insn)
2114 {
2115 if (emit->unit == PIPE_SHADER_VERTEX) {
2116 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
2117 */
2118 return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
2119 }
2120 else {
2121 /* result[0] = 1 * 1;
2122 * result[1] = a[1] * b[1];
2123 * result[2] = a[2] * 1;
2124 * result[3] = 1 * b[3];
2125 */
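      /* i.e. the D3D/TGSI DST definition:
       *    dst = (1, src0.y * src1.y, src0.z, src1.w)
       */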
2126 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2127 SVGA3dShaderDestToken tmp;
2128 const struct src_register src0 = translate_src_register(
2129 emit, &insn->Src[0] );
2130 const struct src_register src1 = translate_src_register(
2131 emit, &insn->Src[1] );
2132 struct src_register zero = get_zero_immediate( emit );
2133 boolean need_tmp = FALSE;
2134
2135 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2136 alias_src_dst(src0, dst) ||
2137 alias_src_dst(src1, dst))
2138 need_tmp = TRUE;
2139
2140 if (need_tmp) {
2141 tmp = get_temp( emit );
2142 }
2143 else {
2144 tmp = dst;
2145 }
2146
2147 /* tmp.xw = 1.0
2148 */
2149 if (tmp.mask & TGSI_WRITEMASK_XW) {
2150 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2151 writemask(tmp, TGSI_WRITEMASK_XW ),
2152 scalar( zero, 3 )))
2153 return FALSE;
2154 }
2155
2156 /* tmp.yz = src0
2157 */
2158 if (tmp.mask & TGSI_WRITEMASK_YZ) {
2159 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2160 writemask(tmp, TGSI_WRITEMASK_YZ ),
2161 src0))
2162 return FALSE;
2163 }
2164
2165 /* tmp.yw = tmp * src1
2166 */
2167 if (tmp.mask & TGSI_WRITEMASK_YW) {
2168 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2169 writemask(tmp, TGSI_WRITEMASK_YW ),
2170 src(tmp),
2171 src1))
2172 return FALSE;
2173 }
2174
2175 /* dst = tmp
2176 */
2177 if (need_tmp) {
2178 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2179 dst,
2180 src(tmp)))
2181 return FALSE;
2182 }
2183 }
2184
2185 return TRUE;
2186 }
2187
2188
2189 static boolean
2190 emit_exp(struct svga_shader_emitter *emit,
2191 const struct tgsi_full_instruction *insn)
2192 {
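   /* TGSI EXP writes:
    *    dst.x = 2 ^ floor(src.x)
    *    dst.y = src.x - floor(src.x)
    *    dst.z = 2 ^ src.x          (partial precision)
    *    dst.w = 1.0
    * e.g. for src.x = 2.5:  x = 4.0, y = 0.5, z ~= 5.657, w = 1.0
    */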
2193 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2194 struct src_register src0 =
2195 translate_src_register( emit, &insn->Src[0] );
2196 struct src_register zero = get_zero_immediate( emit );
2197 SVGA3dShaderDestToken fraction;
2198
2199 if (dst.mask & TGSI_WRITEMASK_Y)
2200 fraction = dst;
2201 else if (dst.mask & TGSI_WRITEMASK_X)
2202 fraction = get_temp( emit );
2203 else
2204 fraction.value = 0;
2205
2206    /* If x or y is being written, compute src0 - floor(src0). It is the
2207     * y result and is also needed to derive floor(src0) for the x result. */
2208 if (dst.mask & TGSI_WRITEMASK_XY) {
2209 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2210 writemask( fraction, TGSI_WRITEMASK_Y ),
2211 src0 ))
2212 return FALSE;
2213 }
2214
2215 /* If x is being written, fill it with 2 ^ floor(src0).
2216 */
2217 if (dst.mask & TGSI_WRITEMASK_X) {
2218 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2219 writemask( dst, TGSI_WRITEMASK_X ),
2220 src0,
2221 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
2222 return FALSE;
2223
2224 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2225 writemask( dst, TGSI_WRITEMASK_X ),
2226 scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
2227 return FALSE;
2228
2229 if (!(dst.mask & TGSI_WRITEMASK_Y))
2230 release_temp( emit, fraction );
2231 }
2232
2233 /* If z is being written, fill it with 2 ^ src0 (partial precision).
2234 */
2235 if (dst.mask & TGSI_WRITEMASK_Z) {
2236 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
2237 writemask( dst, TGSI_WRITEMASK_Z ),
2238 src0 ) )
2239 return FALSE;
2240 }
2241
2242 /* If w is being written, fill it with one.
2243 */
2244 if (dst.mask & TGSI_WRITEMASK_W) {
2245 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2246 writemask(dst, TGSI_WRITEMASK_W),
2247 scalar( zero, TGSI_SWIZZLE_W ) ))
2248 return FALSE;
2249 }
2250
2251 return TRUE;
2252 }
2253
2254
2255 static boolean
2256 emit_lit(struct svga_shader_emitter *emit,
2257 const struct tgsi_full_instruction *insn)
2258 {
2259 if (emit->unit == PIPE_SHADER_VERTEX) {
2260 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
2261 */
2262 return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
2263 }
2264 else {
2265       /* D3D vs. GL semantics can be fairly easily accommodated by
2266 * variations on this sequence.
2267 *
2268 * GL:
2269 * tmp.y = src.x
2270 * tmp.z = pow(src.y,src.w)
2271 * p0 = src0.xxxx > 0
2272 * result = zero.wxxw
2273 * (p0) result.yz = tmp
2274 *
2275 * D3D:
2276 * tmp.y = src.x
2277 * tmp.z = pow(src.y,src.w)
2278 * p0 = src0.xxyy > 0
2279 * result = zero.wxxw
2280 * (p0) result.yz = tmp
2281 *
2282 * Will implement the GL version for now.
2283 */
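      /* The sequence below therefore produces
       *    dst = (1, max(src.x, 0), src.x > 0 ? pow(src.y, src.w) : 0, 1);
       * the clamping of src.y and src.w that the full LIT definition calls
       * for does not appear to be applied here.
       */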
2284 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2285 SVGA3dShaderDestToken tmp = get_temp( emit );
2286 const struct src_register src0 = translate_src_register(
2287 emit, &insn->Src[0] );
2288 struct src_register zero = get_zero_immediate( emit );
2289
2290 /* tmp = pow(src.y, src.w)
2291 */
2292 if (dst.mask & TGSI_WRITEMASK_Z) {
2293 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
2294 tmp,
2295 scalar(src0, 1),
2296 scalar(src0, 3)))
2297 return FALSE;
2298 }
2299
2300 /* tmp.y = src.x
2301 */
2302 if (dst.mask & TGSI_WRITEMASK_Y) {
2303 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2304 writemask(tmp, TGSI_WRITEMASK_Y ),
2305 scalar(src0, 0)))
2306 return FALSE;
2307 }
2308
2309       /* Can't quite do this with emit_conditional() due to the extra
2310        * writemask on the predicated mov:
2311        */
2312 {
2313 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
2314 SVGA3dShaderInstToken setp_token, mov_token;
2315 struct src_register predsrc;
2316
2317 setp_token = inst_token( SVGA3DOP_SETP );
2318 mov_token = inst_token( SVGA3DOP_MOV );
2319
2320 setp_token.control = SVGA3DOPCOMP_GT;
2321
2322 /* D3D vs GL semantics:
2323 */
2324 if (0)
2325 predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
2326 else
2327 predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
2328
2329          /* SETP pred_reg, predsrc, {0}.x  (greater-than compare) */
2330 if (!submit_op2( emit, setp_token, pred_reg,
2331 predsrc,
2332 swizzle(zero, 0, 0, 0, 0) ))
2333 return FALSE;
2334
2335 /* MOV dst, fail */
2336 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
2337 swizzle(zero, 3, 0, 0, 3 )))
2338 return FALSE;
2339
2340 /* MOV dst.yz, tmp (predicated)
2341 *
2342 * Note that the predicate reg (and possible modifiers) is passed
2343 * as the first source argument.
2344 */
2345 if (dst.mask & TGSI_WRITEMASK_YZ) {
2346 mov_token.predicated = 1;
2347 if (!submit_op2( emit, mov_token,
2348 writemask(dst, TGSI_WRITEMASK_YZ),
2349 src( pred_reg ), src( tmp ) ))
2350 return FALSE;
2351 }
2352 }
2353 }
2354
2355 return TRUE;
2356 }
2357
2358
2359 static boolean
2360 emit_ex2(struct svga_shader_emitter *emit,
2361 const struct tgsi_full_instruction *insn)
2362 {
2363 SVGA3dShaderInstToken inst;
2364 SVGA3dShaderDestToken dst;
2365 struct src_register src0;
2366
2367 inst = inst_token( SVGA3DOP_EXP );
2368 dst = translate_dst_register( emit, insn, 0 );
2369 src0 = translate_src_register( emit, &insn->Src[0] );
2370 src0 = scalar( src0, TGSI_SWIZZLE_X );
2371
2372 if (dst.mask != TGSI_WRITEMASK_XYZW) {
2373 SVGA3dShaderDestToken tmp = get_temp( emit );
2374
2375 if (!submit_op1( emit, inst, tmp, src0 ))
2376 return FALSE;
2377
2378 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2379 dst,
2380 scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2381 }
2382
2383 return submit_op1( emit, inst, dst, src0 );
2384 }
2385
2386
2387 static boolean
2388 emit_log(struct svga_shader_emitter *emit,
2389 const struct tgsi_full_instruction *insn)
2390 {
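   /* TGSI LOG writes:
    *    dst.x = floor(log2(|src.x|))
    *    dst.y = |src.x| / 2 ^ floor(log2(|src.x|))
    *    dst.z = log2(|src.x|)
    *    dst.w = 1.0
    * e.g. for src.x = 10.0:  z ~= 3.32, x = 3.0, y = 10/8 = 1.25, w = 1.0
    */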
2391 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2392 struct src_register src0 =
2393 translate_src_register( emit, &insn->Src[0] );
2394 struct src_register zero = get_zero_immediate( emit );
2395 SVGA3dShaderDestToken abs_tmp;
2396 struct src_register abs_src0;
2397 SVGA3dShaderDestToken log2_abs;
2398
2399 abs_tmp.value = 0;
2400
2401 if (dst.mask & TGSI_WRITEMASK_Z)
2402 log2_abs = dst;
2403 else if (dst.mask & TGSI_WRITEMASK_XY)
2404 log2_abs = get_temp( emit );
2405 else
2406 log2_abs.value = 0;
2407
2408    /* If x, y or z is being written, compute log2( abs( src0 ) ). It is
2409     * the z result and is also needed to derive the x and y results. */
2410 if (dst.mask & TGSI_WRITEMASK_XYZ) {
2411 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2412 abs_src0 = src0;
2413 else {
2414 abs_tmp = get_temp( emit );
2415
2416 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2417 abs_tmp,
2418 src0 ) )
2419 return FALSE;
2420
2421 abs_src0 = src( abs_tmp );
2422 }
2423
2424 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2425
2426 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2427 writemask( log2_abs, TGSI_WRITEMASK_Z ),
2428 abs_src0 ) )
2429 return FALSE;
2430 }
2431
2432 if (dst.mask & TGSI_WRITEMASK_XY) {
2433 SVGA3dShaderDestToken floor_log2;
2434
2435 if (dst.mask & TGSI_WRITEMASK_X)
2436 floor_log2 = dst;
2437 else
2438 floor_log2 = get_temp( emit );
2439
2440       /* Compute floor( log2( abs( src0 ) ) ); it is the x result and is
2441        * also needed below for the y result. */
2442 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2443 writemask( floor_log2, TGSI_WRITEMASK_X ),
2444 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2445 return FALSE;
2446
2447 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2448 writemask( floor_log2, TGSI_WRITEMASK_X ),
2449 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2450 negate( src( floor_log2 ) ) ) )
2451 return FALSE;
2452
2453 /* If y is being written, fill it with
2454 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2455 */
2456 if (dst.mask & TGSI_WRITEMASK_Y) {
2457 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2458 writemask( dst, TGSI_WRITEMASK_Y ),
2459 negate( scalar( src( floor_log2 ),
2460 TGSI_SWIZZLE_X ) ) ) )
2461 return FALSE;
2462
2463 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2464 writemask( dst, TGSI_WRITEMASK_Y ),
2465 src( dst ),
2466 abs_src0 ) )
2467 return FALSE;
2468 }
2469
2470 if (!(dst.mask & TGSI_WRITEMASK_X))
2471 release_temp( emit, floor_log2 );
2472
2473 if (!(dst.mask & TGSI_WRITEMASK_Z))
2474 release_temp( emit, log2_abs );
2475 }
2476
2477 if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod &&
2478 src0.base.srcMod != SVGA3DSRCMOD_ABS)
2479 release_temp( emit, abs_tmp );
2480
2481 /* If w is being written, fill it with one.
2482 */
2483 if (dst.mask & TGSI_WRITEMASK_W) {
2484 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2485 writemask(dst, TGSI_WRITEMASK_W),
2486 scalar( zero, TGSI_SWIZZLE_W ) ))
2487 return FALSE;
2488 }
2489
2490 return TRUE;
2491 }
2492
2493
2494 /**
2495 * Translate TGSI TRUNC or ROUND instruction.
2496 * We need to truncate toward zero. Ex: trunc(-1.9) = -1
2497 * Different approaches are needed for VS versus PS.
2498 */
2499 static boolean
2500 emit_trunc_round(struct svga_shader_emitter *emit,
2501 const struct tgsi_full_instruction *insn,
2502 boolean round)
2503 {
2504 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
2505 const struct src_register src0 =
2506 translate_src_register(emit, &insn->Src[0] );
2507 SVGA3dShaderDestToken t1 = get_temp(emit);
2508
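   /* Both paths compute |result| into t1 and then restore the sign, e.g.:
    *    round(-1.7): t0 = 1.7 + 0.5 = 2.2, t1 = 2.2 - frc(2.2) = 2.0,
    *                 result = -2.0
    *    trunc(-1.9): t1 = 1.9 - frc(1.9) = 1.0, result = -1.0
    */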
2509 if (round) {
2510 SVGA3dShaderDestToken t0 = get_temp(emit);
2511 struct src_register half = get_half_immediate(emit);
2512
2513 /* t0 = abs(src0) + 0.5 */
2514 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0,
2515 absolute(src0), half))
2516 return FALSE;
2517
2518 /* t1 = fract(t0) */
2519 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0)))
2520 return FALSE;
2521
2522 /* t1 = t0 - t1 */
2523 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0),
2524 negate(src(t1))))
2525 return FALSE;
2526 }
2527 else {
2528 /* trunc */
2529
2530 /* t1 = fract(abs(src0)) */
2531 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0)))
2532 return FALSE;
2533
2534 /* t1 = abs(src0) - t1 */
2535 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0),
2536 negate(src(t1))))
2537 return FALSE;
2538 }
2539
2540 /*
2541 * Now we need to multiply t1 by the sign of the original value.
2542 */
2543 if (emit->unit == PIPE_SHADER_VERTEX) {
2544 /* For VS: use SGN instruction */
2545 /* Need two extra/dummy registers: */
2546 SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit),
2547 t4 = get_temp(emit);
2548
2549 /* t2 = sign(src0) */
2550 if (!submit_op3(emit, inst_token(SVGA3DOP_SGN), t2, src0,
2551 src(t3), src(t4)))
2552 return FALSE;
2553
2554 /* dst = t1 * t2 */
2555 if (!submit_op2(emit, inst_token(SVGA3DOP_MUL), dst, src(t1), src(t2)))
2556 return FALSE;
2557 }
2558 else {
2559 /* For FS: Use CMP instruction */
2560 return submit_op3(emit, inst_token( SVGA3DOP_CMP ), dst,
2561 src0, src(t1), negate(src(t1)));
2562 }
2563
2564 return TRUE;
2565 }
2566
2567
2568 static boolean
2569 emit_bgnsub(struct svga_shader_emitter *emit,
2570 unsigned position,
2571 const struct tgsi_full_instruction *insn)
2572 {
2573 unsigned i;
2574
2575 /* Note that we've finished the main function and are now emitting
2576 * subroutines. This affects how we terminate the generated
2577 * shader.
2578 */
2579 emit->in_main_func = FALSE;
2580
2581 for (i = 0; i < emit->nr_labels; i++) {
2582 if (emit->label[i] == position) {
2583 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2584 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2585 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2586 }
2587 }
2588
2589 assert(0);
2590 return TRUE;
2591 }
2592
2593
2594 static boolean
2595 emit_call(struct svga_shader_emitter *emit,
2596 const struct tgsi_full_instruction *insn)
2597 {
2598 unsigned position = insn->Label.Label;
2599 unsigned i;
2600
2601 for (i = 0; i < emit->nr_labels; i++) {
2602 if (emit->label[i] == position)
2603 break;
2604 }
2605
2606 if (emit->nr_labels == Elements(emit->label))
2607 return FALSE;
2608
2609 if (i == emit->nr_labels) {
2610 emit->label[i] = position;
2611 emit->nr_labels++;
2612 }
2613
2614 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2615 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2616 }
2617
2618
2619 /**
2620 * Called at the end of the shader: emit the special "fix-up" postamble
2621 * code for the vertex/fragment shader.
2622 */
2623 static boolean
2624 emit_end(struct svga_shader_emitter *emit)
2625 {
2626 if (emit->unit == PIPE_SHADER_VERTEX) {
2627 return emit_vs_postamble( emit );
2628 }
2629 else {
2630 return emit_ps_postamble( emit );
2631 }
2632 }
2633
2634
2635
2636 static boolean
2637 svga_emit_instruction(struct svga_shader_emitter *emit,
2638 unsigned position,
2639 const struct tgsi_full_instruction *insn)
2640 {
2641 switch (insn->Instruction.Opcode) {
2642
2643 case TGSI_OPCODE_ARL:
2644 return emit_arl( emit, insn );
2645
2646 case TGSI_OPCODE_TEX:
2647 case TGSI_OPCODE_TXB:
2648 case TGSI_OPCODE_TXP:
2649 case TGSI_OPCODE_TXL:
2650 case TGSI_OPCODE_TXD:
2651 return emit_tex( emit, insn );
2652
2653 case TGSI_OPCODE_DDX:
2654 case TGSI_OPCODE_DDY:
2655 return emit_deriv( emit, insn );
2656
2657 case TGSI_OPCODE_BGNSUB:
2658 return emit_bgnsub( emit, position, insn );
2659
2660 case TGSI_OPCODE_ENDSUB:
2661 return TRUE;
2662
2663 case TGSI_OPCODE_CAL:
2664 return emit_call( emit, insn );
2665
2666 case TGSI_OPCODE_FLR:
2667 return emit_floor( emit, insn );
2668
2669 case TGSI_OPCODE_TRUNC:
2670 return emit_trunc_round( emit, insn, FALSE );
2671
2672 case TGSI_OPCODE_ROUND:
2673 return emit_trunc_round( emit, insn, TRUE );
2674
2675 case TGSI_OPCODE_CEIL:
2676 return emit_ceil( emit, insn );
2677
2678 case TGSI_OPCODE_CMP:
2679 return emit_cmp( emit, insn );
2680
2681 case TGSI_OPCODE_DIV:
2682 return emit_div( emit, insn );
2683
2684 case TGSI_OPCODE_DP2:
2685 return emit_dp2( emit, insn );
2686
2687 case TGSI_OPCODE_DPH:
2688 return emit_dph( emit, insn );
2689
2690 case TGSI_OPCODE_NRM:
2691 return emit_nrm( emit, insn );
2692
2693 case TGSI_OPCODE_COS:
2694 return emit_cos( emit, insn );
2695
2696 case TGSI_OPCODE_SIN:
2697 return emit_sin( emit, insn );
2698
2699 case TGSI_OPCODE_SCS:
2700 return emit_sincos( emit, insn );
2701
2702 case TGSI_OPCODE_END:
2703 /* TGSI always finishes the main func with an END */
2704 return emit_end( emit );
2705
2706 case TGSI_OPCODE_KIL:
2707 return emit_kil( emit, insn );
2708
2709 /* Selection opcodes. The underlying language is fairly
2710 * non-orthogonal about these.
2711 */
2712 case TGSI_OPCODE_SEQ:
2713 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2714
2715 case TGSI_OPCODE_SNE:
2716 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2717
2718 case TGSI_OPCODE_SGT:
2719 return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2720
2721 case TGSI_OPCODE_SGE:
2722 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2723
2724 case TGSI_OPCODE_SLT:
2725 return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2726
2727 case TGSI_OPCODE_SLE:
2728 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2729
2730 case TGSI_OPCODE_SUB:
2731 return emit_sub( emit, insn );
2732
2733 case TGSI_OPCODE_POW:
2734 return emit_pow( emit, insn );
2735
2736 case TGSI_OPCODE_EX2:
2737 return emit_ex2( emit, insn );
2738
2739 case TGSI_OPCODE_EXP:
2740 return emit_exp( emit, insn );
2741
2742 case TGSI_OPCODE_LOG:
2743 return emit_log( emit, insn );
2744
2745 case TGSI_OPCODE_LG2:
2746 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
2747
2748 case TGSI_OPCODE_RSQ:
2749 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
2750
2751 case TGSI_OPCODE_RCP:
2752 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
2753
2754 case TGSI_OPCODE_CONT:
2755 case TGSI_OPCODE_RET:
2756 /* This is a noop -- we tell mesa that we can't support RET
2757 * within a function (early return), so this will always be
2758 * followed by an ENDSUB.
2759 */
2760 return TRUE;
2761
2762 /* These aren't actually used by any of the frontends we care
2763 * about:
2764 */
2765 case TGSI_OPCODE_CLAMP:
2766 case TGSI_OPCODE_AND:
2767 case TGSI_OPCODE_OR:
2768 case TGSI_OPCODE_I2F:
2769 case TGSI_OPCODE_NOT:
2770 case TGSI_OPCODE_SHL:
2771 case TGSI_OPCODE_ISHR:
2772 case TGSI_OPCODE_XOR:
2773 return FALSE;
2774
2775 case TGSI_OPCODE_IF:
2776 return emit_if( emit, insn );
2777 case TGSI_OPCODE_ELSE:
2778 return emit_else( emit, insn );
2779 case TGSI_OPCODE_ENDIF:
2780 return emit_endif( emit, insn );
2781
2782 case TGSI_OPCODE_BGNLOOP:
2783 return emit_bgnloop2( emit, insn );
2784 case TGSI_OPCODE_ENDLOOP:
2785 return emit_endloop2( emit, insn );
2786 case TGSI_OPCODE_BRK:
2787 return emit_brk( emit, insn );
2788
2789 case TGSI_OPCODE_XPD:
2790 return emit_xpd( emit, insn );
2791
2792 case TGSI_OPCODE_KILP:
2793 return emit_kilp( emit, insn );
2794
2795 case TGSI_OPCODE_DST:
2796 return emit_dst_insn( emit, insn );
2797
2798 case TGSI_OPCODE_LIT:
2799 return emit_lit( emit, insn );
2800
2801 case TGSI_OPCODE_LRP:
2802 return emit_lrp( emit, insn );
2803
2804 case TGSI_OPCODE_SSG:
2805 return emit_ssg( emit, insn );
2806
2807 default:
2808 {
2809 unsigned opcode = translate_opcode(insn->Instruction.Opcode);
2810
2811 if (opcode == SVGA3DOP_LAST_INST)
2812 return FALSE;
2813
2814 if (!emit_simple_instruction( emit, opcode, insn ))
2815 return FALSE;
2816 }
2817 }
2818
2819 return TRUE;
2820 }
2821
2822
2823 static boolean
2824 svga_emit_immediate(struct svga_shader_emitter *emit,
2825 struct tgsi_full_immediate *imm)
2826 {
2827 static const float id[4] = {0,0,0,1};
2828 float value[4];
2829 unsigned i;
2830
2831 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
2832 for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
2833 float f = imm->u[i].Float;
2834 value[i] = util_is_inf_or_nan(f) ? 0.0f : f;
2835 }
2836
2837 for ( ; i < 4; i++ )
2838 value[i] = id[i];
2839
2840 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2841 emit->imm_start + emit->internal_imm_count++,
2842 value[0], value[1], value[2], value[3]);
2843 }
2844
2845
2846 static boolean
2847 make_immediate(struct svga_shader_emitter *emit,
2848 float a, float b, float c, float d,
2849 struct src_register *out )
2850 {
2851 unsigned idx = emit->nr_hw_float_const++;
2852
2853 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2854 idx, a, b, c, d ))
2855 return FALSE;
2856
2857 *out = src_register( SVGA3DREG_CONST, idx );
2858
2859 return TRUE;
2860 }
2861
2862
2863 static boolean
2864 emit_vs_preamble(struct svga_shader_emitter *emit)
2865 {
2866 if (!emit->key.vkey.need_prescale) {
2867 if (!make_immediate( emit, 0, 0, .5, .5,
2868 &emit->imm_0055))
2869 return FALSE;
2870 }
2871
2872 return TRUE;
2873 }
2874
2875
2876 static boolean
2877 emit_ps_preamble(struct svga_shader_emitter *emit)
2878 {
2879 if (emit->ps_reads_pos && emit->info.reads_z) {
2880 /*
2881 * Assemble the position from various bits of inputs. Depth and W are
2882       * passed in a texcoord because D3D's vPos does not hold Z or W.
2883       * Also fix up the perspective interpolation.
2884 *
2885 * temp_pos.xy = vPos.xy
2886 * temp_pos.w = rcp(texcoord1.w);
2887 * temp_pos.z = texcoord1.z * temp_pos.w;
2888 */
2889 if (!submit_op1( emit,
2890 inst_token(SVGA3DOP_MOV),
2891 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ),
2892 emit->ps_true_pos ))
2893 return FALSE;
2894
2895 if (!submit_op1( emit,
2896 inst_token(SVGA3DOP_RCP),
2897 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ),
2898 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) ))
2899 return FALSE;
2900
2901 if (!submit_op2( emit,
2902 inst_token(SVGA3DOP_MUL),
2903 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ),
2904 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ),
2905 scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) ))
2906 return FALSE;
2907 }
2908
2909 return TRUE;
2910 }
2911
2912
2913 static boolean
2914 emit_ps_postamble(struct svga_shader_emitter *emit)
2915 {
2916 unsigned i;
2917
2918 /* PS oDepth is incredibly fragile and it's very hard to catch the
2919 * types of usage that break it during shader emit. Easier just to
2920 * redirect the main program to a temporary and then only touch
2921 * oDepth with a hand-crafted MOV below.
2922 */
2923 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
2924 if (!submit_op1( emit,
2925 inst_token(SVGA3DOP_MOV),
2926 emit->true_pos,
2927 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
2928 return FALSE;
2929 }
2930
2931 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2932 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2933 /* Potentially override output colors with white for XOR
2934 * logicop workaround.
2935 */
2936 if (emit->unit == PIPE_SHADER_FRAGMENT &&
2937 emit->key.fkey.white_fragments) {
2938 struct src_register one = scalar( get_zero_immediate( emit ),
2939 TGSI_SWIZZLE_W );
2940
2941 if (!submit_op1( emit,
2942 inst_token(SVGA3DOP_MOV),
2943 emit->true_col[i],
2944 one ))
2945 return FALSE;
2946 }
2947 else {
2948 if (!submit_op1( emit,
2949 inst_token(SVGA3DOP_MOV),
2950 emit->true_col[i],
2951 src(emit->temp_col[i]) ))
2952 return FALSE;
2953 }
2954 }
2955 }
2956
2957 return TRUE;
2958 }
2959
2960
2961 static boolean
2962 emit_vs_postamble(struct svga_shader_emitter *emit)
2963 {
2964 /* PSIZ output is incredibly fragile and it's very hard to catch
2965 * the types of usage that break it during shader emit. Easier
2966 * just to redirect the main program to a temporary and then only
2967 * touch PSIZ with a hand-crafted MOV below.
2968 */
2969 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
2970 if (!submit_op1( emit,
2971 inst_token(SVGA3DOP_MOV),
2972 emit->true_psiz,
2973 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
2974 return FALSE;
2975 }
2976
2977 /* Need to perform various manipulations on vertex position to cope
2978 * with the different GL and D3D clip spaces.
2979 */
2980 if (emit->key.vkey.need_prescale) {
2981 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2982 SVGA3dShaderDestToken depth = emit->depth_pos;
2983 SVGA3dShaderDestToken pos = emit->true_pos;
2984 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
2985 struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
2986 offset + 0 );
2987 struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
2988 offset + 1 );
2989
2990 if (!submit_op1( emit,
2991 inst_token(SVGA3DOP_MOV),
2992 writemask(depth, TGSI_WRITEMASK_W),
2993 scalar(src(temp_pos), TGSI_SWIZZLE_W) ))
2994 return FALSE;
2995
2996 /* MUL temp_pos.xyz, temp_pos, prescale.scale
2997 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
2998 * --> Note that prescale.trans.w == 0
2999 */
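      /* i.e. pos.xyz = temp_pos.xyz * prescale.scale + temp_pos.w * prescale.trans,
       * while pos.w passes through unchanged since prescale.trans.w == 0.
       */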
3000 if (!submit_op2( emit,
3001 inst_token(SVGA3DOP_MUL),
3002 writemask(temp_pos, TGSI_WRITEMASK_XYZ),
3003 src(temp_pos),
3004 prescale_scale ))
3005 return FALSE;
3006
3007 if (!submit_op3( emit,
3008 inst_token(SVGA3DOP_MAD),
3009 pos,
3010 swizzle(src(temp_pos), 3, 3, 3, 3),
3011 prescale_trans,
3012 src(temp_pos)))
3013 return FALSE;
3014
3015 /* Also write to depth value */
3016 if (!submit_op3( emit,
3017 inst_token(SVGA3DOP_MAD),
3018 writemask(depth, TGSI_WRITEMASK_Z),
3019 swizzle(src(temp_pos), 3, 3, 3, 3),
3020 prescale_trans,
3021 src(temp_pos) ))
3022 return FALSE;
3023 }
3024 else {
3025 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3026 SVGA3dShaderDestToken depth = emit->depth_pos;
3027 SVGA3dShaderDestToken pos = emit->true_pos;
3028 struct src_register imm_0055 = emit->imm_0055;
3029
3030 /* Adjust GL clipping coordinate space to hardware (D3D-style):
3031 *
3032 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
3033 * MOV result.position, temp_pos
3034 */
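      /* The DP4 computes temp_pos.z = 0.5 * z + 0.5 * w, remapping GL's
       * clip-space z range of [-w, w] onto the D3D-style [0, w].
       */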
3035 if (!submit_op2( emit,
3036 inst_token(SVGA3DOP_DP4),
3037 writemask(temp_pos, TGSI_WRITEMASK_Z),
3038 imm_0055,
3039 src(temp_pos) ))
3040 return FALSE;
3041
3042 if (!submit_op1( emit,
3043 inst_token(SVGA3DOP_MOV),
3044 pos,
3045 src(temp_pos) ))
3046 return FALSE;
3047
3048 /* Move the manipulated depth into the extra texcoord reg */
3049 if (!submit_op1( emit,
3050 inst_token(SVGA3DOP_MOV),
3051 writemask(depth, TGSI_WRITEMASK_ZW),
3052 src(temp_pos) ))
3053 return FALSE;
3054 }
3055
3056 return TRUE;
3057 }
3058
3059
3060 /**
3061 * For the pixel shader: emit the code which chooses the front
3062 * or back face color depending on triangle orientation.
3063 *
3064 * 0: IF VFACE :4
3065 * 1: COLOR = FrontColor;
3066 * 2: ELSE
3067 * 3: COLOR = BackColor;
3068 * 4: ENDIF
3069 */
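/* In practice the IF is an IFC comparing VFACE against 0 (GT or LT
 * depending on front_ccw), and COLOR is a persistent hardware temp that
 * replaces the color entry in input_map.
 */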
3070 static boolean
3071 emit_light_twoside(struct svga_shader_emitter *emit)
3072 {
3073 struct src_register vface, zero;
3074 struct src_register front[2];
3075 struct src_register back[2];
3076 SVGA3dShaderDestToken color[2];
3077 int count = emit->internal_color_count;
3078 int i;
3079 SVGA3dShaderInstToken if_token;
3080
3081 if (count == 0)
3082 return TRUE;
3083
3084 vface = get_vface( emit );
3085 zero = get_zero_immediate( emit );
3086
3087 /* Can't use get_temp() to allocate the color reg as such
3088 * temporaries will be reclaimed after each instruction by the call
3089 * to reset_temp_regs().
3090 */
3091 for (i = 0; i < count; i++) {
3092 color[i] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ );
3093 front[i] = emit->input_map[emit->internal_color_idx[i]];
3094
3095 /* Back is always the next input:
3096 */
3097 back[i] = front[i];
3098 back[i].base.num = front[i].base.num + 1;
3099
3100 /* Reassign the input_map to the actual front-face color:
3101 */
3102 emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
3103 }
3104
3105 if_token = inst_token( SVGA3DOP_IFC );
3106
3107 if (emit->key.fkey.front_ccw)
3108 if_token.control = SVGA3DOPCOMP_LT;
3109 else
3110 if_token.control = SVGA3DOPCOMP_GT;
3111
3112 zero = scalar(zero, TGSI_SWIZZLE_X);
3113
3114 if (!(emit_instruction( emit, if_token ) &&
3115 emit_src( emit, vface ) &&
3116 emit_src( emit, zero ) ))
3117 return FALSE;
3118
3119 for (i = 0; i < count; i++) {
3120 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
3121 return FALSE;
3122 }
3123
3124 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
3125 return FALSE;
3126
3127 for (i = 0; i < count; i++) {
3128 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
3129 return FALSE;
3130 }
3131
3132 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
3133 return FALSE;
3134
3135 return TRUE;
3136 }
3137
3138
3139 /**
3140 * 0: SETP_GT TEMP, VFACE, 0
3141 * where TEMP is a fake frontface register
3142 */
3143 static boolean
3144 emit_frontface(struct svga_shader_emitter *emit)
3145 {
3146 struct src_register vface, zero;
3147 SVGA3dShaderDestToken temp;
3148 struct src_register pass, fail;
3149
3150 vface = get_vface( emit );
3151 zero = get_zero_immediate( emit );
3152
3153 /* Can't use get_temp() to allocate the fake frontface reg as such
3154 * temporaries will be reclaimed after each instruction by the call
3155 * to reset_temp_regs().
3156 */
3157 temp = dst_register( SVGA3DREG_TEMP,
3158 emit->nr_hw_temp++ );
3159
3160 if (emit->key.fkey.front_ccw) {
3161 pass = scalar( zero, TGSI_SWIZZLE_X );
3162 fail = scalar( zero, TGSI_SWIZZLE_W );
3163 } else {
3164 pass = scalar( zero, TGSI_SWIZZLE_W );
3165 fail = scalar( zero, TGSI_SWIZZLE_X );
3166 }
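   /* The zero immediate is (0, 0, 0, 1): W supplies 1.0 and X supplies 0.0.
    * Pass/fail are swapped with front_ccw, presumably so the fake FACE
    * value matches TGSI's front-facing convention for either winding.
    */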
3167
3168 if (!emit_conditional(emit, PIPE_FUNC_GREATER,
3169 temp, vface, scalar( zero, TGSI_SWIZZLE_X ),
3170 pass, fail))
3171 return FALSE;
3172
3173 /* Reassign the input_map to the actual front-face color:
3174 */
3175 emit->input_map[emit->internal_frontface_idx] = src(temp);
3176
3177 return TRUE;
3178 }
3179
3180
3181 /**
3182 * Emit code to invert the T component of the incoming texture coordinate.
3183 * This is used for drawing point sprites when
3184 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
3185 */
3186 static boolean
3187 emit_inverted_texcoords(struct svga_shader_emitter *emit)
3188 {
3189 struct src_register zero = get_zero_immediate(emit);
3190 struct src_register pos_neg_one = get_pos_neg_one_immediate( emit );
3191 unsigned inverted_texcoords = emit->inverted_texcoords;
3192
3193 while (inverted_texcoords) {
3194 const unsigned unit = ffs(inverted_texcoords) - 1;
3195
3196 assert(emit->inverted_texcoords & (1 << unit));
3197
3198 assert(unit < Elements(emit->ps_true_texcoord));
3199
3200 assert(unit < Elements(emit->ps_inverted_texcoord_input));
3201
3202 assert(emit->ps_inverted_texcoord_input[unit]
3203 < Elements(emit->input_map));
3204
3205 /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
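         /* i.e. inverted = (s, 1 - t, r, q): only the T channel is flipped. */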
3206 if (!submit_op3(emit,
3207 inst_token(SVGA3DOP_MAD),
3208 dst(emit->ps_inverted_texcoord[unit]),
3209 emit->ps_true_texcoord[unit],
3210 swizzle(pos_neg_one, 0, 3, 0, 0), /* (1, -1, 1, 1) */
3211 swizzle(zero, 0, 3, 0, 0))) /* (0, 1, 0, 0) */
3212 return FALSE;
3213
3214 /* Reassign the input_map entry to the new texcoord register */
3215 emit->input_map[emit->ps_inverted_texcoord_input[unit]] =
3216 emit->ps_inverted_texcoord[unit];
3217
3218 inverted_texcoords &= ~(1 << unit);
3219 }
3220
3221 return TRUE;
3222 }
3223
3224
3225 static boolean
3226 needs_to_create_zero( struct svga_shader_emitter *emit )
3227 {
3228 unsigned i;
3229
3230 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3231 if (emit->key.fkey.light_twoside)
3232 return TRUE;
3233
3234 if (emit->key.fkey.white_fragments)
3235 return TRUE;
3236
3237 if (emit->emit_frontface)
3238 return TRUE;
3239
3240 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
3241 emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 ||
3242 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
3243 return TRUE;
3244
3245 if (emit->inverted_texcoords)
3246 return TRUE;
3247
3248 /* look for any PIPE_SWIZZLE_ZERO/ONE terms */
3249 for (i = 0; i < emit->key.fkey.num_textures; i++) {
3250 if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
3251 emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
3252 emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
3253 emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
3254 return TRUE;
3255 }
3256
3257 for (i = 0; i < emit->key.fkey.num_textures; i++) {
3258 if (emit->key.fkey.tex[i].compare_mode
3259 == PIPE_TEX_COMPARE_R_TO_TEXTURE)
3260 return TRUE;
3261 }
3262 }
3263
3264 if (emit->unit == PIPE_SHADER_VERTEX) {
3265 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
3266 return TRUE;
3267 }
3268
3269 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
3270 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
3271 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
3272 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
3273 emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 ||
3274 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
3275 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
3276 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
3277 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
3278 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
3279 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
3280 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
3281 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
3282 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
3283 emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1)
3284 return TRUE;
3285
3286 return FALSE;
3287 }
3288
3289
3290 static boolean
3291 needs_to_create_loop_const( struct svga_shader_emitter *emit )
3292 {
3293 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
3294 }
3295
3296
3297 static boolean
3298 needs_to_create_arl_consts( struct svga_shader_emitter *emit )
3299 {
3300 return (emit->num_arl_consts > 0);
3301 }
3302
3303
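/**
 * Record the most negative constant-register index used with relative
 * addressing under the given ARL instruction.  This is presumably consumed
 * later (create_arl_consts / emit_fake_arl) to bias the address register so
 * that indirect constant accesses end up non-negative.
 */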
3304 static boolean
3305 pre_parse_add_indirect( struct svga_shader_emitter *emit,
3306 int num, int current_arl)
3307 {
3308 int i;
3309 assert(num < 0);
3310
3311 for (i = 0; i < emit->num_arl_consts; ++i) {
3312 if (emit->arl_consts[i].arl_num == current_arl)
3313 break;
3314 }
3315 /* new entry */
3316 if (emit->num_arl_consts == i) {
3317 ++emit->num_arl_consts;
3318 }
3319 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
3320 num :
3321 emit->arl_consts[i].number;
3322 emit->arl_consts[i].arl_num = current_arl;
3323 return TRUE;
3324 }
3325
3326
3327 static boolean
3328 pre_parse_instruction( struct svga_shader_emitter *emit,
3329 const struct tgsi_full_instruction *insn,
3330 int current_arl)
3331 {
3332 if (insn->Src[0].Register.Indirect &&
3333 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
3334 const struct tgsi_full_src_register *reg = &insn->Src[0];
3335 if (reg->Register.Index < 0) {
3336 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3337 }
3338 }
3339
3340 if (insn->Src[1].Register.Indirect &&
3341 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
3342 const struct tgsi_full_src_register *reg = &insn->Src[1];
3343 if (reg->Register.Index < 0) {
3344 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3345 }
3346 }
3347
3348 if (insn->Src[2].Register.Indirect &&
3349 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
3350 const struct tgsi_full_src_register *reg = &insn->Src[2];
3351 if (reg->Register.Index < 0) {
3352 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3353 }
3354 }
3355
3356 return TRUE;
3357 }
3358
3359
3360 static boolean
3361 pre_parse_tokens( struct svga_shader_emitter *emit,
3362 const struct tgsi_token *tokens )
3363 {
3364 struct tgsi_parse_context parse;
3365 int current_arl = 0;
3366
3367 tgsi_parse_init( &parse, tokens );
3368
3369 while (!tgsi_parse_end_of_tokens( &parse )) {
3370 tgsi_parse_token( &parse );
3371 switch (parse.FullToken.Token.Type) {
3372 case TGSI_TOKEN_TYPE_IMMEDIATE:
3373 case TGSI_TOKEN_TYPE_DECLARATION:
3374 break;
3375 case TGSI_TOKEN_TYPE_INSTRUCTION:
3376 if (parse.FullToken.FullInstruction.Instruction.Opcode ==
3377 TGSI_OPCODE_ARL) {
3378 ++current_arl;
3379 }
3380 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
3381 current_arl ))
3382 return FALSE;
3383 break;
3384 default:
3385 break;
3386 }
3387
3388 }
3389 return TRUE;
3390 }
3391
3392
3393 static boolean
3394 svga_shader_emit_helpers(struct svga_shader_emitter *emit)
3395 {
3396 if (needs_to_create_zero( emit )) {
3397 create_zero_immediate( emit );
3398 }
3399 if (needs_to_create_loop_const( emit )) {
3400 create_loop_const( emit );
3401 }
3402 if (needs_to_create_arl_consts( emit )) {
3403 create_arl_consts( emit );
3404 }
3405
3406 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3407 if (!emit_ps_preamble( emit ))
3408 return FALSE;
3409
3410 if (emit->key.fkey.light_twoside) {
3411 if (!emit_light_twoside( emit ))
3412 return FALSE;
3413 }
3414 if (emit->emit_frontface) {
3415 if (!emit_frontface( emit ))
3416 return FALSE;
3417 }
3418 if (emit->inverted_texcoords) {
3419 if (!emit_inverted_texcoords( emit ))
3420 return FALSE;
3421 }
3422 }
3423
3424 return TRUE;
3425 }
3426
3427
3428 boolean
3429 svga_shader_emit_instructions(struct svga_shader_emitter *emit,
3430 const struct tgsi_token *tokens)
3431 {
3432 struct tgsi_parse_context parse;
3433 boolean ret = TRUE;
3434 boolean helpers_emitted = FALSE;
3435 unsigned line_nr = 0;
3436
3437 tgsi_parse_init( &parse, tokens );
3438 emit->internal_imm_count = 0;
3439
3440 if (emit->unit == PIPE_SHADER_VERTEX) {
3441 ret = emit_vs_preamble( emit );
3442 if (!ret)
3443 goto done;
3444 }
3445
3446 pre_parse_tokens(emit, tokens);
3447
3448 while (!tgsi_parse_end_of_tokens( &parse )) {
3449 tgsi_parse_token( &parse );
3450
3451 switch (parse.FullToken.Token.Type) {
3452 case TGSI_TOKEN_TYPE_IMMEDIATE:
3453 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
3454 if (!ret)
3455 goto done;
3456 break;
3457
3458 case TGSI_TOKEN_TYPE_DECLARATION:
3459 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
3460 if (!ret)
3461 goto done;
3462 break;
3463
3464 case TGSI_TOKEN_TYPE_INSTRUCTION:
3465 if (!helpers_emitted) {
3466 if (!svga_shader_emit_helpers( emit ))
3467 goto done;
3468 helpers_emitted = TRUE;
3469 }
3470 ret = svga_emit_instruction( emit,
3471 line_nr++,
3472 &parse.FullToken.FullInstruction );
3473 if (!ret)
3474 goto done;
3475 break;
3476 default:
3477 break;
3478 }
3479
3480 reset_temp_regs( emit );
3481 }
3482
3483    /* Need to terminate the current subroutine. Note that the
3484     * hardware doesn't tolerate shaders whose sub-routines don't
3485     * terminate with RET+END.
3486     */
3487 if (!emit->in_main_func) {
3488 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
3489 if (!ret)
3490 goto done;
3491 }
3492
3493 assert(emit->dynamic_branching_level == 0);
3494
3495 /* Need to terminate the whole shader:
3496 */
3497 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
3498 if (!ret)
3499 goto done;
3500
3501 done:
3502 tgsi_parse_free( &parse );
3503 return ret;
3504 }