svga: silence a couple unused variable warnings
[mesa.git] / src / gallium / drivers / svga / svga_tgsi_insn.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "util/u_memory.h"
31 #include "util/u_math.h"
32 #include "util/u_pstipple.h"
33
34 #include "svga_tgsi_emit.h"
35 #include "svga_context.h"
36
37
38 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
39 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
40
41
42 static unsigned
43 translate_opcode(uint opcode)
44 {
45 switch (opcode) {
46 case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
47 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
48 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
49 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
50 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
51 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
52 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
53 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
54 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
55 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
56 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
57 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
58 default:
59 assert(!"svga: unexpected opcode in translate_opcode()");
60 return SVGA3DOP_LAST_INST;
61 }
62 }
63
64
65 static unsigned
66 translate_file(unsigned file)
67 {
68 switch (file) {
69 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
70 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
71 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
72 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
73 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
74 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
75 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
76 default:
77 assert(!"svga: unexpected register file in translate_file()");
78 return SVGA3DREG_TEMP;
79 }
80 }
81
82
83 /**
84 * Translate a TGSI destination register to an SVGA3DShaderDestToken.
85 * \param insn the TGSI instruction
86 * \param idx which TGSI dest register to translate (usually (always?) zero)
87 */
88 static SVGA3dShaderDestToken
89 translate_dst_register( struct svga_shader_emitter *emit,
90 const struct tgsi_full_instruction *insn,
91 unsigned idx )
92 {
93 const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
94 SVGA3dShaderDestToken dest;
95
96 switch (reg->Register.File) {
97 case TGSI_FILE_OUTPUT:
98 /* Output registers encode semantic information in their name.
99 * Need to lookup a table built at decl time:
100 */
101 dest = emit->output_map[reg->Register.Index];
102 emit->num_output_writes++;
103 break;
104
105 default:
106 {
107 unsigned index = reg->Register.Index;
108 assert(index < SVGA3D_TEMPREG_MAX);
109 index = MIN2(index, SVGA3D_TEMPREG_MAX - 1);
110 dest = dst_register(translate_file(reg->Register.File), index);
111 }
112 break;
113 }
114
115 if (reg->Register.Indirect) {
116 debug_warning("Indirect indexing of dest registers is not supported!\n");
117 }
118
119 dest.mask = reg->Register.WriteMask;
120 assert(dest.mask);
121
122 if (insn->Instruction.Saturate)
123 dest.dstMod = SVGA3DDSTMOD_SATURATE;
124
125 return dest;
126 }
127
128
129 /**
130 * Apply a swizzle to a src_register, returning a new src_register
131 * Ex: swizzle(SRC.ZZYY, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y)
132 * would return SRC.YYZZ
133 */
134 static struct src_register
135 swizzle(struct src_register src,
136 unsigned x, unsigned y, unsigned z, unsigned w)
137 {
138 assert(x < 4);
139 assert(y < 4);
140 assert(z < 4);
141 assert(w < 4);
142 x = (src.base.swizzle >> (x * 2)) & 0x3;
143 y = (src.base.swizzle >> (y * 2)) & 0x3;
144 z = (src.base.swizzle >> (z * 2)) & 0x3;
145 w = (src.base.swizzle >> (w * 2)) & 0x3;
146
147 src.base.swizzle = TRANSLATE_SWIZZLE(x, y, z, w);
148
149 return src;
150 }
151
152
153 /**
154 * Apply a "scalar" swizzle to a src_register returning a new
155 * src_register where all the swizzle terms are the same.
156 * Ex: scalar(SRC.WZYX, SWIZZLE_Y) would return SRC.ZZZZ
157 */
158 static struct src_register
159 scalar(struct src_register src, unsigned comp)
160 {
161 assert(comp < 4);
162 return swizzle( src, comp, comp, comp, comp );
163 }
164
165
166 static boolean
167 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
168 {
169 unsigned i;
170
171 for (i = 0; i < emit->num_arl_consts; ++i) {
172 if (emit->arl_consts[i].arl_num == emit->current_arl)
173 return TRUE;
174 }
175 return FALSE;
176 }
177
178
179 static int
180 svga_arl_adjustment( const struct svga_shader_emitter *emit )
181 {
182 unsigned i;
183
184 for (i = 0; i < emit->num_arl_consts; ++i) {
185 if (emit->arl_consts[i].arl_num == emit->current_arl)
186 return emit->arl_consts[i].number;
187 }
188 return 0;
189 }
190
191
192 /**
193 * Translate a TGSI src register to a src_register.
194 */
195 static struct src_register
196 translate_src_register( const struct svga_shader_emitter *emit,
197 const struct tgsi_full_src_register *reg )
198 {
199 struct src_register src;
200
201 switch (reg->Register.File) {
202 case TGSI_FILE_INPUT:
203 /* Input registers are referred to by their semantic name rather
204 * than by index. Use the mapping build up from the decls:
205 */
206 src = emit->input_map[reg->Register.Index];
207 break;
208
209 case TGSI_FILE_IMMEDIATE:
210 /* Immediates are appended after TGSI constants in the D3D
211 * constant buffer.
212 */
213 src = src_register( translate_file( reg->Register.File ),
214 reg->Register.Index + emit->imm_start );
215 break;
216
217 default:
218 src = src_register( translate_file( reg->Register.File ),
219 reg->Register.Index );
220 break;
221 }
222
223 /* Indirect addressing.
224 */
225 if (reg->Register.Indirect) {
226 if (emit->unit == PIPE_SHADER_FRAGMENT) {
227 /* Pixel shaders have only loop registers for relative
228 * addressing into inputs. Ignore the redundant address
229 * register, the contents of aL should be in sync with it.
230 */
231 if (reg->Register.File == TGSI_FILE_INPUT) {
232 src.base.relAddr = 1;
233 src.indirect = src_token(SVGA3DREG_LOOP, 0);
234 }
235 }
236 else {
237 /* Constant buffers only.
238 */
239 if (reg->Register.File == TGSI_FILE_CONSTANT) {
240 /* we shift the offset towards the minimum */
241 if (svga_arl_needs_adjustment( emit )) {
242 src.base.num -= svga_arl_adjustment( emit );
243 }
244 src.base.relAddr = 1;
245
246 /* Not really sure what should go in the second token:
247 */
248 src.indirect = src_token( SVGA3DREG_ADDR,
249 reg->Indirect.Index );
250
251 src.indirect.swizzle = SWIZZLE_XXXX;
252 }
253 }
254 }
255
256 src = swizzle( src,
257 reg->Register.SwizzleX,
258 reg->Register.SwizzleY,
259 reg->Register.SwizzleZ,
260 reg->Register.SwizzleW );
261
262 /* src.mod isn't a bitfield, unfortunately:
263 * See tgsi_util_get_full_src_register_sign_mode for implementation details.
264 */
265 if (reg->Register.Absolute) {
266 if (reg->Register.Negate)
267 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
268 else
269 src.base.srcMod = SVGA3DSRCMOD_ABS;
270 }
271 else {
272 if (reg->Register.Negate)
273 src.base.srcMod = SVGA3DSRCMOD_NEG;
274 else
275 src.base.srcMod = SVGA3DSRCMOD_NONE;
276 }
277
278 return src;
279 }
280
281
282 /*
283 * Get a temporary register.
284 * Note: if we exceed the temporary register limit we just use
285 * register SVGA3D_TEMPREG_MAX - 1.
286 */
287 static SVGA3dShaderDestToken
288 get_temp( struct svga_shader_emitter *emit )
289 {
290 int i = emit->nr_hw_temp + emit->internal_temp_count++;
291 if (i >= SVGA3D_TEMPREG_MAX) {
292 debug_warn_once("svga: Too many temporary registers used in shader\n");
293 i = SVGA3D_TEMPREG_MAX - 1;
294 }
295 return dst_register( SVGA3DREG_TEMP, i );
296 }
297
298
299 /**
300 * Release a single temp. Currently only effective if it was the last
301 * allocated temp, otherwise release will be delayed until the next
302 * call to reset_temp_regs().
303 */
304 static void
305 release_temp( struct svga_shader_emitter *emit,
306 SVGA3dShaderDestToken temp )
307 {
308 if (temp.num == emit->internal_temp_count - 1)
309 emit->internal_temp_count--;
310 }
311
312
313 /**
314 * Release all temps.
315 */
316 static void
317 reset_temp_regs(struct svga_shader_emitter *emit)
318 {
319 emit->internal_temp_count = 0;
320 }
321
322
323 /** Emit bytecode for a src_register */
324 static boolean
325 emit_src(struct svga_shader_emitter *emit, const struct src_register src)
326 {
327 if (src.base.relAddr) {
328 assert(src.base.reserved0);
329 assert(src.indirect.reserved0);
330 return (svga_shader_emit_dword( emit, src.base.value ) &&
331 svga_shader_emit_dword( emit, src.indirect.value ));
332 }
333 else {
334 assert(src.base.reserved0);
335 return svga_shader_emit_dword( emit, src.base.value );
336 }
337 }
338
339
340 /** Emit bytecode for a dst_register */
341 static boolean
342 emit_dst(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dest)
343 {
344 assert(dest.reserved0);
345 assert(dest.mask);
346 return svga_shader_emit_dword( emit, dest.value );
347 }
348
349
350 /** Emit bytecode for a 1-operand instruction */
351 static boolean
352 emit_op1(struct svga_shader_emitter *emit,
353 SVGA3dShaderInstToken inst,
354 SVGA3dShaderDestToken dest,
355 struct src_register src0)
356 {
357 return (emit_instruction(emit, inst) &&
358 emit_dst(emit, dest) &&
359 emit_src(emit, src0));
360 }
361
362
363 /** Emit bytecode for a 2-operand instruction */
364 static boolean
365 emit_op2(struct svga_shader_emitter *emit,
366 SVGA3dShaderInstToken inst,
367 SVGA3dShaderDestToken dest,
368 struct src_register src0,
369 struct src_register src1)
370 {
371 return (emit_instruction(emit, inst) &&
372 emit_dst(emit, dest) &&
373 emit_src(emit, src0) &&
374 emit_src(emit, src1));
375 }
376
377
378 /** Emit bytecode for a 3-operand instruction */
379 static boolean
380 emit_op3(struct svga_shader_emitter *emit,
381 SVGA3dShaderInstToken inst,
382 SVGA3dShaderDestToken dest,
383 struct src_register src0,
384 struct src_register src1,
385 struct src_register src2)
386 {
387 return (emit_instruction(emit, inst) &&
388 emit_dst(emit, dest) &&
389 emit_src(emit, src0) &&
390 emit_src(emit, src1) &&
391 emit_src(emit, src2));
392 }
393
394
395 /** Emit bytecode for a 4-operand instruction */
396 static boolean
397 emit_op4(struct svga_shader_emitter *emit,
398 SVGA3dShaderInstToken inst,
399 SVGA3dShaderDestToken dest,
400 struct src_register src0,
401 struct src_register src1,
402 struct src_register src2,
403 struct src_register src3)
404 {
405 return (emit_instruction(emit, inst) &&
406 emit_dst(emit, dest) &&
407 emit_src(emit, src0) &&
408 emit_src(emit, src1) &&
409 emit_src(emit, src2) &&
410 emit_src(emit, src3));
411 }
412
413
414 /**
415 * Apply the absolute value modifier to the given src_register, returning
416 * a new src_register.
417 */
418 static struct src_register
419 absolute(struct src_register src)
420 {
421 src.base.srcMod = SVGA3DSRCMOD_ABS;
422 return src;
423 }
424
425
426 /**
427 * Apply the negation modifier to the given src_register, returning
428 * a new src_register.
429 */
430 static struct src_register
431 negate(struct src_register src)
432 {
433 switch (src.base.srcMod) {
434 case SVGA3DSRCMOD_ABS:
435 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
436 break;
437 case SVGA3DSRCMOD_ABSNEG:
438 src.base.srcMod = SVGA3DSRCMOD_ABS;
439 break;
440 case SVGA3DSRCMOD_NEG:
441 src.base.srcMod = SVGA3DSRCMOD_NONE;
442 break;
443 case SVGA3DSRCMOD_NONE:
444 src.base.srcMod = SVGA3DSRCMOD_NEG;
445 break;
446 }
447 return src;
448 }
449
450
451
452 /* Replace the src with the temporary specified in the dst, but copying
453 * only the necessary channels, and preserving the original swizzle (which is
454 * important given that several opcodes have constraints in the allowed
455 * swizzles).
456 */
457 static boolean
458 emit_repl(struct svga_shader_emitter *emit,
459 SVGA3dShaderDestToken dst,
460 struct src_register *src0)
461 {
462 unsigned src0_swizzle;
463 unsigned chan;
464
465 assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP);
466
467 src0_swizzle = src0->base.swizzle;
468
469 dst.mask = 0;
470 for (chan = 0; chan < 4; ++chan) {
471 unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3;
472 dst.mask |= 1 << swizzle;
473 }
474 assert(dst.mask);
475
476 src0->base.swizzle = SVGA3DSWIZZLE_NONE;
477
478 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 ))
479 return FALSE;
480
481 *src0 = src( dst );
482 src0->base.swizzle = src0_swizzle;
483
484 return TRUE;
485 }
486
487
488 /**
489 * Submit/emit an instruction with zero operands.
490 */
491 static boolean
492 submit_op0(struct svga_shader_emitter *emit,
493 SVGA3dShaderInstToken inst,
494 SVGA3dShaderDestToken dest)
495 {
496 return (emit_instruction( emit, inst ) &&
497 emit_dst( emit, dest ));
498 }
499
500
501 /**
502 * Submit/emit an instruction with one operand.
503 */
504 static boolean
505 submit_op1(struct svga_shader_emitter *emit,
506 SVGA3dShaderInstToken inst,
507 SVGA3dShaderDestToken dest,
508 struct src_register src0)
509 {
510 return emit_op1( emit, inst, dest, src0 );
511 }
512
513
514 /**
515 * Submit/emit an instruction with two operands.
516 *
517 * SVGA shaders may not refer to >1 constant register in a single
518 * instruction. This function checks for that usage and inserts a
519 * move to temporary if detected.
520 *
521 * The same applies to input registers -- at most a single input
522 * register may be read by any instruction.
523 */
524 static boolean
525 submit_op2(struct svga_shader_emitter *emit,
526 SVGA3dShaderInstToken inst,
527 SVGA3dShaderDestToken dest,
528 struct src_register src0,
529 struct src_register src1)
530 {
531 SVGA3dShaderDestToken temp;
532 SVGA3dShaderRegType type0, type1;
533 boolean need_temp = FALSE;
534
535 temp.value = 0;
536 type0 = SVGA3dShaderGetRegType( src0.base.value );
537 type1 = SVGA3dShaderGetRegType( src1.base.value );
538
539 if (type0 == SVGA3DREG_CONST &&
540 type1 == SVGA3DREG_CONST &&
541 src0.base.num != src1.base.num)
542 need_temp = TRUE;
543
544 if (type0 == SVGA3DREG_INPUT &&
545 type1 == SVGA3DREG_INPUT &&
546 src0.base.num != src1.base.num)
547 need_temp = TRUE;
548
549 if (need_temp) {
550 temp = get_temp( emit );
551
552 if (!emit_repl( emit, temp, &src0 ))
553 return FALSE;
554 }
555
556 if (!emit_op2( emit, inst, dest, src0, src1 ))
557 return FALSE;
558
559 if (need_temp)
560 release_temp( emit, temp );
561
562 return TRUE;
563 }
564
565
566 /**
567 * Submit/emit an instruction with three operands.
568 *
569 * SVGA shaders may not refer to >1 constant register in a single
570 * instruction. This function checks for that usage and inserts a
571 * move to temporary if detected.
572 */
573 static boolean
574 submit_op3(struct svga_shader_emitter *emit,
575 SVGA3dShaderInstToken inst,
576 SVGA3dShaderDestToken dest,
577 struct src_register src0,
578 struct src_register src1,
579 struct src_register src2)
580 {
581 SVGA3dShaderDestToken temp0;
582 SVGA3dShaderDestToken temp1;
583 boolean need_temp0 = FALSE;
584 boolean need_temp1 = FALSE;
585 SVGA3dShaderRegType type0, type1, type2;
586
587 temp0.value = 0;
588 temp1.value = 0;
589 type0 = SVGA3dShaderGetRegType( src0.base.value );
590 type1 = SVGA3dShaderGetRegType( src1.base.value );
591 type2 = SVGA3dShaderGetRegType( src2.base.value );
592
593 if (inst.op != SVGA3DOP_SINCOS) {
594 if (type0 == SVGA3DREG_CONST &&
595 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
596 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
597 need_temp0 = TRUE;
598
599 if (type1 == SVGA3DREG_CONST &&
600 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
601 need_temp1 = TRUE;
602 }
603
604 if (type0 == SVGA3DREG_INPUT &&
605 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
606 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
607 need_temp0 = TRUE;
608
609 if (type1 == SVGA3DREG_INPUT &&
610 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
611 need_temp1 = TRUE;
612
613 if (need_temp0) {
614 temp0 = get_temp( emit );
615
616 if (!emit_repl( emit, temp0, &src0 ))
617 return FALSE;
618 }
619
620 if (need_temp1) {
621 temp1 = get_temp( emit );
622
623 if (!emit_repl( emit, temp1, &src1 ))
624 return FALSE;
625 }
626
627 if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
628 return FALSE;
629
630 if (need_temp1)
631 release_temp( emit, temp1 );
632 if (need_temp0)
633 release_temp( emit, temp0 );
634 return TRUE;
635 }
636
637
638 /**
639 * Submit/emit an instruction with four operands.
640 *
641 * SVGA shaders may not refer to >1 constant register in a single
642 * instruction. This function checks for that usage and inserts a
643 * move to temporary if detected.
644 */
645 static boolean
646 submit_op4(struct svga_shader_emitter *emit,
647 SVGA3dShaderInstToken inst,
648 SVGA3dShaderDestToken dest,
649 struct src_register src0,
650 struct src_register src1,
651 struct src_register src2,
652 struct src_register src3)
653 {
654 SVGA3dShaderDestToken temp0;
655 SVGA3dShaderDestToken temp3;
656 boolean need_temp0 = FALSE;
657 boolean need_temp3 = FALSE;
658 SVGA3dShaderRegType type0, type1, type2, type3;
659
660 temp0.value = 0;
661 temp3.value = 0;
662 type0 = SVGA3dShaderGetRegType( src0.base.value );
663 type1 = SVGA3dShaderGetRegType( src1.base.value );
664 type2 = SVGA3dShaderGetRegType( src2.base.value );
665 type3 = SVGA3dShaderGetRegType( src2.base.value );
666
667 /* Make life a little easier - this is only used by the TXD
668 * instruction which is guaranteed not to have a constant/input reg
669 * in one slot at least:
670 */
671 assert(type1 == SVGA3DREG_SAMPLER);
672 (void) type1;
673
674 if (type0 == SVGA3DREG_CONST &&
675 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
676 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
677 need_temp0 = TRUE;
678
679 if (type3 == SVGA3DREG_CONST &&
680 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
681 need_temp3 = TRUE;
682
683 if (type0 == SVGA3DREG_INPUT &&
684 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
685 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
686 need_temp0 = TRUE;
687
688 if (type3 == SVGA3DREG_INPUT &&
689 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
690 need_temp3 = TRUE;
691
692 if (need_temp0) {
693 temp0 = get_temp( emit );
694
695 if (!emit_repl( emit, temp0, &src0 ))
696 return FALSE;
697 }
698
699 if (need_temp3) {
700 temp3 = get_temp( emit );
701
702 if (!emit_repl( emit, temp3, &src3 ))
703 return FALSE;
704 }
705
706 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
707 return FALSE;
708
709 if (need_temp3)
710 release_temp( emit, temp3 );
711 if (need_temp0)
712 release_temp( emit, temp0 );
713 return TRUE;
714 }
715
716
717 /**
718 * Do the src and dest registers refer to the same register?
719 */
720 static boolean
721 alias_src_dst(struct src_register src,
722 SVGA3dShaderDestToken dst)
723 {
724 if (src.base.num != dst.num)
725 return FALSE;
726
727 if (SVGA3dShaderGetRegType(dst.value) !=
728 SVGA3dShaderGetRegType(src.base.value))
729 return FALSE;
730
731 return TRUE;
732 }
733
734
735 /**
736 * Helper for emitting SVGA immediate values using the SVGA3DOP_DEF[I]
737 * instructions.
738 */
739 static boolean
740 emit_def_const(struct svga_shader_emitter *emit,
741 SVGA3dShaderConstType type,
742 unsigned idx, float a, float b, float c, float d)
743 {
744 SVGA3DOpDefArgs def;
745 SVGA3dShaderInstToken opcode;
746
747 switch (type) {
748 case SVGA3D_CONST_TYPE_FLOAT:
749 opcode = inst_token( SVGA3DOP_DEF );
750 def.dst = dst_register( SVGA3DREG_CONST, idx );
751 def.constValues[0] = a;
752 def.constValues[1] = b;
753 def.constValues[2] = c;
754 def.constValues[3] = d;
755 break;
756 case SVGA3D_CONST_TYPE_INT:
757 opcode = inst_token( SVGA3DOP_DEFI );
758 def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
759 def.constIValues[0] = (int)a;
760 def.constIValues[1] = (int)b;
761 def.constIValues[2] = (int)c;
762 def.constIValues[3] = (int)d;
763 break;
764 default:
765 assert(0);
766 opcode = inst_token( SVGA3DOP_NOP );
767 break;
768 }
769
770 if (!emit_instruction(emit, opcode) ||
771 !svga_shader_emit_dwords( emit, def.values, ARRAY_SIZE(def.values)))
772 return FALSE;
773
774 return TRUE;
775 }
776
777
778 static boolean
779 create_loop_const( struct svga_shader_emitter *emit )
780 {
781 unsigned idx = emit->nr_hw_int_const++;
782
783 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
784 255, /* iteration count */
785 0, /* initial value */
786 1, /* step size */
787 0 /* not used, must be 0 */))
788 return FALSE;
789
790 emit->loop_const_idx = idx;
791 emit->created_loop_const = TRUE;
792
793 return TRUE;
794 }
795
796 static boolean
797 create_arl_consts( struct svga_shader_emitter *emit )
798 {
799 int i;
800
801 for (i = 0; i < emit->num_arl_consts; i += 4) {
802 int j;
803 unsigned idx = emit->nr_hw_float_const++;
804 float vals[4];
805 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
806 vals[j] = (float) emit->arl_consts[i + j].number;
807 emit->arl_consts[i + j].idx = idx;
808 switch (j) {
809 case 0:
810 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
811 break;
812 case 1:
813 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y;
814 break;
815 case 2:
816 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z;
817 break;
818 case 3:
819 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W;
820 break;
821 }
822 }
823 while (j < 4)
824 vals[j++] = 0;
825
826 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
827 vals[0], vals[1],
828 vals[2], vals[3]))
829 return FALSE;
830 }
831
832 return TRUE;
833 }
834
835
836 /**
837 * Return the register which holds the pixel shaders front/back-
838 * facing value.
839 */
840 static struct src_register
841 get_vface( struct svga_shader_emitter *emit )
842 {
843 assert(emit->emitted_vface);
844 return src_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE);
845 }
846
847
848 /**
849 * Create/emit a "common" constant with values {0, 0.5, -1, 1}.
850 * We can swizzle this to produce other useful constants such as
851 * {0, 0, 0, 0}, {1, 1, 1, 1}, etc.
852 */
853 static boolean
854 create_common_immediate( struct svga_shader_emitter *emit )
855 {
856 unsigned idx = emit->nr_hw_float_const++;
857
858 /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate
859 * other useful vectors.
860 */
861 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
862 idx, 0.0f, 0.5f, -1.0f, 1.0f ))
863 return FALSE;
864 emit->common_immediate_idx[0] = idx;
865 idx++;
866
867 /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
868 if (emit->key.vs.adjust_attrib_range) {
869 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
870 idx, 2.0f, 0.0f, 0.0f, 0.0f ))
871 return FALSE;
872 emit->common_immediate_idx[1] = idx;
873 }
874 else {
875 emit->common_immediate_idx[1] = -1;
876 }
877
878 emit->created_common_immediate = TRUE;
879
880 return TRUE;
881 }
882
883
884 /**
885 * Return swizzle/position for the given value in the "common" immediate.
886 */
887 static inline unsigned
888 common_immediate_swizzle(float value)
889 {
890 if (value == 0.0f)
891 return TGSI_SWIZZLE_X;
892 else if (value == 0.5f)
893 return TGSI_SWIZZLE_Y;
894 else if (value == -1.0f)
895 return TGSI_SWIZZLE_Z;
896 else if (value == 1.0f)
897 return TGSI_SWIZZLE_W;
898 else {
899 assert(!"illegal value in common_immediate_swizzle");
900 return TGSI_SWIZZLE_X;
901 }
902 }
903
904
905 /**
906 * Returns an immediate reg where all the terms are either 0, 1, 2 or 0.5
907 */
908 static struct src_register
909 get_immediate(struct svga_shader_emitter *emit,
910 float x, float y, float z, float w)
911 {
912 unsigned sx = common_immediate_swizzle(x);
913 unsigned sy = common_immediate_swizzle(y);
914 unsigned sz = common_immediate_swizzle(z);
915 unsigned sw = common_immediate_swizzle(w);
916 assert(emit->created_common_immediate);
917 assert(emit->common_immediate_idx[0] >= 0);
918 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
919 sx, sy, sz, sw);
920 }
921
922
923 /**
924 * returns {0, 0, 0, 0} immediate
925 */
926 static struct src_register
927 get_zero_immediate( struct svga_shader_emitter *emit )
928 {
929 assert(emit->created_common_immediate);
930 assert(emit->common_immediate_idx[0] >= 0);
931 return swizzle(src_register( SVGA3DREG_CONST,
932 emit->common_immediate_idx[0]),
933 0, 0, 0, 0);
934 }
935
936
937 /**
938 * returns {1, 1, 1, 1} immediate
939 */
940 static struct src_register
941 get_one_immediate( struct svga_shader_emitter *emit )
942 {
943 assert(emit->created_common_immediate);
944 assert(emit->common_immediate_idx[0] >= 0);
945 return swizzle(src_register( SVGA3DREG_CONST,
946 emit->common_immediate_idx[0]),
947 3, 3, 3, 3);
948 }
949
950
951 /**
952 * returns {0.5, 0.5, 0.5, 0.5} immediate
953 */
954 static struct src_register
955 get_half_immediate( struct svga_shader_emitter *emit )
956 {
957 assert(emit->created_common_immediate);
958 assert(emit->common_immediate_idx[0] >= 0);
959 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
960 1, 1, 1, 1);
961 }
962
963
964 /**
965 * returns {2, 2, 2, 2} immediate
966 */
967 static struct src_register
968 get_two_immediate( struct svga_shader_emitter *emit )
969 {
970 /* Note we use the second common immediate here */
971 assert(emit->created_common_immediate);
972 assert(emit->common_immediate_idx[1] >= 0);
973 return swizzle(src_register( SVGA3DREG_CONST,
974 emit->common_immediate_idx[1]),
975 0, 0, 0, 0);
976 }
977
978
979 /**
980 * returns the loop const
981 */
982 static struct src_register
983 get_loop_const( struct svga_shader_emitter *emit )
984 {
985 assert(emit->created_loop_const);
986 assert(emit->loop_const_idx >= 0);
987 return src_register( SVGA3DREG_CONSTINT,
988 emit->loop_const_idx );
989 }
990
991
992 static struct src_register
993 get_fake_arl_const( struct svga_shader_emitter *emit )
994 {
995 struct src_register reg;
996 int idx = 0, swizzle = 0, i;
997
998 for (i = 0; i < emit->num_arl_consts; ++ i) {
999 if (emit->arl_consts[i].arl_num == emit->current_arl) {
1000 idx = emit->arl_consts[i].idx;
1001 swizzle = emit->arl_consts[i].swizzle;
1002 }
1003 }
1004
1005 reg = src_register( SVGA3DREG_CONST, idx );
1006 return scalar(reg, swizzle);
1007 }
1008
1009
1010 /**
1011 * Return a register which holds the width and height of the texture
1012 * currently bound to the given sampler.
1013 */
1014 static struct src_register
1015 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
1016 {
1017 int idx;
1018 struct src_register reg;
1019
1020 /* the width/height indexes start right after constants */
1021 idx = emit->key.tex[sampler_num].width_height_idx +
1022 emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
1023
1024 reg = src_register( SVGA3DREG_CONST, idx );
1025 return reg;
1026 }
1027
1028
1029 static boolean
1030 emit_fake_arl(struct svga_shader_emitter *emit,
1031 const struct tgsi_full_instruction *insn)
1032 {
1033 const struct src_register src0 =
1034 translate_src_register(emit, &insn->Src[0] );
1035 struct src_register src1 = get_fake_arl_const( emit );
1036 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1037 SVGA3dShaderDestToken tmp = get_temp( emit );
1038
1039 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
1040 return FALSE;
1041
1042 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
1043 src1))
1044 return FALSE;
1045
1046 /* replicate the original swizzle */
1047 src1 = src(tmp);
1048 src1.base.swizzle = src0.base.swizzle;
1049
1050 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
1051 dst, src1 );
1052 }
1053
1054
1055 static boolean
1056 emit_if(struct svga_shader_emitter *emit,
1057 const struct tgsi_full_instruction *insn)
1058 {
1059 struct src_register src0 =
1060 translate_src_register(emit, &insn->Src[0]);
1061 struct src_register zero = get_zero_immediate(emit);
1062 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
1063
1064 if_token.control = SVGA3DOPCOMPC_NE;
1065
1066 if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) {
1067 /*
1068 * Max different constant registers readable per IFC instruction is 1.
1069 */
1070 SVGA3dShaderDestToken tmp = get_temp( emit );
1071
1072 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
1073 return FALSE;
1074
1075 src0 = scalar(src( tmp ), TGSI_SWIZZLE_X);
1076 }
1077
1078 emit->dynamic_branching_level++;
1079
1080 return (emit_instruction( emit, if_token ) &&
1081 emit_src( emit, src0 ) &&
1082 emit_src( emit, zero ) );
1083 }
1084
1085
1086 static boolean
1087 emit_else(struct svga_shader_emitter *emit,
1088 const struct tgsi_full_instruction *insn)
1089 {
1090 return emit_instruction(emit, inst_token(SVGA3DOP_ELSE));
1091 }
1092
1093
1094 static boolean
1095 emit_endif(struct svga_shader_emitter *emit,
1096 const struct tgsi_full_instruction *insn)
1097 {
1098 emit->dynamic_branching_level--;
1099
1100 return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF));
1101 }
1102
1103
1104 /**
1105 * Translate the following TGSI FLR instruction.
1106 * FLR DST, SRC
1107 * To the following SVGA3D instruction sequence.
1108 * FRC TMP, SRC
1109 * SUB DST, SRC, TMP
1110 */
1111 static boolean
1112 emit_floor(struct svga_shader_emitter *emit,
1113 const struct tgsi_full_instruction *insn )
1114 {
1115 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1116 const struct src_register src0 =
1117 translate_src_register(emit, &insn->Src[0] );
1118 SVGA3dShaderDestToken temp = get_temp( emit );
1119
1120 /* FRC TMP, SRC */
1121 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
1122 return FALSE;
1123
1124 /* SUB DST, SRC, TMP */
1125 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
1126 negate( src( temp ) ) ))
1127 return FALSE;
1128
1129 return TRUE;
1130 }
1131
1132
1133 /**
1134 * Translate the following TGSI CEIL instruction.
1135 * CEIL DST, SRC
1136 * To the following SVGA3D instruction sequence.
1137 * FRC TMP, -SRC
1138 * ADD DST, SRC, TMP
1139 */
1140 static boolean
1141 emit_ceil(struct svga_shader_emitter *emit,
1142 const struct tgsi_full_instruction *insn)
1143 {
1144 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
1145 const struct src_register src0 =
1146 translate_src_register(emit, &insn->Src[0]);
1147 SVGA3dShaderDestToken temp = get_temp(emit);
1148
1149 /* FRC TMP, -SRC */
1150 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0)))
1151 return FALSE;
1152
1153 /* ADD DST, SRC, TMP */
1154 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp)))
1155 return FALSE;
1156
1157 return TRUE;
1158 }
1159
1160
1161 /**
1162 * Translate the following TGSI DIV instruction.
1163 * DIV DST.xy, SRC0, SRC1
1164 * To the following SVGA3D instruction sequence.
1165 * RCP TMP.x, SRC1.xxxx
1166 * RCP TMP.y, SRC1.yyyy
1167 * MUL DST.xy, SRC0, TMP
1168 */
1169 static boolean
1170 emit_div(struct svga_shader_emitter *emit,
1171 const struct tgsi_full_instruction *insn )
1172 {
1173 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1174 const struct src_register src0 =
1175 translate_src_register(emit, &insn->Src[0] );
1176 const struct src_register src1 =
1177 translate_src_register(emit, &insn->Src[1] );
1178 SVGA3dShaderDestToken temp = get_temp( emit );
1179 unsigned i;
1180
1181 /* For each enabled element, perform a RCP instruction. Note that
1182 * RCP is scalar in SVGA3D:
1183 */
1184 for (i = 0; i < 4; i++) {
1185 unsigned channel = 1 << i;
1186 if (dst.mask & channel) {
1187 /* RCP TMP.?, SRC1.???? */
1188 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1189 writemask(temp, channel),
1190 scalar(src1, i) ))
1191 return FALSE;
1192 }
1193 }
1194
1195 /* Vector mul:
1196 * MUL DST, SRC0, TMP
1197 */
1198 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
1199 src( temp ) ))
1200 return FALSE;
1201
1202 return TRUE;
1203 }
1204
1205
1206 /**
1207 * Translate the following TGSI DP2 instruction.
1208 * DP2 DST, SRC1, SRC2
1209 * To the following SVGA3D instruction sequence.
1210 * MUL TMP, SRC1, SRC2
1211 * ADD DST, TMP.xxxx, TMP.yyyy
1212 */
1213 static boolean
1214 emit_dp2(struct svga_shader_emitter *emit,
1215 const struct tgsi_full_instruction *insn )
1216 {
1217 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1218 const struct src_register src0 =
1219 translate_src_register(emit, &insn->Src[0]);
1220 const struct src_register src1 =
1221 translate_src_register(emit, &insn->Src[1]);
1222 SVGA3dShaderDestToken temp = get_temp( emit );
1223 struct src_register temp_src0, temp_src1;
1224
1225 /* MUL TMP, SRC1, SRC2 */
1226 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
1227 return FALSE;
1228
1229 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1230 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1231
1232 /* ADD DST, TMP.xxxx, TMP.yyyy */
1233 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1234 temp_src0, temp_src1 ))
1235 return FALSE;
1236
1237 return TRUE;
1238 }
1239
1240
1241 /**
1242 * Translate the following TGSI DPH instruction.
1243 * DPH DST, SRC1, SRC2
1244 * To the following SVGA3D instruction sequence.
1245 * DP3 TMP, SRC1, SRC2
1246 * ADD DST, TMP, SRC2.wwww
1247 */
1248 static boolean
1249 emit_dph(struct svga_shader_emitter *emit,
1250 const struct tgsi_full_instruction *insn )
1251 {
1252 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1253 const struct src_register src0 = translate_src_register(
1254 emit, &insn->Src[0] );
1255 struct src_register src1 =
1256 translate_src_register(emit, &insn->Src[1]);
1257 SVGA3dShaderDestToken temp = get_temp( emit );
1258
1259 /* DP3 TMP, SRC1, SRC2 */
1260 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
1261 return FALSE;
1262
1263 src1 = scalar(src1, TGSI_SWIZZLE_W);
1264
1265 /* ADD DST, TMP, SRC2.wwww */
1266 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1267 src( temp ), src1 ))
1268 return FALSE;
1269
1270 return TRUE;
1271 }
1272
1273
1274 /**
1275 * Sine / Cosine helper function.
1276 */
1277 static boolean
1278 do_emit_sincos(struct svga_shader_emitter *emit,
1279 SVGA3dShaderDestToken dst,
1280 struct src_register src0)
1281 {
1282 src0 = scalar(src0, TGSI_SWIZZLE_X);
1283 return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0);
1284 }
1285
1286
1287 /**
1288 * Translate/emit a TGSI SIN, COS or CSC instruction.
1289 */
1290 static boolean
1291 emit_sincos(struct svga_shader_emitter *emit,
1292 const struct tgsi_full_instruction *insn)
1293 {
1294 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1295 struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
1296 SVGA3dShaderDestToken temp = get_temp( emit );
1297
1298 /* SCS TMP SRC */
1299 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
1300 return FALSE;
1301
1302 /* MOV DST TMP */
1303 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
1304 return FALSE;
1305
1306 return TRUE;
1307 }
1308
1309
1310 /**
1311 * Translate TGSI SIN instruction into:
1312 * SCS TMP SRC
1313 * MOV DST TMP.yyyy
1314 */
1315 static boolean
1316 emit_sin(struct svga_shader_emitter *emit,
1317 const struct tgsi_full_instruction *insn )
1318 {
1319 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1320 struct src_register src0 =
1321 translate_src_register(emit, &insn->Src[0] );
1322 SVGA3dShaderDestToken temp = get_temp( emit );
1323
1324 /* SCS TMP SRC */
1325 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1326 return FALSE;
1327
1328 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1329
1330 /* MOV DST TMP.yyyy */
1331 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1332 return FALSE;
1333
1334 return TRUE;
1335 }
1336
1337
1338 /*
1339 * Translate TGSI COS instruction into:
1340 * SCS TMP SRC
1341 * MOV DST TMP.xxxx
1342 */
1343 static boolean
1344 emit_cos(struct svga_shader_emitter *emit,
1345 const struct tgsi_full_instruction *insn)
1346 {
1347 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1348 struct src_register src0 =
1349 translate_src_register(emit, &insn->Src[0] );
1350 SVGA3dShaderDestToken temp = get_temp( emit );
1351
1352 /* SCS TMP SRC */
1353 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1354 return FALSE;
1355
1356 src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1357
1358 /* MOV DST TMP.xxxx */
1359 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1360 return FALSE;
1361
1362 return TRUE;
1363 }
1364
1365
1366 /**
1367 * Translate/emit TGSI SSG (Set Sign: -1, 0, +1) instruction.
1368 */
1369 static boolean
1370 emit_ssg(struct svga_shader_emitter *emit,
1371 const struct tgsi_full_instruction *insn)
1372 {
1373 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1374 struct src_register src0 =
1375 translate_src_register(emit, &insn->Src[0] );
1376 SVGA3dShaderDestToken temp0 = get_temp( emit );
1377 SVGA3dShaderDestToken temp1 = get_temp( emit );
1378 struct src_register zero, one;
1379
1380 if (emit->unit == PIPE_SHADER_VERTEX) {
1381 /* SGN DST, SRC0, TMP0, TMP1 */
1382 return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0,
1383 src( temp0 ), src( temp1 ) );
1384 }
1385
1386 one = get_one_immediate(emit);
1387 zero = get_zero_immediate(emit);
1388
1389 /* CMP TMP0, SRC0, one, zero */
1390 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1391 writemask( temp0, dst.mask ), src0, one, zero ))
1392 return FALSE;
1393
1394 /* CMP TMP1, negate(SRC0), negate(one), zero */
1395 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1396 writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
1397 zero ))
1398 return FALSE;
1399
1400 /* ADD DST, TMP0, TMP1 */
1401 return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
1402 src( temp1 ) );
1403 }
1404
1405
1406 /**
1407 * Translate/emit TGSI SUB instruction as:
1408 * ADD DST, SRC0, negate(SRC1)
1409 */
1410 static boolean
1411 emit_sub(struct svga_shader_emitter *emit,
1412 const struct tgsi_full_instruction *insn)
1413 {
1414 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1415 struct src_register src0 = translate_src_register(
1416 emit, &insn->Src[0] );
1417 struct src_register src1 = translate_src_register(
1418 emit, &insn->Src[1] );
1419
1420 src1 = negate(src1);
1421
1422 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1423 src0, src1 ))
1424 return FALSE;
1425
1426 return TRUE;
1427 }
1428
1429
1430 /**
1431 * Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative).
1432 */
1433 static boolean
1434 emit_kill_if(struct svga_shader_emitter *emit,
1435 const struct tgsi_full_instruction *insn)
1436 {
1437 const struct tgsi_full_src_register *reg = &insn->Src[0];
1438 struct src_register src0, srcIn;
1439 const boolean special = (reg->Register.Absolute ||
1440 reg->Register.Negate ||
1441 reg->Register.Indirect ||
1442 reg->Register.SwizzleX != 0 ||
1443 reg->Register.SwizzleY != 1 ||
1444 reg->Register.SwizzleZ != 2 ||
1445 reg->Register.File != TGSI_FILE_TEMPORARY);
1446 SVGA3dShaderDestToken temp;
1447
1448 src0 = srcIn = translate_src_register( emit, reg );
1449
1450 if (special) {
1451 /* need a temp reg */
1452 temp = get_temp( emit );
1453 }
1454
1455 if (special) {
1456 /* move the source into a temp register */
1457 submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, src0);
1458
1459 src0 = src( temp );
1460 }
1461
1462 /* Do the texkill by checking if any of the XYZW components are < 0.
1463 * Note that ps_2_0 and later take XYZW in consideration, while ps_1_x
1464 * only used XYZ. The MSDN documentation about this is incorrect.
1465 */
1466 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) ))
1467 return FALSE;
1468
1469 return TRUE;
1470 }
1471
1472
1473 /**
1474 * Translate/emit unconditional kill instruction (usually found inside
1475 * an IF/ELSE/ENDIF block).
1476 */
1477 static boolean
1478 emit_kill(struct svga_shader_emitter *emit,
1479 const struct tgsi_full_instruction *insn)
1480 {
1481 SVGA3dShaderDestToken temp;
1482 struct src_register one = get_one_immediate(emit);
1483 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL );
1484
1485 /* texkill doesn't allow negation on the operand so lets move
1486 * negation of {1} to a temp register */
1487 temp = get_temp( emit );
1488 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1489 negate( one ) ))
1490 return FALSE;
1491
1492 return submit_op0( emit, inst, temp );
1493 }
1494
1495
1496 /**
1497 * Test if r1 and r2 are the same register.
1498 */
1499 static boolean
1500 same_register(struct src_register r1, struct src_register r2)
1501 {
1502 return (r1.base.num == r2.base.num &&
1503 r1.base.type_upper == r2.base.type_upper &&
1504 r1.base.type_lower == r2.base.type_lower);
1505 }
1506
1507
1508
1509 /**
1510 * Implement conditionals by initializing destination reg to 'fail',
1511 * then set predicate reg with UFOP_SETP, then move 'pass' to dest
1512 * based on predicate reg.
1513 *
1514 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1515 * MOV dst, fail
1516 * MOV dst, pass, p0
1517 */
1518 static boolean
1519 emit_conditional(struct svga_shader_emitter *emit,
1520 unsigned compare_func,
1521 SVGA3dShaderDestToken dst,
1522 struct src_register src0,
1523 struct src_register src1,
1524 struct src_register pass,
1525 struct src_register fail)
1526 {
1527 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1528 SVGA3dShaderInstToken setp_token;
1529
1530 switch (compare_func) {
1531 case PIPE_FUNC_NEVER:
1532 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1533 dst, fail );
1534 break;
1535 case PIPE_FUNC_LESS:
1536 setp_token = inst_token_setp(SVGA3DOPCOMP_LT);
1537 break;
1538 case PIPE_FUNC_EQUAL:
1539 setp_token = inst_token_setp(SVGA3DOPCOMP_EQ);
1540 break;
1541 case PIPE_FUNC_LEQUAL:
1542 setp_token = inst_token_setp(SVGA3DOPCOMP_LE);
1543 break;
1544 case PIPE_FUNC_GREATER:
1545 setp_token = inst_token_setp(SVGA3DOPCOMP_GT);
1546 break;
1547 case PIPE_FUNC_NOTEQUAL:
1548 setp_token = inst_token_setp(SVGA3DOPCOMPC_NE);
1549 break;
1550 case PIPE_FUNC_GEQUAL:
1551 setp_token = inst_token_setp(SVGA3DOPCOMP_GE);
1552 break;
1553 case PIPE_FUNC_ALWAYS:
1554 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1555 dst, pass );
1556 break;
1557 }
1558
1559 if (same_register(src(dst), pass)) {
1560 /* We'll get bad results if the dst and pass registers are the same
1561 * so use a temp register containing pass.
1562 */
1563 SVGA3dShaderDestToken temp = get_temp(emit);
1564 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, pass))
1565 return FALSE;
1566 pass = src(temp);
1567 }
1568
1569 /* SETP src0, COMPOP, src1 */
1570 if (!submit_op2( emit, setp_token, pred_reg,
1571 src0, src1 ))
1572 return FALSE;
1573
1574 /* MOV dst, fail */
1575 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), dst, fail))
1576 return FALSE;
1577
1578 /* MOV dst, pass (predicated)
1579 *
1580 * Note that the predicate reg (and possible modifiers) is passed
1581 * as the first source argument.
1582 */
1583 if (!submit_op2(emit,
1584 inst_token_predicated(SVGA3DOP_MOV), dst,
1585 src(pred_reg), pass))
1586 return FALSE;
1587
1588 return TRUE;
1589 }
1590
1591
1592 /**
1593 * Helper for emiting 'selection' commands. Basically:
1594 * if (src0 OP src1)
1595 * dst = 1.0;
1596 * else
1597 * dst = 0.0;
1598 */
1599 static boolean
1600 emit_select(struct svga_shader_emitter *emit,
1601 unsigned compare_func,
1602 SVGA3dShaderDestToken dst,
1603 struct src_register src0,
1604 struct src_register src1 )
1605 {
1606 /* There are some SVGA instructions which implement some selects
1607 * directly, but they are only available in the vertex shader.
1608 */
1609 if (emit->unit == PIPE_SHADER_VERTEX) {
1610 switch (compare_func) {
1611 case PIPE_FUNC_GEQUAL:
1612 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1613 case PIPE_FUNC_LEQUAL:
1614 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1615 case PIPE_FUNC_GREATER:
1616 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1617 case PIPE_FUNC_LESS:
1618 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1619 default:
1620 break;
1621 }
1622 }
1623
1624 /* Otherwise, need to use the setp approach:
1625 */
1626 {
1627 struct src_register one, zero;
1628 /* zero immediate is 0,0,0,1 */
1629 zero = get_zero_immediate(emit);
1630 one = get_one_immediate(emit);
1631
1632 return emit_conditional(emit, compare_func, dst, src0, src1, one, zero);
1633 }
1634 }
1635
1636
1637 /**
1638 * Translate/emit a TGSI SEQ, SNE, SLT, SGE, etc. instruction.
1639 */
1640 static boolean
1641 emit_select_op(struct svga_shader_emitter *emit,
1642 unsigned compare,
1643 const struct tgsi_full_instruction *insn)
1644 {
1645 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1646 struct src_register src0 = translate_src_register(
1647 emit, &insn->Src[0] );
1648 struct src_register src1 = translate_src_register(
1649 emit, &insn->Src[1] );
1650
1651 return emit_select( emit, compare, dst, src0, src1 );
1652 }
1653
1654
1655 /**
1656 * Translate TGSI CMP instruction. Component-wise:
1657 * dst = (src0 < 0.0) ? src1 : src2
1658 */
1659 static boolean
1660 emit_cmp(struct svga_shader_emitter *emit,
1661 const struct tgsi_full_instruction *insn)
1662 {
1663 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1664 const struct src_register src0 =
1665 translate_src_register(emit, &insn->Src[0] );
1666 const struct src_register src1 =
1667 translate_src_register(emit, &insn->Src[1] );
1668 const struct src_register src2 =
1669 translate_src_register(emit, &insn->Src[2] );
1670
1671 if (emit->unit == PIPE_SHADER_VERTEX) {
1672 struct src_register zero = get_zero_immediate(emit);
1673 /* We used to simulate CMP with SLT+LRP. But that didn't work when
1674 * src1 or src2 was Inf/NaN. In particular, GLSL sqrt(0) failed
1675 * because it involves a CMP to handle the 0 case.
1676 * Use a conditional expression instead.
1677 */
1678 return emit_conditional(emit, PIPE_FUNC_LESS, dst,
1679 src0, zero, src1, src2);
1680 }
1681 else {
1682 assert(emit->unit == PIPE_SHADER_FRAGMENT);
1683
1684 /* CMP DST, SRC0, SRC2, SRC1 */
1685 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst,
1686 src0, src2, src1);
1687 }
1688 }
1689
1690
1691 /**
1692 * Translate/emit 2-operand (coord, sampler) texture instructions.
1693 */
1694 static boolean
1695 emit_tex2(struct svga_shader_emitter *emit,
1696 const struct tgsi_full_instruction *insn,
1697 SVGA3dShaderDestToken dst)
1698 {
1699 SVGA3dShaderInstToken inst;
1700 struct src_register texcoord;
1701 struct src_register sampler;
1702 SVGA3dShaderDestToken tmp;
1703
1704 inst.value = 0;
1705
1706 switch (insn->Instruction.Opcode) {
1707 case TGSI_OPCODE_TEX:
1708 inst.op = SVGA3DOP_TEX;
1709 break;
1710 case TGSI_OPCODE_TXP:
1711 inst.op = SVGA3DOP_TEX;
1712 inst.control = SVGA3DOPCONT_PROJECT;
1713 break;
1714 case TGSI_OPCODE_TXB:
1715 inst.op = SVGA3DOP_TEX;
1716 inst.control = SVGA3DOPCONT_BIAS;
1717 break;
1718 case TGSI_OPCODE_TXL:
1719 inst.op = SVGA3DOP_TEXLDL;
1720 break;
1721 default:
1722 assert(0);
1723 return FALSE;
1724 }
1725
1726 texcoord = translate_src_register( emit, &insn->Src[0] );
1727 sampler = translate_src_register( emit, &insn->Src[1] );
1728
1729 if (emit->key.tex[sampler.base.num].unnormalized ||
1730 emit->dynamic_branching_level > 0)
1731 tmp = get_temp( emit );
1732
1733 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1734 * zero in that case.
1735 */
1736 if (emit->dynamic_branching_level > 0 &&
1737 inst.op == SVGA3DOP_TEX &&
1738 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1739 struct src_register zero = get_zero_immediate(emit);
1740
1741 /* MOV tmp, texcoord */
1742 if (!submit_op1( emit,
1743 inst_token( SVGA3DOP_MOV ),
1744 tmp,
1745 texcoord ))
1746 return FALSE;
1747
1748 /* MOV tmp.w, zero */
1749 if (!submit_op1( emit,
1750 inst_token( SVGA3DOP_MOV ),
1751 writemask( tmp, TGSI_WRITEMASK_W ),
1752 zero ))
1753 return FALSE;
1754
1755 texcoord = src( tmp );
1756 inst.op = SVGA3DOP_TEXLDL;
1757 }
1758
1759 /* Explicit normalization of texcoords:
1760 */
1761 if (emit->key.tex[sampler.base.num].unnormalized) {
1762 struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1763
1764 /* MUL tmp, SRC0, WH */
1765 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1766 tmp, texcoord, wh ))
1767 return FALSE;
1768
1769 texcoord = src( tmp );
1770 }
1771
1772 return submit_op2( emit, inst, dst, texcoord, sampler );
1773 }
1774
1775
1776 /**
1777 * Translate/emit 4-operand (coord, ddx, ddy, sampler) texture instructions.
1778 */
1779 static boolean
1780 emit_tex4(struct svga_shader_emitter *emit,
1781 const struct tgsi_full_instruction *insn,
1782 SVGA3dShaderDestToken dst )
1783 {
1784 SVGA3dShaderInstToken inst;
1785 struct src_register texcoord;
1786 struct src_register ddx;
1787 struct src_register ddy;
1788 struct src_register sampler;
1789
1790 texcoord = translate_src_register( emit, &insn->Src[0] );
1791 ddx = translate_src_register( emit, &insn->Src[1] );
1792 ddy = translate_src_register( emit, &insn->Src[2] );
1793 sampler = translate_src_register( emit, &insn->Src[3] );
1794
1795 inst.value = 0;
1796
1797 switch (insn->Instruction.Opcode) {
1798 case TGSI_OPCODE_TXD:
1799 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1800 break;
1801 default:
1802 assert(0);
1803 return FALSE;
1804 }
1805
1806 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1807 }
1808
1809
1810 /**
1811 * Emit texture swizzle code. We do this here since SVGA samplers don't
1812 * directly support swizzles.
1813 */
1814 static boolean
1815 emit_tex_swizzle(struct svga_shader_emitter *emit,
1816 SVGA3dShaderDestToken dst,
1817 struct src_register src,
1818 unsigned swizzle_x,
1819 unsigned swizzle_y,
1820 unsigned swizzle_z,
1821 unsigned swizzle_w)
1822 {
1823 const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
1824 unsigned srcSwizzle[4];
1825 unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
1826 unsigned i;
1827
1828 /* build writemasks and srcSwizzle terms */
1829 for (i = 0; i < 4; i++) {
1830 if (swizzleIn[i] == PIPE_SWIZZLE_0) {
1831 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1832 zeroWritemask |= (1 << i);
1833 }
1834 else if (swizzleIn[i] == PIPE_SWIZZLE_1) {
1835 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1836 oneWritemask |= (1 << i);
1837 }
1838 else {
1839 srcSwizzle[i] = swizzleIn[i];
1840 srcWritemask |= (1 << i);
1841 }
1842 }
1843
1844 /* write x/y/z/w comps */
1845 if (dst.mask & srcWritemask) {
1846 if (!submit_op1(emit,
1847 inst_token(SVGA3DOP_MOV),
1848 writemask(dst, srcWritemask),
1849 swizzle(src,
1850 srcSwizzle[0],
1851 srcSwizzle[1],
1852 srcSwizzle[2],
1853 srcSwizzle[3])))
1854 return FALSE;
1855 }
1856
1857 /* write 0 comps */
1858 if (dst.mask & zeroWritemask) {
1859 if (!submit_op1(emit,
1860 inst_token(SVGA3DOP_MOV),
1861 writemask(dst, zeroWritemask),
1862 get_zero_immediate(emit)))
1863 return FALSE;
1864 }
1865
1866 /* write 1 comps */
1867 if (dst.mask & oneWritemask) {
1868 if (!submit_op1(emit,
1869 inst_token(SVGA3DOP_MOV),
1870 writemask(dst, oneWritemask),
1871 get_one_immediate(emit)))
1872 return FALSE;
1873 }
1874
1875 return TRUE;
1876 }
1877
1878
1879 /**
1880 * Translate/emit a TGSI texture sample instruction.
1881 */
1882 static boolean
1883 emit_tex(struct svga_shader_emitter *emit,
1884 const struct tgsi_full_instruction *insn)
1885 {
1886 SVGA3dShaderDestToken dst =
1887 translate_dst_register( emit, insn, 0 );
1888 struct src_register src0 =
1889 translate_src_register( emit, &insn->Src[0] );
1890 struct src_register src1 =
1891 translate_src_register( emit, &insn->Src[1] );
1892
1893 SVGA3dShaderDestToken tex_result;
1894 const unsigned unit = src1.base.num;
1895
1896 /* check for shadow samplers */
1897 boolean compare = (emit->key.tex[unit].compare_mode ==
1898 PIPE_TEX_COMPARE_R_TO_TEXTURE);
1899
1900 /* texture swizzle */
1901 boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_X ||
1902 emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_Y ||
1903 emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_Z ||
1904 emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_W);
1905
1906 boolean saturate = insn->Instruction.Saturate;
1907
1908 /* If doing compare processing or tex swizzle or saturation, we need to put
1909 * the fetched color into a temporary so it can be used as a source later on.
1910 */
1911 if (compare || swizzle || saturate) {
1912 tex_result = get_temp( emit );
1913 }
1914 else {
1915 tex_result = dst;
1916 }
1917
1918 switch(insn->Instruction.Opcode) {
1919 case TGSI_OPCODE_TEX:
1920 case TGSI_OPCODE_TXB:
1921 case TGSI_OPCODE_TXP:
1922 case TGSI_OPCODE_TXL:
1923 if (!emit_tex2( emit, insn, tex_result ))
1924 return FALSE;
1925 break;
1926 case TGSI_OPCODE_TXD:
1927 if (!emit_tex4( emit, insn, tex_result ))
1928 return FALSE;
1929 break;
1930 default:
1931 assert(0);
1932 }
1933
1934 if (compare) {
1935 SVGA3dShaderDestToken dst2;
1936
1937 if (swizzle || saturate)
1938 dst2 = tex_result;
1939 else
1940 dst2 = dst;
1941
1942 if (dst.mask & TGSI_WRITEMASK_XYZ) {
1943 SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1944 /* When sampling a depth texture, the result of the comparison is in
1945 * the Y component.
1946 */
1947 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1948 struct src_register r_coord;
1949
1950 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
1951 /* Divide texcoord R by Q */
1952 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1953 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1954 scalar(src0, TGSI_SWIZZLE_W) ))
1955 return FALSE;
1956
1957 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1958 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1959 scalar(src0, TGSI_SWIZZLE_Z),
1960 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1961 return FALSE;
1962
1963 r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X);
1964 }
1965 else {
1966 r_coord = scalar(src0, TGSI_SWIZZLE_Z);
1967 }
1968
1969 /* Compare texture sample value against R component of texcoord */
1970 if (!emit_select(emit,
1971 emit->key.tex[unit].compare_func,
1972 writemask( dst2, TGSI_WRITEMASK_XYZ ),
1973 r_coord,
1974 tex_src_x))
1975 return FALSE;
1976 }
1977
1978 if (dst.mask & TGSI_WRITEMASK_W) {
1979 struct src_register one = get_one_immediate(emit);
1980
1981 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1982 writemask( dst2, TGSI_WRITEMASK_W ),
1983 one ))
1984 return FALSE;
1985 }
1986 }
1987
1988 if (saturate && !swizzle) {
1989 /* MOV_SAT real_dst, dst */
1990 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1991 return FALSE;
1992 }
1993 else if (swizzle) {
1994 /* swizzle from tex_result to dst (handles saturation too, if any) */
1995 emit_tex_swizzle(emit,
1996 dst, src(tex_result),
1997 emit->key.tex[unit].swizzle_r,
1998 emit->key.tex[unit].swizzle_g,
1999 emit->key.tex[unit].swizzle_b,
2000 emit->key.tex[unit].swizzle_a);
2001 }
2002
2003 return TRUE;
2004 }
2005
2006
2007 static boolean
2008 emit_bgnloop(struct svga_shader_emitter *emit,
2009 const struct tgsi_full_instruction *insn)
2010 {
2011 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
2012 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
2013 struct src_register const_int = get_loop_const( emit );
2014
2015 emit->dynamic_branching_level++;
2016
2017 return (emit_instruction( emit, inst ) &&
2018 emit_src( emit, loop_reg ) &&
2019 emit_src( emit, const_int ) );
2020 }
2021
2022
2023 static boolean
2024 emit_endloop(struct svga_shader_emitter *emit,
2025 const struct tgsi_full_instruction *insn)
2026 {
2027 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
2028
2029 emit->dynamic_branching_level--;
2030
2031 return emit_instruction( emit, inst );
2032 }
2033
2034
2035 /**
2036 * Translate/emit TGSI BREAK (out of loop) instruction.
2037 */
2038 static boolean
2039 emit_brk(struct svga_shader_emitter *emit,
2040 const struct tgsi_full_instruction *insn)
2041 {
2042 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
2043 return emit_instruction( emit, inst );
2044 }
2045
2046
2047 /**
2048 * Emit simple instruction which operates on one scalar value (not
2049 * a vector). Ex: LG2, RCP, RSQ.
2050 */
2051 static boolean
2052 emit_scalar_op1(struct svga_shader_emitter *emit,
2053 unsigned opcode,
2054 const struct tgsi_full_instruction *insn)
2055 {
2056 SVGA3dShaderInstToken inst;
2057 SVGA3dShaderDestToken dst;
2058 struct src_register src;
2059
2060 inst = inst_token( opcode );
2061 dst = translate_dst_register( emit, insn, 0 );
2062 src = translate_src_register( emit, &insn->Src[0] );
2063 src = scalar( src, TGSI_SWIZZLE_X );
2064
2065 return submit_op1( emit, inst, dst, src );
2066 }
2067
2068
2069 /**
2070 * Translate/emit a simple instruction (one which has no special-case
2071 * code) such as ADD, MUL, MIN, MAX.
2072 */
2073 static boolean
2074 emit_simple_instruction(struct svga_shader_emitter *emit,
2075 unsigned opcode,
2076 const struct tgsi_full_instruction *insn)
2077 {
2078 const struct tgsi_full_src_register *src = insn->Src;
2079 SVGA3dShaderInstToken inst;
2080 SVGA3dShaderDestToken dst;
2081
2082 inst = inst_token( opcode );
2083 dst = translate_dst_register( emit, insn, 0 );
2084
2085 switch (insn->Instruction.NumSrcRegs) {
2086 case 0:
2087 return submit_op0( emit, inst, dst );
2088 case 1:
2089 return submit_op1( emit, inst, dst,
2090 translate_src_register( emit, &src[0] ));
2091 case 2:
2092 return submit_op2( emit, inst, dst,
2093 translate_src_register( emit, &src[0] ),
2094 translate_src_register( emit, &src[1] ) );
2095 case 3:
2096 return submit_op3( emit, inst, dst,
2097 translate_src_register( emit, &src[0] ),
2098 translate_src_register( emit, &src[1] ),
2099 translate_src_register( emit, &src[2] ) );
2100 default:
2101 assert(0);
2102 return FALSE;
2103 }
2104 }
2105
2106
2107 /**
2108 * TGSI_OPCODE_MOVE is only special-cased here to detect the
2109 * svga_fragment_shader::constant_color_output case.
2110 */
2111 static boolean
2112 emit_mov(struct svga_shader_emitter *emit,
2113 const struct tgsi_full_instruction *insn)
2114 {
2115 const struct tgsi_full_src_register *src = &insn->Src[0];
2116 const struct tgsi_full_dst_register *dst = &insn->Dst[0];
2117
2118 if (emit->unit == PIPE_SHADER_FRAGMENT &&
2119 dst->Register.File == TGSI_FILE_OUTPUT &&
2120 dst->Register.Index == 0 &&
2121 src->Register.File == TGSI_FILE_CONSTANT &&
2122 !src->Register.Indirect) {
2123 emit->constant_color_output = TRUE;
2124 }
2125
2126 return emit_simple_instruction(emit, SVGA3DOP_MOV, insn);
2127 }
2128
2129
2130 /**
2131 * Translate/emit TGSI DDX, DDY instructions.
2132 */
2133 static boolean
2134 emit_deriv(struct svga_shader_emitter *emit,
2135 const struct tgsi_full_instruction *insn )
2136 {
2137 if (emit->dynamic_branching_level > 0 &&
2138 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
2139 {
2140 SVGA3dShaderDestToken dst =
2141 translate_dst_register( emit, insn, 0 );
2142
2143 /* Deriv opcodes not valid inside dynamic branching, workaround
2144 * by zeroing out the destination.
2145 */
2146 if (!submit_op1(emit,
2147 inst_token( SVGA3DOP_MOV ),
2148 dst,
2149 get_zero_immediate(emit)))
2150 return FALSE;
2151
2152 return TRUE;
2153 }
2154 else {
2155 unsigned opcode;
2156 const struct tgsi_full_src_register *reg = &insn->Src[0];
2157 SVGA3dShaderInstToken inst;
2158 SVGA3dShaderDestToken dst;
2159 struct src_register src0;
2160
2161 switch (insn->Instruction.Opcode) {
2162 case TGSI_OPCODE_DDX:
2163 opcode = SVGA3DOP_DSX;
2164 break;
2165 case TGSI_OPCODE_DDY:
2166 opcode = SVGA3DOP_DSY;
2167 break;
2168 default:
2169 return FALSE;
2170 }
2171
2172 inst = inst_token( opcode );
2173 dst = translate_dst_register( emit, insn, 0 );
2174 src0 = translate_src_register( emit, reg );
2175
2176 /* We cannot use negate or abs on source to dsx/dsy instruction.
2177 */
2178 if (reg->Register.Absolute ||
2179 reg->Register.Negate) {
2180 SVGA3dShaderDestToken temp = get_temp( emit );
2181
2182 if (!emit_repl( emit, temp, &src0 ))
2183 return FALSE;
2184 }
2185
2186 return submit_op1( emit, inst, dst, src0 );
2187 }
2188 }
2189
2190
2191 /**
2192 * Translate/emit ARL (Address Register Load) instruction. Used to
2193 * move a value into the special 'address' register. Used to implement
2194 * indirect/variable indexing into arrays.
2195 */
2196 static boolean
2197 emit_arl(struct svga_shader_emitter *emit,
2198 const struct tgsi_full_instruction *insn)
2199 {
2200 ++emit->current_arl;
2201 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2202 /* MOVA not present in pixel shader instruction set.
2203 * Ignore this instruction altogether since it is
2204 * only used for loop counters -- and for that
2205 * we reference aL directly.
2206 */
2207 return TRUE;
2208 }
2209 if (svga_arl_needs_adjustment( emit )) {
2210 return emit_fake_arl( emit, insn );
2211 } else {
2212 /* no need to adjust, just emit straight arl */
2213 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
2214 }
2215 }
2216
2217
2218 static boolean
2219 emit_pow(struct svga_shader_emitter *emit,
2220 const struct tgsi_full_instruction *insn)
2221 {
2222 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2223 struct src_register src0 = translate_src_register(
2224 emit, &insn->Src[0] );
2225 struct src_register src1 = translate_src_register(
2226 emit, &insn->Src[1] );
2227 boolean need_tmp = FALSE;
2228
2229 /* POW can only output to a temporary */
2230 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
2231 need_tmp = TRUE;
2232
2233 /* POW src1 must not be the same register as dst */
2234 if (alias_src_dst( src1, dst ))
2235 need_tmp = TRUE;
2236
2237 /* it's a scalar op */
2238 src0 = scalar( src0, TGSI_SWIZZLE_X );
2239 src1 = scalar( src1, TGSI_SWIZZLE_X );
2240
2241 if (need_tmp) {
2242 SVGA3dShaderDestToken tmp =
2243 writemask(get_temp( emit ), TGSI_WRITEMASK_X );
2244
2245 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
2246 return FALSE;
2247
2248 return submit_op1(emit, inst_token( SVGA3DOP_MOV ),
2249 dst, scalar(src(tmp), 0) );
2250 }
2251 else {
2252 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
2253 }
2254 }
2255
2256
2257 /**
2258 * Translate/emit TGSI XPD (vector cross product) instruction.
2259 */
2260 static boolean
2261 emit_xpd(struct svga_shader_emitter *emit,
2262 const struct tgsi_full_instruction *insn)
2263 {
2264 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2265 const struct src_register src0 = translate_src_register(
2266 emit, &insn->Src[0] );
2267 const struct src_register src1 = translate_src_register(
2268 emit, &insn->Src[1] );
2269 boolean need_dst_tmp = FALSE;
2270
2271 /* XPD can only output to a temporary */
2272 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
2273 need_dst_tmp = TRUE;
2274
2275 /* The dst reg must not be the same as src0 or src1*/
2276 if (alias_src_dst(src0, dst) ||
2277 alias_src_dst(src1, dst))
2278 need_dst_tmp = TRUE;
2279
2280 if (need_dst_tmp) {
2281 SVGA3dShaderDestToken tmp = get_temp( emit );
2282
2283 /* Obey DX9 restrictions on mask:
2284 */
2285 tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
2286
2287 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
2288 return FALSE;
2289
2290 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
2291 return FALSE;
2292 }
2293 else {
2294 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
2295 return FALSE;
2296 }
2297
2298 /* Need to emit 1.0 to dst.w?
2299 */
2300 if (dst.mask & TGSI_WRITEMASK_W) {
2301 struct src_register one = get_one_immediate( emit );
2302
2303 if (!submit_op1(emit,
2304 inst_token( SVGA3DOP_MOV ),
2305 writemask(dst, TGSI_WRITEMASK_W),
2306 one))
2307 return FALSE;
2308 }
2309
2310 return TRUE;
2311 }
2312
2313
2314 /**
2315 * Emit a LRP (linear interpolation) instruction.
2316 */
2317 static boolean
2318 submit_lrp(struct svga_shader_emitter *emit,
2319 SVGA3dShaderDestToken dst,
2320 struct src_register src0,
2321 struct src_register src1,
2322 struct src_register src2)
2323 {
2324 SVGA3dShaderDestToken tmp;
2325 boolean need_dst_tmp = FALSE;
2326
2327 /* The dst reg must be a temporary, and not be the same as src0 or src2 */
2328 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2329 alias_src_dst(src0, dst) ||
2330 alias_src_dst(src2, dst))
2331 need_dst_tmp = TRUE;
2332
2333 if (need_dst_tmp) {
2334 tmp = get_temp( emit );
2335 tmp.mask = dst.mask;
2336 }
2337 else {
2338 tmp = dst;
2339 }
2340
2341 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
2342 return FALSE;
2343
2344 if (need_dst_tmp) {
2345 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
2346 return FALSE;
2347 }
2348
2349 return TRUE;
2350 }
2351
2352
2353 /**
2354 * Translate/emit LRP (Linear Interpolation) instruction.
2355 */
2356 static boolean
2357 emit_lrp(struct svga_shader_emitter *emit,
2358 const struct tgsi_full_instruction *insn)
2359 {
2360 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2361 const struct src_register src0 = translate_src_register(
2362 emit, &insn->Src[0] );
2363 const struct src_register src1 = translate_src_register(
2364 emit, &insn->Src[1] );
2365 const struct src_register src2 = translate_src_register(
2366 emit, &insn->Src[2] );
2367
2368 return submit_lrp(emit, dst, src0, src1, src2);
2369 }
2370
2371 /**
2372 * Translate/emit DST (Distance function) instruction.
2373 */
2374 static boolean
2375 emit_dst_insn(struct svga_shader_emitter *emit,
2376 const struct tgsi_full_instruction *insn)
2377 {
2378 if (emit->unit == PIPE_SHADER_VERTEX) {
2379 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
2380 */
2381 return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
2382 }
2383 else {
2384 /* result[0] = 1 * 1;
2385 * result[1] = a[1] * b[1];
2386 * result[2] = a[2] * 1;
2387 * result[3] = 1 * b[3];
2388 */
2389 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2390 SVGA3dShaderDestToken tmp;
2391 const struct src_register src0 = translate_src_register(
2392 emit, &insn->Src[0] );
2393 const struct src_register src1 = translate_src_register(
2394 emit, &insn->Src[1] );
2395 boolean need_tmp = FALSE;
2396
2397 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2398 alias_src_dst(src0, dst) ||
2399 alias_src_dst(src1, dst))
2400 need_tmp = TRUE;
2401
2402 if (need_tmp) {
2403 tmp = get_temp( emit );
2404 }
2405 else {
2406 tmp = dst;
2407 }
2408
2409 /* tmp.xw = 1.0
2410 */
2411 if (tmp.mask & TGSI_WRITEMASK_XW) {
2412 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2413 writemask(tmp, TGSI_WRITEMASK_XW ),
2414 get_one_immediate(emit)))
2415 return FALSE;
2416 }
2417
2418 /* tmp.yz = src0
2419 */
2420 if (tmp.mask & TGSI_WRITEMASK_YZ) {
2421 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2422 writemask(tmp, TGSI_WRITEMASK_YZ ),
2423 src0))
2424 return FALSE;
2425 }
2426
2427 /* tmp.yw = tmp * src1
2428 */
2429 if (tmp.mask & TGSI_WRITEMASK_YW) {
2430 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2431 writemask(tmp, TGSI_WRITEMASK_YW ),
2432 src(tmp),
2433 src1))
2434 return FALSE;
2435 }
2436
2437 /* dst = tmp
2438 */
2439 if (need_tmp) {
2440 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2441 dst,
2442 src(tmp)))
2443 return FALSE;
2444 }
2445 }
2446
2447 return TRUE;
2448 }
2449
2450
2451 static boolean
2452 emit_exp(struct svga_shader_emitter *emit,
2453 const struct tgsi_full_instruction *insn)
2454 {
2455 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2456 struct src_register src0 =
2457 translate_src_register( emit, &insn->Src[0] );
2458 SVGA3dShaderDestToken fraction;
2459
2460 if (dst.mask & TGSI_WRITEMASK_Y)
2461 fraction = dst;
2462 else if (dst.mask & TGSI_WRITEMASK_X)
2463 fraction = get_temp( emit );
2464 else
2465 fraction.value = 0;
2466
2467 /* If y is being written, fill it with src0 - floor(src0).
2468 */
2469 if (dst.mask & TGSI_WRITEMASK_XY) {
2470 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2471 writemask( fraction, TGSI_WRITEMASK_Y ),
2472 src0 ))
2473 return FALSE;
2474 }
2475
2476 /* If x is being written, fill it with 2 ^ floor(src0).
2477 */
2478 if (dst.mask & TGSI_WRITEMASK_X) {
2479 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2480 writemask( dst, TGSI_WRITEMASK_X ),
2481 src0,
2482 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
2483 return FALSE;
2484
2485 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2486 writemask( dst, TGSI_WRITEMASK_X ),
2487 scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
2488 return FALSE;
2489
2490 if (!(dst.mask & TGSI_WRITEMASK_Y))
2491 release_temp( emit, fraction );
2492 }
2493
2494 /* If z is being written, fill it with 2 ^ src0 (partial precision).
2495 */
2496 if (dst.mask & TGSI_WRITEMASK_Z) {
2497 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
2498 writemask( dst, TGSI_WRITEMASK_Z ),
2499 src0 ) )
2500 return FALSE;
2501 }
2502
2503 /* If w is being written, fill it with one.
2504 */
2505 if (dst.mask & TGSI_WRITEMASK_W) {
2506 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2507 writemask(dst, TGSI_WRITEMASK_W),
2508 get_one_immediate(emit)))
2509 return FALSE;
2510 }
2511
2512 return TRUE;
2513 }
2514
2515
2516 /**
2517 * Translate/emit LIT (Lighting helper) instruction.
2518 */
2519 static boolean
2520 emit_lit(struct svga_shader_emitter *emit,
2521 const struct tgsi_full_instruction *insn)
2522 {
2523 if (emit->unit == PIPE_SHADER_VERTEX) {
2524 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
2525 */
2526 return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
2527 }
2528 else {
2529 /* D3D vs. GL semantics can be fairly easily accomodated by
2530 * variations on this sequence.
2531 *
2532 * GL:
2533 * tmp.y = src.x
2534 * tmp.z = pow(src.y,src.w)
2535 * p0 = src0.xxxx > 0
2536 * result = zero.wxxw
2537 * (p0) result.yz = tmp
2538 *
2539 * D3D:
2540 * tmp.y = src.x
2541 * tmp.z = pow(src.y,src.w)
2542 * p0 = src0.xxyy > 0
2543 * result = zero.wxxw
2544 * (p0) result.yz = tmp
2545 *
2546 * Will implement the GL version for now.
2547 */
2548 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2549 SVGA3dShaderDestToken tmp = get_temp( emit );
2550 const struct src_register src0 = translate_src_register(
2551 emit, &insn->Src[0] );
2552
2553 /* tmp = pow(src.y, src.w)
2554 */
2555 if (dst.mask & TGSI_WRITEMASK_Z) {
2556 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
2557 tmp,
2558 scalar(src0, 1),
2559 scalar(src0, 3)))
2560 return FALSE;
2561 }
2562
2563 /* tmp.y = src.x
2564 */
2565 if (dst.mask & TGSI_WRITEMASK_Y) {
2566 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2567 writemask(tmp, TGSI_WRITEMASK_Y ),
2568 scalar(src0, 0)))
2569 return FALSE;
2570 }
2571
2572 /* Can't quite do this with emit conditional due to the extra
2573 * writemask on the predicated mov:
2574 */
2575 {
2576 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
2577 struct src_register predsrc;
2578
2579 /* D3D vs GL semantics:
2580 */
2581 if (0)
2582 predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
2583 else
2584 predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
2585
2586 /* SETP src0.xxyy, GT, {0}.x */
2587 if (!submit_op2( emit,
2588 inst_token_setp(SVGA3DOPCOMP_GT),
2589 pred_reg,
2590 predsrc,
2591 get_zero_immediate(emit)))
2592 return FALSE;
2593
2594 /* MOV dst, fail */
2595 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
2596 get_immediate(emit, 1.0f, 0.0f, 0.0f, 1.0f)))
2597 return FALSE;
2598
2599 /* MOV dst.yz, tmp (predicated)
2600 *
2601 * Note that the predicate reg (and possible modifiers) is passed
2602 * as the first source argument.
2603 */
2604 if (dst.mask & TGSI_WRITEMASK_YZ) {
2605 if (!submit_op2( emit,
2606 inst_token_predicated(SVGA3DOP_MOV),
2607 writemask(dst, TGSI_WRITEMASK_YZ),
2608 src( pred_reg ), src( tmp ) ))
2609 return FALSE;
2610 }
2611 }
2612 }
2613
2614 return TRUE;
2615 }
2616
2617
2618 static boolean
2619 emit_ex2(struct svga_shader_emitter *emit,
2620 const struct tgsi_full_instruction *insn)
2621 {
2622 SVGA3dShaderInstToken inst;
2623 SVGA3dShaderDestToken dst;
2624 struct src_register src0;
2625
2626 inst = inst_token( SVGA3DOP_EXP );
2627 dst = translate_dst_register( emit, insn, 0 );
2628 src0 = translate_src_register( emit, &insn->Src[0] );
2629 src0 = scalar( src0, TGSI_SWIZZLE_X );
2630
2631 if (dst.mask != TGSI_WRITEMASK_XYZW) {
2632 SVGA3dShaderDestToken tmp = get_temp( emit );
2633
2634 if (!submit_op1( emit, inst, tmp, src0 ))
2635 return FALSE;
2636
2637 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2638 dst,
2639 scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2640 }
2641
2642 return submit_op1( emit, inst, dst, src0 );
2643 }
2644
2645
2646 static boolean
2647 emit_log(struct svga_shader_emitter *emit,
2648 const struct tgsi_full_instruction *insn)
2649 {
2650 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2651 struct src_register src0 =
2652 translate_src_register( emit, &insn->Src[0] );
2653 SVGA3dShaderDestToken abs_tmp;
2654 struct src_register abs_src0;
2655 SVGA3dShaderDestToken log2_abs;
2656
2657 abs_tmp.value = 0;
2658
2659 if (dst.mask & TGSI_WRITEMASK_Z)
2660 log2_abs = dst;
2661 else if (dst.mask & TGSI_WRITEMASK_XY)
2662 log2_abs = get_temp( emit );
2663 else
2664 log2_abs.value = 0;
2665
2666 /* If z is being written, fill it with log2( abs( src0 ) ).
2667 */
2668 if (dst.mask & TGSI_WRITEMASK_XYZ) {
2669 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2670 abs_src0 = src0;
2671 else {
2672 abs_tmp = get_temp( emit );
2673
2674 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2675 abs_tmp,
2676 src0 ) )
2677 return FALSE;
2678
2679 abs_src0 = src( abs_tmp );
2680 }
2681
2682 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2683
2684 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2685 writemask( log2_abs, TGSI_WRITEMASK_Z ),
2686 abs_src0 ) )
2687 return FALSE;
2688 }
2689
2690 if (dst.mask & TGSI_WRITEMASK_XY) {
2691 SVGA3dShaderDestToken floor_log2;
2692
2693 if (dst.mask & TGSI_WRITEMASK_X)
2694 floor_log2 = dst;
2695 else
2696 floor_log2 = get_temp( emit );
2697
2698 /* If x is being written, fill it with floor( log2( abs( src0 ) ) ).
2699 */
2700 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2701 writemask( floor_log2, TGSI_WRITEMASK_X ),
2702 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2703 return FALSE;
2704
2705 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2706 writemask( floor_log2, TGSI_WRITEMASK_X ),
2707 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2708 negate( src( floor_log2 ) ) ) )
2709 return FALSE;
2710
2711 /* If y is being written, fill it with
2712 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2713 */
2714 if (dst.mask & TGSI_WRITEMASK_Y) {
2715 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2716 writemask( dst, TGSI_WRITEMASK_Y ),
2717 negate( scalar( src( floor_log2 ),
2718 TGSI_SWIZZLE_X ) ) ) )
2719 return FALSE;
2720
2721 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2722 writemask( dst, TGSI_WRITEMASK_Y ),
2723 src( dst ),
2724 abs_src0 ) )
2725 return FALSE;
2726 }
2727
2728 if (!(dst.mask & TGSI_WRITEMASK_X))
2729 release_temp( emit, floor_log2 );
2730
2731 if (!(dst.mask & TGSI_WRITEMASK_Z))
2732 release_temp( emit, log2_abs );
2733 }
2734
2735 if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod &&
2736 src0.base.srcMod != SVGA3DSRCMOD_ABS)
2737 release_temp( emit, abs_tmp );
2738
2739 /* If w is being written, fill it with one.
2740 */
2741 if (dst.mask & TGSI_WRITEMASK_W) {
2742 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2743 writemask(dst, TGSI_WRITEMASK_W),
2744 get_one_immediate(emit)))
2745 return FALSE;
2746 }
2747
2748 return TRUE;
2749 }
2750
2751
2752 /**
2753 * Translate TGSI TRUNC or ROUND instruction.
2754 * We need to truncate toward zero. Ex: trunc(-1.9) = -1
2755 * Different approaches are needed for VS versus PS.
2756 */
2757 static boolean
2758 emit_trunc_round(struct svga_shader_emitter *emit,
2759 const struct tgsi_full_instruction *insn,
2760 boolean round)
2761 {
2762 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
2763 const struct src_register src0 =
2764 translate_src_register(emit, &insn->Src[0] );
2765 SVGA3dShaderDestToken t1 = get_temp(emit);
2766
2767 if (round) {
2768 SVGA3dShaderDestToken t0 = get_temp(emit);
2769 struct src_register half = get_half_immediate(emit);
2770
2771 /* t0 = abs(src0) + 0.5 */
2772 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0,
2773 absolute(src0), half))
2774 return FALSE;
2775
2776 /* t1 = fract(t0) */
2777 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0)))
2778 return FALSE;
2779
2780 /* t1 = t0 - t1 */
2781 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0),
2782 negate(src(t1))))
2783 return FALSE;
2784 }
2785 else {
2786 /* trunc */
2787
2788 /* t1 = fract(abs(src0)) */
2789 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0)))
2790 return FALSE;
2791
2792 /* t1 = abs(src0) - t1 */
2793 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0),
2794 negate(src(t1))))
2795 return FALSE;
2796 }
2797
2798 /*
2799 * Now we need to multiply t1 by the sign of the original value.
2800 */
2801 if (emit->unit == PIPE_SHADER_VERTEX) {
2802 /* For VS: use SGN instruction */
2803 /* Need two extra/dummy registers: */
2804 SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit),
2805 t4 = get_temp(emit);
2806
2807 /* t2 = sign(src0) */
2808 if (!submit_op3(emit, inst_token(SVGA3DOP_SGN), t2, src0,
2809 src(t3), src(t4)))
2810 return FALSE;
2811
2812 /* dst = t1 * t2 */
2813 if (!submit_op2(emit, inst_token(SVGA3DOP_MUL), dst, src(t1), src(t2)))
2814 return FALSE;
2815 }
2816 else {
2817 /* For FS: Use CMP instruction */
2818 return submit_op3(emit, inst_token( SVGA3DOP_CMP ), dst,
2819 src0, src(t1), negate(src(t1)));
2820 }
2821
2822 return TRUE;
2823 }
2824
2825
2826 /**
2827 * Translate/emit "begin subroutine" instruction/marker/label.
2828 */
2829 static boolean
2830 emit_bgnsub(struct svga_shader_emitter *emit,
2831 unsigned position,
2832 const struct tgsi_full_instruction *insn)
2833 {
2834 unsigned i;
2835
2836 /* Note that we've finished the main function and are now emitting
2837 * subroutines. This affects how we terminate the generated
2838 * shader.
2839 */
2840 emit->in_main_func = FALSE;
2841
2842 for (i = 0; i < emit->nr_labels; i++) {
2843 if (emit->label[i] == position) {
2844 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2845 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2846 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2847 }
2848 }
2849
2850 assert(0);
2851 return TRUE;
2852 }
2853
2854
2855 /**
2856 * Translate/emit subroutine call instruction.
2857 */
2858 static boolean
2859 emit_call(struct svga_shader_emitter *emit,
2860 const struct tgsi_full_instruction *insn)
2861 {
2862 unsigned position = insn->Label.Label;
2863 unsigned i;
2864
2865 for (i = 0; i < emit->nr_labels; i++) {
2866 if (emit->label[i] == position)
2867 break;
2868 }
2869
2870 if (emit->nr_labels == ARRAY_SIZE(emit->label))
2871 return FALSE;
2872
2873 if (i == emit->nr_labels) {
2874 emit->label[i] = position;
2875 emit->nr_labels++;
2876 }
2877
2878 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2879 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2880 }
2881
2882
2883 /**
2884 * Called at the end of the shader. Actually, emit special "fix-up"
2885 * code for the vertex/fragment shader.
2886 */
2887 static boolean
2888 emit_end(struct svga_shader_emitter *emit)
2889 {
2890 if (emit->unit == PIPE_SHADER_VERTEX) {
2891 return emit_vs_postamble( emit );
2892 }
2893 else {
2894 return emit_ps_postamble( emit );
2895 }
2896 }
2897
2898
2899 /**
2900 * Translate any TGSI instruction to SVGA.
2901 */
2902 static boolean
2903 svga_emit_instruction(struct svga_shader_emitter *emit,
2904 unsigned position,
2905 const struct tgsi_full_instruction *insn)
2906 {
2907 switch (insn->Instruction.Opcode) {
2908
2909 case TGSI_OPCODE_ARL:
2910 return emit_arl( emit, insn );
2911
2912 case TGSI_OPCODE_TEX:
2913 case TGSI_OPCODE_TXB:
2914 case TGSI_OPCODE_TXP:
2915 case TGSI_OPCODE_TXL:
2916 case TGSI_OPCODE_TXD:
2917 return emit_tex( emit, insn );
2918
2919 case TGSI_OPCODE_DDX:
2920 case TGSI_OPCODE_DDY:
2921 return emit_deriv( emit, insn );
2922
2923 case TGSI_OPCODE_BGNSUB:
2924 return emit_bgnsub( emit, position, insn );
2925
2926 case TGSI_OPCODE_ENDSUB:
2927 return TRUE;
2928
2929 case TGSI_OPCODE_CAL:
2930 return emit_call( emit, insn );
2931
2932 case TGSI_OPCODE_FLR:
2933 return emit_floor( emit, insn );
2934
2935 case TGSI_OPCODE_TRUNC:
2936 return emit_trunc_round( emit, insn, FALSE );
2937
2938 case TGSI_OPCODE_ROUND:
2939 return emit_trunc_round( emit, insn, TRUE );
2940
2941 case TGSI_OPCODE_CEIL:
2942 return emit_ceil( emit, insn );
2943
2944 case TGSI_OPCODE_CMP:
2945 return emit_cmp( emit, insn );
2946
2947 case TGSI_OPCODE_DIV:
2948 return emit_div( emit, insn );
2949
2950 case TGSI_OPCODE_DP2:
2951 return emit_dp2( emit, insn );
2952
2953 case TGSI_OPCODE_DPH:
2954 return emit_dph( emit, insn );
2955
2956 case TGSI_OPCODE_COS:
2957 return emit_cos( emit, insn );
2958
2959 case TGSI_OPCODE_SIN:
2960 return emit_sin( emit, insn );
2961
2962 case TGSI_OPCODE_SCS:
2963 return emit_sincos( emit, insn );
2964
2965 case TGSI_OPCODE_END:
2966 /* TGSI always finishes the main func with an END */
2967 return emit_end( emit );
2968
2969 case TGSI_OPCODE_KILL_IF:
2970 return emit_kill_if( emit, insn );
2971
2972 /* Selection opcodes. The underlying language is fairly
2973 * non-orthogonal about these.
2974 */
2975 case TGSI_OPCODE_SEQ:
2976 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2977
2978 case TGSI_OPCODE_SNE:
2979 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2980
2981 case TGSI_OPCODE_SGT:
2982 return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2983
2984 case TGSI_OPCODE_SGE:
2985 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2986
2987 case TGSI_OPCODE_SLT:
2988 return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2989
2990 case TGSI_OPCODE_SLE:
2991 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2992
2993 case TGSI_OPCODE_SUB:
2994 return emit_sub( emit, insn );
2995
2996 case TGSI_OPCODE_POW:
2997 return emit_pow( emit, insn );
2998
2999 case TGSI_OPCODE_EX2:
3000 return emit_ex2( emit, insn );
3001
3002 case TGSI_OPCODE_EXP:
3003 return emit_exp( emit, insn );
3004
3005 case TGSI_OPCODE_LOG:
3006 return emit_log( emit, insn );
3007
3008 case TGSI_OPCODE_LG2:
3009 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
3010
3011 case TGSI_OPCODE_RSQ:
3012 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
3013
3014 case TGSI_OPCODE_RCP:
3015 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
3016
3017 case TGSI_OPCODE_CONT:
3018 /* not expected (we return PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED = 0) */
3019 return FALSE;
3020
3021 case TGSI_OPCODE_RET:
3022 /* This is a noop -- we tell mesa that we can't support RET
3023 * within a function (early return), so this will always be
3024 * followed by an ENDSUB.
3025 */
3026 return TRUE;
3027
3028 /* These aren't actually used by any of the frontends we care
3029 * about:
3030 */
3031 case TGSI_OPCODE_CLAMP:
3032 case TGSI_OPCODE_AND:
3033 case TGSI_OPCODE_OR:
3034 case TGSI_OPCODE_I2F:
3035 case TGSI_OPCODE_NOT:
3036 case TGSI_OPCODE_SHL:
3037 case TGSI_OPCODE_ISHR:
3038 case TGSI_OPCODE_XOR:
3039 return FALSE;
3040
3041 case TGSI_OPCODE_IF:
3042 return emit_if( emit, insn );
3043 case TGSI_OPCODE_ELSE:
3044 return emit_else( emit, insn );
3045 case TGSI_OPCODE_ENDIF:
3046 return emit_endif( emit, insn );
3047
3048 case TGSI_OPCODE_BGNLOOP:
3049 return emit_bgnloop( emit, insn );
3050 case TGSI_OPCODE_ENDLOOP:
3051 return emit_endloop( emit, insn );
3052 case TGSI_OPCODE_BRK:
3053 return emit_brk( emit, insn );
3054
3055 case TGSI_OPCODE_XPD:
3056 return emit_xpd( emit, insn );
3057
3058 case TGSI_OPCODE_KILL:
3059 return emit_kill( emit, insn );
3060
3061 case TGSI_OPCODE_DST:
3062 return emit_dst_insn( emit, insn );
3063
3064 case TGSI_OPCODE_LIT:
3065 return emit_lit( emit, insn );
3066
3067 case TGSI_OPCODE_LRP:
3068 return emit_lrp( emit, insn );
3069
3070 case TGSI_OPCODE_SSG:
3071 return emit_ssg( emit, insn );
3072
3073 case TGSI_OPCODE_MOV:
3074 return emit_mov( emit, insn );
3075
3076 default:
3077 {
3078 unsigned opcode = translate_opcode(insn->Instruction.Opcode);
3079
3080 if (opcode == SVGA3DOP_LAST_INST)
3081 return FALSE;
3082
3083 if (!emit_simple_instruction( emit, opcode, insn ))
3084 return FALSE;
3085 }
3086 }
3087
3088 return TRUE;
3089 }
3090
3091
3092 /**
3093 * Translate/emit a TGSI IMMEDIATE declaration.
3094 * An immediate vector is a constant that's hard-coded into the shader.
3095 */
3096 static boolean
3097 svga_emit_immediate(struct svga_shader_emitter *emit,
3098 const struct tgsi_full_immediate *imm)
3099 {
3100 static const float id[4] = {0,0,0,1};
3101 float value[4];
3102 unsigned i;
3103
3104 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
3105 for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
3106 float f = imm->u[i].Float;
3107 value[i] = util_is_inf_or_nan(f) ? 0.0f : f;
3108 }
3109
3110 /* If the immediate has less than four values, fill in the remaining
3111 * positions from id={0,0,0,1}.
3112 */
3113 for ( ; i < 4; i++ )
3114 value[i] = id[i];
3115
3116 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
3117 emit->imm_start + emit->internal_imm_count++,
3118 value[0], value[1], value[2], value[3]);
3119 }
3120
3121
3122 static boolean
3123 make_immediate(struct svga_shader_emitter *emit,
3124 float a, float b, float c, float d,
3125 struct src_register *out )
3126 {
3127 unsigned idx = emit->nr_hw_float_const++;
3128
3129 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
3130 idx, a, b, c, d ))
3131 return FALSE;
3132
3133 *out = src_register( SVGA3DREG_CONST, idx );
3134
3135 return TRUE;
3136 }
3137
3138
3139 /**
3140 * Emit special VS instructions at top of shader.
3141 */
3142 static boolean
3143 emit_vs_preamble(struct svga_shader_emitter *emit)
3144 {
3145 if (!emit->key.vs.need_prescale) {
3146 if (!make_immediate( emit, 0, 0, .5, .5,
3147 &emit->imm_0055))
3148 return FALSE;
3149 }
3150
3151 return TRUE;
3152 }
3153
3154
3155 /**
3156 * Emit special PS instructions at top of shader.
3157 */
3158 static boolean
3159 emit_ps_preamble(struct svga_shader_emitter *emit)
3160 {
3161 if (emit->ps_reads_pos && emit->info.reads_z) {
3162 /*
3163 * Assemble the position from various bits of inputs. Depth and W are
3164 * passed in a texcoord this is due to D3D's vPos not hold Z or W.
3165 * Also fixup the perspective interpolation.
3166 *
3167 * temp_pos.xy = vPos.xy
3168 * temp_pos.w = rcp(texcoord1.w);
3169 * temp_pos.z = texcoord1.z * temp_pos.w;
3170 */
3171 if (!submit_op1( emit,
3172 inst_token(SVGA3DOP_MOV),
3173 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ),
3174 emit->ps_true_pos ))
3175 return FALSE;
3176
3177 if (!submit_op1( emit,
3178 inst_token(SVGA3DOP_RCP),
3179 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ),
3180 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) ))
3181 return FALSE;
3182
3183 if (!submit_op2( emit,
3184 inst_token(SVGA3DOP_MUL),
3185 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ),
3186 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ),
3187 scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) ))
3188 return FALSE;
3189 }
3190
3191 return TRUE;
3192 }
3193
3194
3195 /**
3196 * Emit special PS instructions at end of shader.
3197 */
3198 static boolean
3199 emit_ps_postamble(struct svga_shader_emitter *emit)
3200 {
3201 unsigned i;
3202
3203 /* PS oDepth is incredibly fragile and it's very hard to catch the
3204 * types of usage that break it during shader emit. Easier just to
3205 * redirect the main program to a temporary and then only touch
3206 * oDepth with a hand-crafted MOV below.
3207 */
3208 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
3209 if (!submit_op1( emit,
3210 inst_token(SVGA3DOP_MOV),
3211 emit->true_pos,
3212 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
3213 return FALSE;
3214 }
3215
3216 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
3217 if (SVGA3dShaderGetRegType(emit->true_color_output[i].value) != 0) {
3218 /* Potentially override output colors with white for XOR
3219 * logicop workaround.
3220 */
3221 if (emit->unit == PIPE_SHADER_FRAGMENT &&
3222 emit->key.fs.white_fragments) {
3223 struct src_register one = get_one_immediate(emit);
3224
3225 if (!submit_op1( emit,
3226 inst_token(SVGA3DOP_MOV),
3227 emit->true_color_output[i],
3228 one ))
3229 return FALSE;
3230 }
3231 else if (emit->unit == PIPE_SHADER_FRAGMENT &&
3232 i < emit->key.fs.write_color0_to_n_cbufs) {
3233 /* Write temp color output [0] to true output [i] */
3234 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV),
3235 emit->true_color_output[i],
3236 src(emit->temp_color_output[0]))) {
3237 return FALSE;
3238 }
3239 }
3240 else {
3241 if (!submit_op1( emit,
3242 inst_token(SVGA3DOP_MOV),
3243 emit->true_color_output[i],
3244 src(emit->temp_color_output[i]) ))
3245 return FALSE;
3246 }
3247 }
3248 }
3249
3250 return TRUE;
3251 }
3252
3253
3254 /**
3255 * Emit special VS instructions at end of shader.
3256 */
3257 static boolean
3258 emit_vs_postamble(struct svga_shader_emitter *emit)
3259 {
3260 /* PSIZ output is incredibly fragile and it's very hard to catch
3261 * the types of usage that break it during shader emit. Easier
3262 * just to redirect the main program to a temporary and then only
3263 * touch PSIZ with a hand-crafted MOV below.
3264 */
3265 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
3266 if (!submit_op1( emit,
3267 inst_token(SVGA3DOP_MOV),
3268 emit->true_psiz,
3269 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
3270 return FALSE;
3271 }
3272
3273 /* Need to perform various manipulations on vertex position to cope
3274 * with the different GL and D3D clip spaces.
3275 */
3276 if (emit->key.vs.need_prescale) {
3277 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3278 SVGA3dShaderDestToken depth = emit->depth_pos;
3279 SVGA3dShaderDestToken pos = emit->true_pos;
3280 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
3281 struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
3282 offset + 0 );
3283 struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
3284 offset + 1 );
3285
3286 if (!submit_op1( emit,
3287 inst_token(SVGA3DOP_MOV),
3288 writemask(depth, TGSI_WRITEMASK_W),
3289 scalar(src(temp_pos), TGSI_SWIZZLE_W) ))
3290 return FALSE;
3291
3292 /* MUL temp_pos.xyz, temp_pos, prescale.scale
3293 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
3294 * --> Note that prescale.trans.w == 0
3295 */
3296 if (!submit_op2( emit,
3297 inst_token(SVGA3DOP_MUL),
3298 writemask(temp_pos, TGSI_WRITEMASK_XYZ),
3299 src(temp_pos),
3300 prescale_scale ))
3301 return FALSE;
3302
3303 if (!submit_op3( emit,
3304 inst_token(SVGA3DOP_MAD),
3305 pos,
3306 swizzle(src(temp_pos), 3, 3, 3, 3),
3307 prescale_trans,
3308 src(temp_pos)))
3309 return FALSE;
3310
3311 /* Also write to depth value */
3312 if (!submit_op3( emit,
3313 inst_token(SVGA3DOP_MAD),
3314 writemask(depth, TGSI_WRITEMASK_Z),
3315 swizzle(src(temp_pos), 3, 3, 3, 3),
3316 prescale_trans,
3317 src(temp_pos) ))
3318 return FALSE;
3319 }
3320 else {
3321 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3322 SVGA3dShaderDestToken depth = emit->depth_pos;
3323 SVGA3dShaderDestToken pos = emit->true_pos;
3324 struct src_register imm_0055 = emit->imm_0055;
3325
3326 /* Adjust GL clipping coordinate space to hardware (D3D-style):
3327 *
3328 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
3329 * MOV result.position, temp_pos
3330 */
3331 if (!submit_op2( emit,
3332 inst_token(SVGA3DOP_DP4),
3333 writemask(temp_pos, TGSI_WRITEMASK_Z),
3334 imm_0055,
3335 src(temp_pos) ))
3336 return FALSE;
3337
3338 if (!submit_op1( emit,
3339 inst_token(SVGA3DOP_MOV),
3340 pos,
3341 src(temp_pos) ))
3342 return FALSE;
3343
3344 /* Move the manipulated depth into the extra texcoord reg */
3345 if (!submit_op1( emit,
3346 inst_token(SVGA3DOP_MOV),
3347 writemask(depth, TGSI_WRITEMASK_ZW),
3348 src(temp_pos) ))
3349 return FALSE;
3350 }
3351
3352 return TRUE;
3353 }
3354
3355
3356 /**
3357 * For the pixel shader: emit the code which chooses the front
3358 * or back face color depending on triangle orientation.
3359 * This happens at the top of the fragment shader.
3360 *
3361 * 0: IF VFACE :4
3362 * 1: COLOR = FrontColor;
3363 * 2: ELSE
3364 * 3: COLOR = BackColor;
3365 * 4: ENDIF
3366 */
3367 static boolean
3368 emit_light_twoside(struct svga_shader_emitter *emit)
3369 {
3370 struct src_register vface, zero;
3371 struct src_register front[2];
3372 struct src_register back[2];
3373 SVGA3dShaderDestToken color[2];
3374 int count = emit->internal_color_count;
3375 unsigned i;
3376 SVGA3dShaderInstToken if_token;
3377
3378 if (count == 0)
3379 return TRUE;
3380
3381 vface = get_vface( emit );
3382 zero = get_zero_immediate(emit);
3383
3384 /* Can't use get_temp() to allocate the color reg as such
3385 * temporaries will be reclaimed after each instruction by the call
3386 * to reset_temp_regs().
3387 */
3388 for (i = 0; i < count; i++) {
3389 color[i] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ );
3390 front[i] = emit->input_map[emit->internal_color_idx[i]];
3391
3392 /* Back is always the next input:
3393 */
3394 back[i] = front[i];
3395 back[i].base.num = front[i].base.num + 1;
3396
3397 /* Reassign the input_map to the actual front-face color:
3398 */
3399 emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
3400 }
3401
3402 if_token = inst_token( SVGA3DOP_IFC );
3403
3404 if (emit->key.fs.front_ccw)
3405 if_token.control = SVGA3DOPCOMP_LT;
3406 else
3407 if_token.control = SVGA3DOPCOMP_GT;
3408
3409 if (!(emit_instruction( emit, if_token ) &&
3410 emit_src( emit, vface ) &&
3411 emit_src( emit, zero ) ))
3412 return FALSE;
3413
3414 for (i = 0; i < count; i++) {
3415 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
3416 return FALSE;
3417 }
3418
3419 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
3420 return FALSE;
3421
3422 for (i = 0; i < count; i++) {
3423 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
3424 return FALSE;
3425 }
3426
3427 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
3428 return FALSE;
3429
3430 return TRUE;
3431 }
3432
3433
3434 /**
3435 * Emit special setup code for the front/back face register in the FS.
3436 * 0: SETP_GT TEMP, VFACE, 0
3437 * where TEMP is a fake frontface register
3438 */
3439 static boolean
3440 emit_frontface(struct svga_shader_emitter *emit)
3441 {
3442 struct src_register vface;
3443 SVGA3dShaderDestToken temp;
3444 struct src_register pass, fail;
3445
3446 vface = get_vface( emit );
3447
3448 /* Can't use get_temp() to allocate the fake frontface reg as such
3449 * temporaries will be reclaimed after each instruction by the call
3450 * to reset_temp_regs().
3451 */
3452 temp = dst_register( SVGA3DREG_TEMP,
3453 emit->nr_hw_temp++ );
3454
3455 if (emit->key.fs.front_ccw) {
3456 pass = get_zero_immediate(emit);
3457 fail = get_one_immediate(emit);
3458 } else {
3459 pass = get_one_immediate(emit);
3460 fail = get_zero_immediate(emit);
3461 }
3462
3463 if (!emit_conditional(emit, PIPE_FUNC_GREATER,
3464 temp, vface, get_zero_immediate(emit),
3465 pass, fail))
3466 return FALSE;
3467
3468 /* Reassign the input_map to the actual front-face color:
3469 */
3470 emit->input_map[emit->internal_frontface_idx] = src(temp);
3471
3472 return TRUE;
3473 }
3474
3475
3476 /**
3477 * Emit code to invert the T component of the incoming texture coordinate.
3478 * This is used for drawing point sprites when
3479 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
3480 */
3481 static boolean
3482 emit_inverted_texcoords(struct svga_shader_emitter *emit)
3483 {
3484 unsigned inverted_texcoords = emit->inverted_texcoords;
3485
3486 while (inverted_texcoords) {
3487 const unsigned unit = ffs(inverted_texcoords) - 1;
3488
3489 assert(emit->inverted_texcoords & (1 << unit));
3490
3491 assert(unit < ARRAY_SIZE(emit->ps_true_texcoord));
3492
3493 assert(unit < ARRAY_SIZE(emit->ps_inverted_texcoord_input));
3494
3495 assert(emit->ps_inverted_texcoord_input[unit]
3496 < ARRAY_SIZE(emit->input_map));
3497
3498 /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
3499 if (!submit_op3(emit,
3500 inst_token(SVGA3DOP_MAD),
3501 dst(emit->ps_inverted_texcoord[unit]),
3502 emit->ps_true_texcoord[unit],
3503 get_immediate(emit, 1.0f, -1.0f, 1.0f, 1.0f),
3504 get_immediate(emit, 0.0f, 1.0f, 0.0f, 0.0f)))
3505 return FALSE;
3506
3507 /* Reassign the input_map entry to the new texcoord register */
3508 emit->input_map[emit->ps_inverted_texcoord_input[unit]] =
3509 emit->ps_inverted_texcoord[unit];
3510
3511 inverted_texcoords &= ~(1 << unit);
3512 }
3513
3514 return TRUE;
3515 }
3516
3517
3518 /**
3519 * Emit code to adjust vertex shader inputs/attributes:
3520 * - Change range from [0,1] to [-1,1] (for normalized byte/short attribs).
3521 * - Set attrib W component = 1.
3522 */
3523 static boolean
3524 emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
3525 {
3526 unsigned adjust_mask = (emit->key.vs.adjust_attrib_range |
3527 emit->key.vs.adjust_attrib_w_1);
3528
3529 while (adjust_mask) {
3530 /* Adjust vertex attrib range and/or set W component = 1 */
3531 const unsigned index = u_bit_scan(&adjust_mask);
3532 struct src_register tmp;
3533
3534 /* allocate a temp reg */
3535 tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
3536 emit->nr_hw_temp++;
3537
3538 if (emit->key.vs.adjust_attrib_range & (1 << index)) {
3539 /* The vertex input/attribute is supposed to be a signed value in
3540 * the range [-1,1] but we actually fetched/converted it to the
3541 * range [0,1]. This most likely happens when the app specifies a
3542 * signed byte attribute but we interpreted it as unsigned bytes.
3543 * See also svga_translate_vertex_format().
3544 *
3545 * Here, we emit some extra instructions to adjust
3546 * the attribute values from [0,1] to [-1,1].
3547 *
3548 * The adjustment we implement is:
3549 * new_attrib = attrib * 2.0;
3550 * if (attrib >= 0.5)
3551 * new_attrib = new_attrib - 2.0;
3552 * This isn't exactly right (it's off by a bit or so) but close enough.
3553 */
3554 SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0);
3555
3556 /* tmp = attrib * 2.0 */
3557 if (!submit_op2(emit,
3558 inst_token(SVGA3DOP_MUL),
3559 dst(tmp),
3560 emit->input_map[index],
3561 get_two_immediate(emit)))
3562 return FALSE;
3563
3564 /* pred = (attrib >= 0.5) */
3565 if (!submit_op2(emit,
3566 inst_token_setp(SVGA3DOPCOMP_GE),
3567 pred_reg,
3568 emit->input_map[index], /* vert attrib */
3569 get_half_immediate(emit))) /* 0.5 */
3570 return FALSE;
3571
3572 /* sub(pred) tmp, tmp, 2.0 */
3573 if (!submit_op3(emit,
3574 inst_token_predicated(SVGA3DOP_SUB),
3575 dst(tmp),
3576 src(pred_reg),
3577 tmp,
3578 get_two_immediate(emit)))
3579 return FALSE;
3580 }
3581 else {
3582 /* just copy the vertex input attrib to the temp register */
3583 if (!submit_op1(emit,
3584 inst_token(SVGA3DOP_MOV),
3585 dst(tmp),
3586 emit->input_map[index]))
3587 return FALSE;
3588 }
3589
3590 if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) {
3591 /* move 1 into W position of tmp */
3592 if (!submit_op1(emit,
3593 inst_token(SVGA3DOP_MOV),
3594 writemask(dst(tmp), TGSI_WRITEMASK_W),
3595 get_one_immediate(emit)))
3596 return FALSE;
3597 }
3598
3599 /* Reassign the input_map entry to the new tmp register */
3600 emit->input_map[index] = tmp;
3601 }
3602
3603 return TRUE;
3604 }
3605
3606
3607 /**
3608 * Determine if we need to create the "common" immediate value which is
3609 * used for generating useful vector constants such as {0,0,0,0} and
3610 * {1,1,1,1}.
3611 * We could just do this all the time except that we want to conserve
3612 * registers whenever possible.
3613 */
3614 static boolean
3615 needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
3616 {
3617 unsigned i;
3618
3619 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3620 if (emit->key.fs.light_twoside)
3621 return TRUE;
3622
3623 if (emit->key.fs.white_fragments)
3624 return TRUE;
3625
3626 if (emit->emit_frontface)
3627 return TRUE;
3628
3629 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
3630 emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 ||
3631 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
3632 return TRUE;
3633
3634 if (emit->inverted_texcoords)
3635 return TRUE;
3636
3637 /* look for any PIPE_SWIZZLE_0/ONE terms */
3638 for (i = 0; i < emit->key.num_textures; i++) {
3639 if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_W ||
3640 emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_W ||
3641 emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_W ||
3642 emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_W)
3643 return TRUE;
3644 }
3645
3646 for (i = 0; i < emit->key.num_textures; i++) {
3647 if (emit->key.tex[i].compare_mode
3648 == PIPE_TEX_COMPARE_R_TO_TEXTURE)
3649 return TRUE;
3650 }
3651 }
3652 else if (emit->unit == PIPE_SHADER_VERTEX) {
3653 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
3654 return TRUE;
3655 if (emit->key.vs.adjust_attrib_range ||
3656 emit->key.vs.adjust_attrib_w_1)
3657 return TRUE;
3658 }
3659
3660 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
3661 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
3662 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
3663 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
3664 emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 ||
3665 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
3666 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
3667 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
3668 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
3669 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
3670 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
3671 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
3672 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
3673 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
3674 emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1)
3675 return TRUE;
3676
3677 return FALSE;
3678 }
3679
3680
3681 /**
3682 * Do we need to create a looping constant?
3683 */
3684 static boolean
3685 needs_to_create_loop_const(const struct svga_shader_emitter *emit)
3686 {
3687 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
3688 }
3689
3690
3691 static boolean
3692 needs_to_create_arl_consts(const struct svga_shader_emitter *emit)
3693 {
3694 return (emit->num_arl_consts > 0);
3695 }
3696
3697
3698 static boolean
3699 pre_parse_add_indirect( struct svga_shader_emitter *emit,
3700 int num, int current_arl)
3701 {
3702 unsigned i;
3703 assert(num < 0);
3704
3705 for (i = 0; i < emit->num_arl_consts; ++i) {
3706 if (emit->arl_consts[i].arl_num == current_arl)
3707 break;
3708 }
3709 /* new entry */
3710 if (emit->num_arl_consts == i) {
3711 ++emit->num_arl_consts;
3712 }
3713 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
3714 num :
3715 emit->arl_consts[i].number;
3716 emit->arl_consts[i].arl_num = current_arl;
3717 return TRUE;
3718 }
3719
3720
3721 static boolean
3722 pre_parse_instruction( struct svga_shader_emitter *emit,
3723 const struct tgsi_full_instruction *insn,
3724 int current_arl)
3725 {
3726 if (insn->Src[0].Register.Indirect &&
3727 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
3728 const struct tgsi_full_src_register *reg = &insn->Src[0];
3729 if (reg->Register.Index < 0) {
3730 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3731 }
3732 }
3733
3734 if (insn->Src[1].Register.Indirect &&
3735 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
3736 const struct tgsi_full_src_register *reg = &insn->Src[1];
3737 if (reg->Register.Index < 0) {
3738 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3739 }
3740 }
3741
3742 if (insn->Src[2].Register.Indirect &&
3743 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
3744 const struct tgsi_full_src_register *reg = &insn->Src[2];
3745 if (reg->Register.Index < 0) {
3746 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3747 }
3748 }
3749
3750 return TRUE;
3751 }
3752
3753
3754 static boolean
3755 pre_parse_tokens( struct svga_shader_emitter *emit,
3756 const struct tgsi_token *tokens )
3757 {
3758 struct tgsi_parse_context parse;
3759 int current_arl = 0;
3760
3761 tgsi_parse_init( &parse, tokens );
3762
3763 while (!tgsi_parse_end_of_tokens( &parse )) {
3764 tgsi_parse_token( &parse );
3765 switch (parse.FullToken.Token.Type) {
3766 case TGSI_TOKEN_TYPE_IMMEDIATE:
3767 case TGSI_TOKEN_TYPE_DECLARATION:
3768 break;
3769 case TGSI_TOKEN_TYPE_INSTRUCTION:
3770 if (parse.FullToken.FullInstruction.Instruction.Opcode ==
3771 TGSI_OPCODE_ARL) {
3772 ++current_arl;
3773 }
3774 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
3775 current_arl ))
3776 return FALSE;
3777 break;
3778 default:
3779 break;
3780 }
3781
3782 }
3783 return TRUE;
3784 }
3785
3786
3787 static boolean
3788 svga_shader_emit_helpers(struct svga_shader_emitter *emit)
3789 {
3790 if (needs_to_create_common_immediate( emit )) {
3791 create_common_immediate( emit );
3792 }
3793 if (needs_to_create_loop_const( emit )) {
3794 create_loop_const( emit );
3795 }
3796 if (needs_to_create_arl_consts( emit )) {
3797 create_arl_consts( emit );
3798 }
3799
3800 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3801 if (!svga_shader_emit_samplers_decl( emit ))
3802 return FALSE;
3803
3804 if (!emit_ps_preamble( emit ))
3805 return FALSE;
3806
3807 if (emit->key.fs.light_twoside) {
3808 if (!emit_light_twoside( emit ))
3809 return FALSE;
3810 }
3811 if (emit->emit_frontface) {
3812 if (!emit_frontface( emit ))
3813 return FALSE;
3814 }
3815 if (emit->inverted_texcoords) {
3816 if (!emit_inverted_texcoords( emit ))
3817 return FALSE;
3818 }
3819 }
3820 else {
3821 assert(emit->unit == PIPE_SHADER_VERTEX);
3822 if (emit->key.vs.adjust_attrib_range) {
3823 if (!emit_adjusted_vertex_attribs(emit) ||
3824 emit->key.vs.adjust_attrib_w_1) {
3825 return FALSE;
3826 }
3827 }
3828 }
3829
3830 return TRUE;
3831 }
3832
3833
3834 /**
3835 * This is the main entrypoint into the TGSI instruction translater.
3836 * Translate TGSI shader tokens into an SVGA shader.
3837 */
3838 boolean
3839 svga_shader_emit_instructions(struct svga_shader_emitter *emit,
3840 const struct tgsi_token *tokens)
3841 {
3842 struct tgsi_parse_context parse;
3843 const struct tgsi_token *new_tokens = NULL;
3844 boolean ret = TRUE;
3845 boolean helpers_emitted = FALSE;
3846 unsigned line_nr = 0;
3847
3848 if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
3849 unsigned unit;
3850
3851 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
3852 TGSI_FILE_INPUT);
3853
3854 if (new_tokens) {
3855 /* Setup texture state for stipple */
3856 emit->sampler_target[unit] = TGSI_TEXTURE_2D;
3857 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
3858 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
3859 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
3860 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
3861
3862 emit->pstipple_sampler_unit = unit;
3863
3864 tokens = new_tokens;
3865 }
3866 }
3867
3868 tgsi_parse_init( &parse, tokens );
3869 emit->internal_imm_count = 0;
3870
3871 if (emit->unit == PIPE_SHADER_VERTEX) {
3872 ret = emit_vs_preamble( emit );
3873 if (!ret)
3874 goto done;
3875 }
3876
3877 pre_parse_tokens(emit, tokens);
3878
3879 while (!tgsi_parse_end_of_tokens( &parse )) {
3880 tgsi_parse_token( &parse );
3881
3882 switch (parse.FullToken.Token.Type) {
3883 case TGSI_TOKEN_TYPE_IMMEDIATE:
3884 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
3885 if (!ret)
3886 goto done;
3887 break;
3888
3889 case TGSI_TOKEN_TYPE_DECLARATION:
3890 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
3891 if (!ret)
3892 goto done;
3893 break;
3894
3895 case TGSI_TOKEN_TYPE_INSTRUCTION:
3896 if (!helpers_emitted) {
3897 if (!svga_shader_emit_helpers( emit ))
3898 goto done;
3899 helpers_emitted = TRUE;
3900 }
3901 ret = svga_emit_instruction( emit,
3902 line_nr++,
3903 &parse.FullToken.FullInstruction );
3904 if (!ret)
3905 goto done;
3906 break;
3907 default:
3908 break;
3909 }
3910
3911 reset_temp_regs( emit );
3912 }
3913
3914 /* Need to terminate the current subroutine. Note that the
3915 * hardware doesn't tolerate shaders without sub-routines
3916 * terminating with RET+END.
3917 */
3918 if (!emit->in_main_func) {
3919 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
3920 if (!ret)
3921 goto done;
3922 }
3923
3924 assert(emit->dynamic_branching_level == 0);
3925
3926 /* Need to terminate the whole shader:
3927 */
3928 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
3929 if (!ret)
3930 goto done;
3931
3932 done:
3933 tgsi_parse_free( &parse );
3934 if (new_tokens) {
3935 tgsi_free_tokens(new_tokens);
3936 }
3937
3938 return ret;
3939 }