mesa.git: src/gallium/drivers/svga/svga_tgsi_insn.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "util/u_memory.h"
31 #include "util/u_math.h"
32 #include "util/u_pstipple.h"
33
34 #include "svga_tgsi_emit.h"
35 #include "svga_context.h"
36
37
38 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
39 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
40
41
42 static unsigned
43 translate_opcode(uint opcode)
44 {
45 switch (opcode) {
46 case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
47 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
48 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
49 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
50 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
51 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
52 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
53 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
54 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
55 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
56 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
57 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
58 default:
59 assert(!"svga: unexpected opcode in translate_opcode()");
60 return SVGA3DOP_LAST_INST;
61 }
62 }
63
64
65 static unsigned
66 translate_file(unsigned file)
67 {
68 switch (file) {
69 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
70 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
71 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
72 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
73 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
74 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
75 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
76 default:
77 assert(!"svga: unexpected register file in translate_file()");
78 return SVGA3DREG_TEMP;
79 }
80 }
81
82
83 /**
84 * Translate a TGSI destination register to an SVGA3DShaderDestToken.
85 * \param insn the TGSI instruction
86  * \param idx which TGSI dest register to translate (usually, if not always, zero)
87 */
88 static SVGA3dShaderDestToken
89 translate_dst_register( struct svga_shader_emitter *emit,
90 const struct tgsi_full_instruction *insn,
91 unsigned idx )
92 {
93 const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
94 SVGA3dShaderDestToken dest;
95
96 switch (reg->Register.File) {
97 case TGSI_FILE_OUTPUT:
98       /* Output registers encode semantic information in their names.
99        * Need to look up a table built at decl time:
100 */
101 dest = emit->output_map[reg->Register.Index];
102 emit->num_output_writes++;
103 break;
104
105 default:
106 {
107 unsigned index = reg->Register.Index;
108 assert(index < SVGA3D_TEMPREG_MAX);
109 index = MIN2(index, SVGA3D_TEMPREG_MAX - 1);
110 dest = dst_register(translate_file(reg->Register.File), index);
111 }
112 break;
113 }
114
115 if (reg->Register.Indirect) {
116 debug_warning("Indirect indexing of dest registers is not supported!\n");
117 }
118
119 dest.mask = reg->Register.WriteMask;
120 assert(dest.mask);
121
122 if (insn->Instruction.Saturate)
123 dest.dstMod = SVGA3DDSTMOD_SATURATE;
124
125 return dest;
126 }
127
128
129 /**
130 * Apply a swizzle to a src_register, returning a new src_register
131 * Ex: swizzle(SRC.ZZYY, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y)
132 * would return SRC.YYZZ
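 *
 * (Each component selector occupies two bits of src.base.swizzle; the code
 * below extracts the selector for each requested channel and repacks the
 * four selectors with TRANSLATE_SWIZZLE.)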
133 */
134 static struct src_register
135 swizzle(struct src_register src,
136 unsigned x, unsigned y, unsigned z, unsigned w)
137 {
138 assert(x < 4);
139 assert(y < 4);
140 assert(z < 4);
141 assert(w < 4);
142 x = (src.base.swizzle >> (x * 2)) & 0x3;
143 y = (src.base.swizzle >> (y * 2)) & 0x3;
144 z = (src.base.swizzle >> (z * 2)) & 0x3;
145 w = (src.base.swizzle >> (w * 2)) & 0x3;
146
147 src.base.swizzle = TRANSLATE_SWIZZLE(x, y, z, w);
148
149 return src;
150 }
151
152
153 /**
154 * Apply a "scalar" swizzle to a src_register returning a new
155 * src_register where all the swizzle terms are the same.
156 * Ex: scalar(SRC.WZYX, SWIZZLE_Y) would return SRC.ZZZZ
157 */
158 static struct src_register
159 scalar(struct src_register src, unsigned comp)
160 {
161 assert(comp < 4);
162 return swizzle( src, comp, comp, comp, comp );
163 }
164
165
166 static boolean
167 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
168 {
169 unsigned i;
170
171 for (i = 0; i < emit->num_arl_consts; ++i) {
172 if (emit->arl_consts[i].arl_num == emit->current_arl)
173 return TRUE;
174 }
175 return FALSE;
176 }
177
178
179 static int
180 svga_arl_adjustment( const struct svga_shader_emitter *emit )
181 {
182 unsigned i;
183
184 for (i = 0; i < emit->num_arl_consts; ++i) {
185 if (emit->arl_consts[i].arl_num == emit->current_arl)
186 return emit->arl_consts[i].number;
187 }
188 return 0;
189 }
190
191
192 /**
193 * Translate a TGSI src register to a src_register.
194 */
195 static struct src_register
196 translate_src_register( const struct svga_shader_emitter *emit,
197 const struct tgsi_full_src_register *reg )
198 {
199 struct src_register src;
200
201 switch (reg->Register.File) {
202 case TGSI_FILE_INPUT:
203 /* Input registers are referred to by their semantic name rather
204       * than by index.  Use the mapping built up from the decls:
205 */
206 src = emit->input_map[reg->Register.Index];
207 break;
208
209 case TGSI_FILE_IMMEDIATE:
210 /* Immediates are appended after TGSI constants in the D3D
211 * constant buffer.
212 */
213 src = src_register( translate_file( reg->Register.File ),
214 reg->Register.Index + emit->imm_start );
215 break;
216
217 default:
218 src = src_register( translate_file( reg->Register.File ),
219 reg->Register.Index );
220 break;
221 }
222
223 /* Indirect addressing.
224 */
225 if (reg->Register.Indirect) {
226 if (emit->unit == PIPE_SHADER_FRAGMENT) {
227 /* Pixel shaders have only loop registers for relative
228 * addressing into inputs. Ignore the redundant address
229          * register; the contents of aL should be in sync with it.
230 */
231 if (reg->Register.File == TGSI_FILE_INPUT) {
232 src.base.relAddr = 1;
233 src.indirect = src_token(SVGA3DREG_LOOP, 0);
234 }
235 }
236 else {
237 /* Constant buffers only.
238 */
239 if (reg->Register.File == TGSI_FILE_CONSTANT) {
240 /* we shift the offset towards the minimum */
241 if (svga_arl_needs_adjustment( emit )) {
242 src.base.num -= svga_arl_adjustment( emit );
243 }
244 src.base.relAddr = 1;
245
246 /* Not really sure what should go in the second token:
247 */
248 src.indirect = src_token( SVGA3DREG_ADDR,
249 reg->Indirect.Index );
250
251 src.indirect.swizzle = SWIZZLE_XXXX;
252 }
253 }
254 }
255
256 src = swizzle( src,
257 reg->Register.SwizzleX,
258 reg->Register.SwizzleY,
259 reg->Register.SwizzleZ,
260 reg->Register.SwizzleW );
261
262 /* src.mod isn't a bitfield, unfortunately:
263 * See tgsi_util_get_full_src_register_sign_mode for implementation details.
264 */
265 if (reg->Register.Absolute) {
266 if (reg->Register.Negate)
267 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
268 else
269 src.base.srcMod = SVGA3DSRCMOD_ABS;
270 }
271 else {
272 if (reg->Register.Negate)
273 src.base.srcMod = SVGA3DSRCMOD_NEG;
274 else
275 src.base.srcMod = SVGA3DSRCMOD_NONE;
276 }
277
278 return src;
279 }
280
281
282 /*
283 * Get a temporary register.
284 * Note: if we exceed the temporary register limit we just use
285 * register SVGA3D_TEMPREG_MAX - 1.
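 *
 * Internal temps are allocated on top of the shader's own temporaries
 * (emit->nr_hw_temp) and are recycled via reset_temp_regs(), which is
 * presumably called between instructions.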
286 */
287 static SVGA3dShaderDestToken
288 get_temp( struct svga_shader_emitter *emit )
289 {
290 int i = emit->nr_hw_temp + emit->internal_temp_count++;
291 if (i >= SVGA3D_TEMPREG_MAX) {
292 debug_warn_once("svga: Too many temporary registers used in shader\n");
293 i = SVGA3D_TEMPREG_MAX - 1;
294 }
295 return dst_register( SVGA3DREG_TEMP, i );
296 }
297
298
299 /**
300 * Release a single temp. Currently only effective if it was the last
301 * allocated temp, otherwise release will be delayed until the next
302 * call to reset_temp_regs().
303 */
304 static void
305 release_temp( struct svga_shader_emitter *emit,
306 SVGA3dShaderDestToken temp )
307 {
308 if (temp.num == emit->internal_temp_count - 1)
309 emit->internal_temp_count--;
310 }
311
312
313 /**
314 * Release all temps.
315 */
316 static void
317 reset_temp_regs(struct svga_shader_emitter *emit)
318 {
319 emit->internal_temp_count = 0;
320 }
321
322
323 /** Emit bytecode for a src_register */
324 static boolean
325 emit_src(struct svga_shader_emitter *emit, const struct src_register src)
326 {
327 if (src.base.relAddr) {
328 assert(src.base.reserved0);
329 assert(src.indirect.reserved0);
330 return (svga_shader_emit_dword( emit, src.base.value ) &&
331 svga_shader_emit_dword( emit, src.indirect.value ));
332 }
333 else {
334 assert(src.base.reserved0);
335 return svga_shader_emit_dword( emit, src.base.value );
336 }
337 }
338
339
340 /** Emit bytecode for a dst_register */
341 static boolean
342 emit_dst(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dest)
343 {
344 assert(dest.reserved0);
345 assert(dest.mask);
346 return svga_shader_emit_dword( emit, dest.value );
347 }
348
349
350 /** Emit bytecode for a 1-operand instruction */
351 static boolean
352 emit_op1(struct svga_shader_emitter *emit,
353 SVGA3dShaderInstToken inst,
354 SVGA3dShaderDestToken dest,
355 struct src_register src0)
356 {
357 return (emit_instruction(emit, inst) &&
358 emit_dst(emit, dest) &&
359 emit_src(emit, src0));
360 }
361
362
363 /** Emit bytecode for a 2-operand instruction */
364 static boolean
365 emit_op2(struct svga_shader_emitter *emit,
366 SVGA3dShaderInstToken inst,
367 SVGA3dShaderDestToken dest,
368 struct src_register src0,
369 struct src_register src1)
370 {
371 return (emit_instruction(emit, inst) &&
372 emit_dst(emit, dest) &&
373 emit_src(emit, src0) &&
374 emit_src(emit, src1));
375 }
376
377
378 /** Emit bytecode for a 3-operand instruction */
379 static boolean
380 emit_op3(struct svga_shader_emitter *emit,
381 SVGA3dShaderInstToken inst,
382 SVGA3dShaderDestToken dest,
383 struct src_register src0,
384 struct src_register src1,
385 struct src_register src2)
386 {
387 return (emit_instruction(emit, inst) &&
388 emit_dst(emit, dest) &&
389 emit_src(emit, src0) &&
390 emit_src(emit, src1) &&
391 emit_src(emit, src2));
392 }
393
394
395 /** Emit bytecode for a 4-operand instruction */
396 static boolean
397 emit_op4(struct svga_shader_emitter *emit,
398 SVGA3dShaderInstToken inst,
399 SVGA3dShaderDestToken dest,
400 struct src_register src0,
401 struct src_register src1,
402 struct src_register src2,
403 struct src_register src3)
404 {
405 return (emit_instruction(emit, inst) &&
406 emit_dst(emit, dest) &&
407 emit_src(emit, src0) &&
408 emit_src(emit, src1) &&
409 emit_src(emit, src2) &&
410 emit_src(emit, src3));
411 }
412
413
414 /**
415 * Apply the absolute value modifier to the given src_register, returning
416 * a new src_register.
417 */
418 static struct src_register
419 absolute(struct src_register src)
420 {
421 src.base.srcMod = SVGA3DSRCMOD_ABS;
422 return src;
423 }
424
425
426 /**
427 * Apply the negation modifier to the given src_register, returning
428 * a new src_register.
429 */
430 static struct src_register
431 negate(struct src_register src)
432 {
433 switch (src.base.srcMod) {
434 case SVGA3DSRCMOD_ABS:
435 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
436 break;
437 case SVGA3DSRCMOD_ABSNEG:
438 src.base.srcMod = SVGA3DSRCMOD_ABS;
439 break;
440 case SVGA3DSRCMOD_NEG:
441 src.base.srcMod = SVGA3DSRCMOD_NONE;
442 break;
443 case SVGA3DSRCMOD_NONE:
444 src.base.srcMod = SVGA3DSRCMOD_NEG;
445 break;
446 }
447 return src;
448 }
449
450
451
452 /* Replace the src with the temporary specified in the dst, but copying
453 * only the necessary channels, and preserving the original swizzle (which is
454  * important given that several opcodes have constraints on the allowed
455 * swizzles).
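 *
 * E.g. (illustrative) if the source is read as SRC.yyzx, only the x, y and
 * z channels of the temporary need to be written, and the .yyzx swizzle is
 * then re-applied to the temporary when it replaces the source.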
456 */
457 static boolean
458 emit_repl(struct svga_shader_emitter *emit,
459 SVGA3dShaderDestToken dst,
460 struct src_register *src0)
461 {
462 unsigned src0_swizzle;
463 unsigned chan;
464
465 assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP);
466
467 src0_swizzle = src0->base.swizzle;
468
469 dst.mask = 0;
470 for (chan = 0; chan < 4; ++chan) {
471 unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3;
472 dst.mask |= 1 << swizzle;
473 }
474 assert(dst.mask);
475
476 src0->base.swizzle = SVGA3DSWIZZLE_NONE;
477
478 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 ))
479 return FALSE;
480
481 *src0 = src( dst );
482 src0->base.swizzle = src0_swizzle;
483
484 return TRUE;
485 }
486
487
488 /**
489 * Submit/emit an instruction with zero operands.
490 */
491 static boolean
492 submit_op0(struct svga_shader_emitter *emit,
493 SVGA3dShaderInstToken inst,
494 SVGA3dShaderDestToken dest)
495 {
496 return (emit_instruction( emit, inst ) &&
497 emit_dst( emit, dest ));
498 }
499
500
501 /**
502 * Submit/emit an instruction with one operand.
503 */
504 static boolean
505 submit_op1(struct svga_shader_emitter *emit,
506 SVGA3dShaderInstToken inst,
507 SVGA3dShaderDestToken dest,
508 struct src_register src0)
509 {
510 return emit_op1( emit, inst, dest, src0 );
511 }
512
513
514 /**
515 * Submit/emit an instruction with two operands.
516 *
517 * SVGA shaders may not refer to >1 constant register in a single
518 * instruction. This function checks for that usage and inserts a
519 * move to temporary if detected.
520 *
521 * The same applies to input registers -- at most a single input
522 * register may be read by any instruction.
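 *
 * For example (illustrative only), something like
 *    ADD TEMP[0], CONST[0], CONST[1]
 * would be emitted as
 *    MOV TEMP[t], CONST[0]
 *    ADD TEMP[0], TEMP[t], CONST[1]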
523 */
524 static boolean
525 submit_op2(struct svga_shader_emitter *emit,
526 SVGA3dShaderInstToken inst,
527 SVGA3dShaderDestToken dest,
528 struct src_register src0,
529 struct src_register src1)
530 {
531 SVGA3dShaderDestToken temp;
532 SVGA3dShaderRegType type0, type1;
533 boolean need_temp = FALSE;
534
535 temp.value = 0;
536 type0 = SVGA3dShaderGetRegType( src0.base.value );
537 type1 = SVGA3dShaderGetRegType( src1.base.value );
538
539 if (type0 == SVGA3DREG_CONST &&
540 type1 == SVGA3DREG_CONST &&
541 src0.base.num != src1.base.num)
542 need_temp = TRUE;
543
544 if (type0 == SVGA3DREG_INPUT &&
545 type1 == SVGA3DREG_INPUT &&
546 src0.base.num != src1.base.num)
547 need_temp = TRUE;
548
549 if (need_temp) {
550 temp = get_temp( emit );
551
552 if (!emit_repl( emit, temp, &src0 ))
553 return FALSE;
554 }
555
556 if (!emit_op2( emit, inst, dest, src0, src1 ))
557 return FALSE;
558
559 if (need_temp)
560 release_temp( emit, temp );
561
562 return TRUE;
563 }
564
565
566 /**
567 * Submit/emit an instruction with three operands.
568 *
569 * SVGA shaders may not refer to >1 constant register in a single
570 * instruction. This function checks for that usage and inserts a
571 * move to temporary if detected.
572 */
573 static boolean
574 submit_op3(struct svga_shader_emitter *emit,
575 SVGA3dShaderInstToken inst,
576 SVGA3dShaderDestToken dest,
577 struct src_register src0,
578 struct src_register src1,
579 struct src_register src2)
580 {
581 SVGA3dShaderDestToken temp0;
582 SVGA3dShaderDestToken temp1;
583 boolean need_temp0 = FALSE;
584 boolean need_temp1 = FALSE;
585 SVGA3dShaderRegType type0, type1, type2;
586
587 temp0.value = 0;
588 temp1.value = 0;
589 type0 = SVGA3dShaderGetRegType( src0.base.value );
590 type1 = SVGA3dShaderGetRegType( src1.base.value );
591 type2 = SVGA3dShaderGetRegType( src2.base.value );
592
593 if (inst.op != SVGA3DOP_SINCOS) {
594 if (type0 == SVGA3DREG_CONST &&
595 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
596 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
597 need_temp0 = TRUE;
598
599 if (type1 == SVGA3DREG_CONST &&
600 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
601 need_temp1 = TRUE;
602 }
603
604 if (type0 == SVGA3DREG_INPUT &&
605 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
606 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
607 need_temp0 = TRUE;
608
609 if (type1 == SVGA3DREG_INPUT &&
610 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
611 need_temp1 = TRUE;
612
613 if (need_temp0) {
614 temp0 = get_temp( emit );
615
616 if (!emit_repl( emit, temp0, &src0 ))
617 return FALSE;
618 }
619
620 if (need_temp1) {
621 temp1 = get_temp( emit );
622
623 if (!emit_repl( emit, temp1, &src1 ))
624 return FALSE;
625 }
626
627 if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
628 return FALSE;
629
630 if (need_temp1)
631 release_temp( emit, temp1 );
632 if (need_temp0)
633 release_temp( emit, temp0 );
634 return TRUE;
635 }
636
637
638 /**
639 * Submit/emit an instruction with four operands.
640 *
641 * SVGA shaders may not refer to >1 constant register in a single
642 * instruction. This function checks for that usage and inserts a
643 * move to temporary if detected.
644 */
645 static boolean
646 submit_op4(struct svga_shader_emitter *emit,
647 SVGA3dShaderInstToken inst,
648 SVGA3dShaderDestToken dest,
649 struct src_register src0,
650 struct src_register src1,
651 struct src_register src2,
652 struct src_register src3)
653 {
654 SVGA3dShaderDestToken temp0;
655 SVGA3dShaderDestToken temp3;
656 boolean need_temp0 = FALSE;
657 boolean need_temp3 = FALSE;
658 SVGA3dShaderRegType type0, type1, type2, type3;
659
660 temp0.value = 0;
661 temp3.value = 0;
662 type0 = SVGA3dShaderGetRegType( src0.base.value );
663 type1 = SVGA3dShaderGetRegType( src1.base.value );
664 type2 = SVGA3dShaderGetRegType( src2.base.value );
665    type3 = SVGA3dShaderGetRegType( src3.base.value );
666
667 /* Make life a little easier - this is only used by the TXD
668 * instruction which is guaranteed not to have a constant/input reg
669 * in one slot at least:
670 */
671 assert(type1 == SVGA3DREG_SAMPLER);
672
673 if (type0 == SVGA3DREG_CONST &&
674 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
675 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
676 need_temp0 = TRUE;
677
678 if (type3 == SVGA3DREG_CONST &&
679 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
680 need_temp3 = TRUE;
681
682 if (type0 == SVGA3DREG_INPUT &&
683 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
684 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
685 need_temp0 = TRUE;
686
687 if (type3 == SVGA3DREG_INPUT &&
688 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
689 need_temp3 = TRUE;
690
691 if (need_temp0) {
692 temp0 = get_temp( emit );
693
694 if (!emit_repl( emit, temp0, &src0 ))
695 return FALSE;
696 }
697
698 if (need_temp3) {
699 temp3 = get_temp( emit );
700
701 if (!emit_repl( emit, temp3, &src3 ))
702 return FALSE;
703 }
704
705 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
706 return FALSE;
707
708 if (need_temp3)
709 release_temp( emit, temp3 );
710 if (need_temp0)
711 release_temp( emit, temp0 );
712 return TRUE;
713 }
714
715
716 /**
717 * Do the src and dest registers refer to the same register?
718 */
719 static boolean
720 alias_src_dst(struct src_register src,
721 SVGA3dShaderDestToken dst)
722 {
723 if (src.base.num != dst.num)
724 return FALSE;
725
726 if (SVGA3dShaderGetRegType(dst.value) !=
727 SVGA3dShaderGetRegType(src.base.value))
728 return FALSE;
729
730 return TRUE;
731 }
732
733
734 /**
735 * Helper for emitting SVGA immediate values using the SVGA3DOP_DEF[I]
736 * instructions.
737 */
738 static boolean
739 emit_def_const(struct svga_shader_emitter *emit,
740 SVGA3dShaderConstType type,
741 unsigned idx, float a, float b, float c, float d)
742 {
743 SVGA3DOpDefArgs def;
744 SVGA3dShaderInstToken opcode;
745
746 switch (type) {
747 case SVGA3D_CONST_TYPE_FLOAT:
748 opcode = inst_token( SVGA3DOP_DEF );
749 def.dst = dst_register( SVGA3DREG_CONST, idx );
750 def.constValues[0] = a;
751 def.constValues[1] = b;
752 def.constValues[2] = c;
753 def.constValues[3] = d;
754 break;
755 case SVGA3D_CONST_TYPE_INT:
756 opcode = inst_token( SVGA3DOP_DEFI );
757 def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
758 def.constIValues[0] = (int)a;
759 def.constIValues[1] = (int)b;
760 def.constIValues[2] = (int)c;
761 def.constIValues[3] = (int)d;
762 break;
763 default:
764 assert(0);
765 opcode = inst_token( SVGA3DOP_NOP );
766 break;
767 }
768
769 if (!emit_instruction(emit, opcode) ||
770 !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
771 return FALSE;
772
773 return TRUE;
774 }
775
776
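/**
 * Emit the integer constant used by LOOP instructions: at most 255
 * iterations, starting at 0 and stepping by 1 (see get_loop_const()).
 */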
777 static boolean
778 create_loop_const( struct svga_shader_emitter *emit )
779 {
780 unsigned idx = emit->nr_hw_int_const++;
781
782 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
783 255, /* iteration count */
784 0, /* initial value */
785 1, /* step size */
786 0 /* not used, must be 0 */))
787 return FALSE;
788
789 emit->loop_const_idx = idx;
790 emit->created_loop_const = TRUE;
791
792 return TRUE;
793 }
794
795 static boolean
796 create_arl_consts( struct svga_shader_emitter *emit )
797 {
798 int i;
799
800 for (i = 0; i < emit->num_arl_consts; i += 4) {
801 int j;
802 unsigned idx = emit->nr_hw_float_const++;
803 float vals[4];
804 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
805 vals[j] = (float) emit->arl_consts[i + j].number;
806 emit->arl_consts[i + j].idx = idx;
807 switch (j) {
808 case 0:
809 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
810 break;
811 case 1:
812 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y;
813 break;
814 case 2:
815 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z;
816 break;
817 case 3:
818 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W;
819 break;
820 }
821 }
822 while (j < 4)
823 vals[j++] = 0;
824
825 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
826 vals[0], vals[1],
827 vals[2], vals[3]))
828 return FALSE;
829 }
830
831 return TRUE;
832 }
833
834
835 /**
836  * Return the register which holds the pixel shader's front/back-
837 * facing value.
838 */
839 static struct src_register
840 get_vface( struct svga_shader_emitter *emit )
841 {
842 assert(emit->emitted_vface);
843 return src_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE);
844 }
845
846
847 /**
848 * Create/emit a "common" constant with values {0, 0.5, -1, 1}.
849 * We can swizzle this to produce other useful constants such as
850 * {0, 0, 0, 0}, {1, 1, 1, 1}, etc.
851 */
852 static boolean
853 create_common_immediate( struct svga_shader_emitter *emit )
854 {
855 unsigned idx = emit->nr_hw_float_const++;
856
857 /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate
858 * other useful vectors.
859 */
860 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
861 idx, 0.0f, 0.5f, -1.0f, 1.0f ))
862 return FALSE;
863 emit->common_immediate_idx[0] = idx;
864 idx++;
865
866 /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
867 if (emit->key.vs.adjust_attrib_range) {
868 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
869 idx, 2.0f, 0.0f, 0.0f, 0.0f ))
870 return FALSE;
871 emit->common_immediate_idx[1] = idx;
872 }
873 else {
874 emit->common_immediate_idx[1] = -1;
875 }
876
877 emit->created_common_immediate = TRUE;
878
879 return TRUE;
880 }
881
882
883 /**
884 * Return swizzle/position for the given value in the "common" immediate.
885 */
886 static inline unsigned
887 common_immediate_swizzle(float value)
888 {
889 if (value == 0.0f)
890 return TGSI_SWIZZLE_X;
891 else if (value == 0.5f)
892 return TGSI_SWIZZLE_Y;
893 else if (value == -1.0f)
894 return TGSI_SWIZZLE_Z;
895 else if (value == 1.0f)
896 return TGSI_SWIZZLE_W;
897 else {
898 assert(!"illegal value in common_immediate_swizzle");
899 return TGSI_SWIZZLE_X;
900 }
901 }
902
903
904 /**
905  * Returns an immediate reg where all the terms are either 0, 0.5, -1 or 1
906 */
907 static struct src_register
908 get_immediate(struct svga_shader_emitter *emit,
909 float x, float y, float z, float w)
910 {
911 unsigned sx = common_immediate_swizzle(x);
912 unsigned sy = common_immediate_swizzle(y);
913 unsigned sz = common_immediate_swizzle(z);
914 unsigned sw = common_immediate_swizzle(w);
915 assert(emit->created_common_immediate);
916 assert(emit->common_immediate_idx[0] >= 0);
917 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
918 sx, sy, sz, sw);
919 }
920
921
922 /**
923 * returns {0, 0, 0, 0} immediate
924 */
925 static struct src_register
926 get_zero_immediate( struct svga_shader_emitter *emit )
927 {
928 assert(emit->created_common_immediate);
929 assert(emit->common_immediate_idx[0] >= 0);
930 return swizzle(src_register( SVGA3DREG_CONST,
931 emit->common_immediate_idx[0]),
932 0, 0, 0, 0);
933 }
934
935
936 /**
937 * returns {1, 1, 1, 1} immediate
938 */
939 static struct src_register
940 get_one_immediate( struct svga_shader_emitter *emit )
941 {
942 assert(emit->created_common_immediate);
943 assert(emit->common_immediate_idx[0] >= 0);
944 return swizzle(src_register( SVGA3DREG_CONST,
945 emit->common_immediate_idx[0]),
946 3, 3, 3, 3);
947 }
948
949
950 /**
951 * returns {0.5, 0.5, 0.5, 0.5} immediate
952 */
953 static struct src_register
954 get_half_immediate( struct svga_shader_emitter *emit )
955 {
956 assert(emit->created_common_immediate);
957 assert(emit->common_immediate_idx[0] >= 0);
958 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
959 1, 1, 1, 1);
960 }
961
962
963 /**
964 * returns {2, 2, 2, 2} immediate
965 */
966 static struct src_register
967 get_two_immediate( struct svga_shader_emitter *emit )
968 {
969 /* Note we use the second common immediate here */
970 assert(emit->created_common_immediate);
971 assert(emit->common_immediate_idx[1] >= 0);
972 return swizzle(src_register( SVGA3DREG_CONST,
973 emit->common_immediate_idx[1]),
974 0, 0, 0, 0);
975 }
976
977
978 /**
979 * returns the loop const
980 */
981 static struct src_register
982 get_loop_const( struct svga_shader_emitter *emit )
983 {
984 assert(emit->created_loop_const);
985 assert(emit->loop_const_idx >= 0);
986 return src_register( SVGA3DREG_CONSTINT,
987 emit->loop_const_idx );
988 }
989
990
991 static struct src_register
992 get_fake_arl_const( struct svga_shader_emitter *emit )
993 {
994 struct src_register reg;
995 int idx = 0, swizzle = 0, i;
996
997 for (i = 0; i < emit->num_arl_consts; ++ i) {
998 if (emit->arl_consts[i].arl_num == emit->current_arl) {
999 idx = emit->arl_consts[i].idx;
1000 swizzle = emit->arl_consts[i].swizzle;
1001 }
1002 }
1003
1004 reg = src_register( SVGA3DREG_CONST, idx );
1005 return scalar(reg, swizzle);
1006 }
1007
1008
1009 /**
1010 * Return a register which holds the width and height of the texture
1011 * currently bound to the given sampler.
1012 */
1013 static struct src_register
1014 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
1015 {
1016 int idx;
1017 struct src_register reg;
1018
1019 /* the width/height indexes start right after constants */
1020 idx = emit->key.tex[sampler_num].width_height_idx +
1021 emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
1022
1023 reg = src_register( SVGA3DREG_CONST, idx );
1024 return reg;
1025 }
1026
1027
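/**
 * Emit a "fake" ARL: copy the source into a temp, add the per-ARL bias
 * constant (see get_fake_arl_const), then MOVA the result into the address
 * register with the original swizzle.  translate_src_register() compensates
 * for the bias by subtracting svga_arl_adjustment() from relative constant
 * indices.
 */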
1028 static boolean
1029 emit_fake_arl(struct svga_shader_emitter *emit,
1030 const struct tgsi_full_instruction *insn)
1031 {
1032 const struct src_register src0 =
1033 translate_src_register(emit, &insn->Src[0] );
1034 struct src_register src1 = get_fake_arl_const( emit );
1035 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1036 SVGA3dShaderDestToken tmp = get_temp( emit );
1037
1038 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
1039 return FALSE;
1040
1041 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
1042 src1))
1043 return FALSE;
1044
1045 /* replicate the original swizzle */
1046 src1 = src(tmp);
1047 src1.base.swizzle = src0.base.swizzle;
1048
1049 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
1050 dst, src1 );
1051 }
1052
1053
1054 static boolean
1055 emit_if(struct svga_shader_emitter *emit,
1056 const struct tgsi_full_instruction *insn)
1057 {
1058 struct src_register src0 =
1059 translate_src_register(emit, &insn->Src[0]);
1060 struct src_register zero = get_zero_immediate(emit);
1061 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
1062
1063 if_token.control = SVGA3DOPCOMPC_NE;
1064
1065 if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) {
1066 /*
1067        * An IFC instruction can read at most one distinct constant register.
1068 */
1069 SVGA3dShaderDestToken tmp = get_temp( emit );
1070
1071 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
1072 return FALSE;
1073
1074 src0 = scalar(src( tmp ), TGSI_SWIZZLE_X);
1075 }
1076
1077 emit->dynamic_branching_level++;
1078
1079 return (emit_instruction( emit, if_token ) &&
1080 emit_src( emit, src0 ) &&
1081 emit_src( emit, zero ) );
1082 }
1083
1084
1085 static boolean
1086 emit_else(struct svga_shader_emitter *emit,
1087 const struct tgsi_full_instruction *insn)
1088 {
1089 return emit_instruction(emit, inst_token(SVGA3DOP_ELSE));
1090 }
1091
1092
1093 static boolean
1094 emit_endif(struct svga_shader_emitter *emit,
1095 const struct tgsi_full_instruction *insn)
1096 {
1097 emit->dynamic_branching_level--;
1098
1099 return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF));
1100 }
1101
1102
1103 /**
1104 * Translate the following TGSI FLR instruction.
1105 * FLR DST, SRC
1106 * To the following SVGA3D instruction sequence.
1107 * FRC TMP, SRC
1108 * SUB DST, SRC, TMP
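 *   (using the identity floor(x) = x - frac(x))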
1109 */
1110 static boolean
1111 emit_floor(struct svga_shader_emitter *emit,
1112 const struct tgsi_full_instruction *insn )
1113 {
1114 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1115 const struct src_register src0 =
1116 translate_src_register(emit, &insn->Src[0] );
1117 SVGA3dShaderDestToken temp = get_temp( emit );
1118
1119 /* FRC TMP, SRC */
1120 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
1121 return FALSE;
1122
1123 /* SUB DST, SRC, TMP */
1124 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
1125 negate( src( temp ) ) ))
1126 return FALSE;
1127
1128 return TRUE;
1129 }
1130
1131
1132 /**
1133 * Translate the following TGSI CEIL instruction.
1134 * CEIL DST, SRC
1135 * To the following SVGA3D instruction sequence.
1136 * FRC TMP, -SRC
1137 * ADD DST, SRC, TMP
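 *   (using the identity ceil(x) = x + frac(-x))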
1138 */
1139 static boolean
1140 emit_ceil(struct svga_shader_emitter *emit,
1141 const struct tgsi_full_instruction *insn)
1142 {
1143 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
1144 const struct src_register src0 =
1145 translate_src_register(emit, &insn->Src[0]);
1146 SVGA3dShaderDestToken temp = get_temp(emit);
1147
1148 /* FRC TMP, -SRC */
1149 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0)))
1150 return FALSE;
1151
1152 /* ADD DST, SRC, TMP */
1153 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp)))
1154 return FALSE;
1155
1156 return TRUE;
1157 }
1158
1159
1160 /**
1161 * Translate the following TGSI DIV instruction.
1162 * DIV DST.xy, SRC0, SRC1
1163 * To the following SVGA3D instruction sequence.
1164 * RCP TMP.x, SRC1.xxxx
1165 * RCP TMP.y, SRC1.yyyy
1166 * MUL DST.xy, SRC0, TMP
1167 */
1168 static boolean
1169 emit_div(struct svga_shader_emitter *emit,
1170 const struct tgsi_full_instruction *insn )
1171 {
1172 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1173 const struct src_register src0 =
1174 translate_src_register(emit, &insn->Src[0] );
1175 const struct src_register src1 =
1176 translate_src_register(emit, &insn->Src[1] );
1177 SVGA3dShaderDestToken temp = get_temp( emit );
1178 unsigned i;
1179
1180 /* For each enabled element, perform a RCP instruction. Note that
1181 * RCP is scalar in SVGA3D:
1182 */
1183 for (i = 0; i < 4; i++) {
1184 unsigned channel = 1 << i;
1185 if (dst.mask & channel) {
1186 /* RCP TMP.?, SRC1.???? */
1187 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1188 writemask(temp, channel),
1189 scalar(src1, i) ))
1190 return FALSE;
1191 }
1192 }
1193
1194 /* Vector mul:
1195 * MUL DST, SRC0, TMP
1196 */
1197 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
1198 src( temp ) ))
1199 return FALSE;
1200
1201 return TRUE;
1202 }
1203
1204
1205 /**
1206 * Translate the following TGSI DP2 instruction.
1207 * DP2 DST, SRC1, SRC2
1208 * To the following SVGA3D instruction sequence.
1209 * MUL TMP, SRC1, SRC2
1210 * ADD DST, TMP.xxxx, TMP.yyyy
1211 */
1212 static boolean
1213 emit_dp2(struct svga_shader_emitter *emit,
1214 const struct tgsi_full_instruction *insn )
1215 {
1216 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1217 const struct src_register src0 =
1218 translate_src_register(emit, &insn->Src[0]);
1219 const struct src_register src1 =
1220 translate_src_register(emit, &insn->Src[1]);
1221 SVGA3dShaderDestToken temp = get_temp( emit );
1222 struct src_register temp_src0, temp_src1;
1223
1224 /* MUL TMP, SRC1, SRC2 */
1225 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
1226 return FALSE;
1227
1228 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1229 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1230
1231 /* ADD DST, TMP.xxxx, TMP.yyyy */
1232 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1233 temp_src0, temp_src1 ))
1234 return FALSE;
1235
1236 return TRUE;
1237 }
1238
1239
1240 /**
1241 * Translate the following TGSI DPH instruction.
1242 * DPH DST, SRC1, SRC2
1243 * To the following SVGA3D instruction sequence.
1244 * DP3 TMP, SRC1, SRC2
1245 * ADD DST, TMP, SRC2.wwww
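 *   (i.e. the homogeneous dot product SRC1.xyz . SRC2.xyz + SRC2.w)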
1246 */
1247 static boolean
1248 emit_dph(struct svga_shader_emitter *emit,
1249 const struct tgsi_full_instruction *insn )
1250 {
1251 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1252 const struct src_register src0 = translate_src_register(
1253 emit, &insn->Src[0] );
1254 struct src_register src1 =
1255 translate_src_register(emit, &insn->Src[1]);
1256 SVGA3dShaderDestToken temp = get_temp( emit );
1257
1258 /* DP3 TMP, SRC1, SRC2 */
1259 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
1260 return FALSE;
1261
1262 src1 = scalar(src1, TGSI_SWIZZLE_W);
1263
1264 /* ADD DST, TMP, SRC2.wwww */
1265 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1266 src( temp ), src1 ))
1267 return FALSE;
1268
1269 return TRUE;
1270 }
1271
1272
1273 /**
1274 * Sine / Cosine helper function.
1275 */
1276 static boolean
1277 do_emit_sincos(struct svga_shader_emitter *emit,
1278 SVGA3dShaderDestToken dst,
1279 struct src_register src0)
1280 {
1281 src0 = scalar(src0, TGSI_SWIZZLE_X);
1282 return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0);
1283 }
1284
1285
1286 /**
1287  * Translate/emit a TGSI SIN, COS or SCS instruction.
1288 */
1289 static boolean
1290 emit_sincos(struct svga_shader_emitter *emit,
1291 const struct tgsi_full_instruction *insn)
1292 {
1293 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1294 struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
1295 SVGA3dShaderDestToken temp = get_temp( emit );
1296
1297 /* SCS TMP SRC */
1298 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
1299 return FALSE;
1300
1301 /* MOV DST TMP */
1302 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
1303 return FALSE;
1304
1305 return TRUE;
1306 }
1307
1308
1309 /**
1310 * Translate TGSI SIN instruction into:
1311 * SCS TMP SRC
1312 * MOV DST TMP.yyyy
1313 */
1314 static boolean
1315 emit_sin(struct svga_shader_emitter *emit,
1316 const struct tgsi_full_instruction *insn )
1317 {
1318 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1319 struct src_register src0 =
1320 translate_src_register(emit, &insn->Src[0] );
1321 SVGA3dShaderDestToken temp = get_temp( emit );
1322
1323 /* SCS TMP SRC */
1324 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1325 return FALSE;
1326
1327 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1328
1329 /* MOV DST TMP.yyyy */
1330 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1331 return FALSE;
1332
1333 return TRUE;
1334 }
1335
1336
1337 /*
1338 * Translate TGSI COS instruction into:
1339 * SCS TMP SRC
1340 * MOV DST TMP.xxxx
1341 */
1342 static boolean
1343 emit_cos(struct svga_shader_emitter *emit,
1344 const struct tgsi_full_instruction *insn)
1345 {
1346 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1347 struct src_register src0 =
1348 translate_src_register(emit, &insn->Src[0] );
1349 SVGA3dShaderDestToken temp = get_temp( emit );
1350
1351 /* SCS TMP SRC */
1352 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1353 return FALSE;
1354
1355 src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1356
1357 /* MOV DST TMP.xxxx */
1358 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1359 return FALSE;
1360
1361 return TRUE;
1362 }
1363
1364
1365 /**
1366 * Translate/emit TGSI SSG (Set Sign: -1, 0, +1) instruction.
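 *
 * In the vertex shader this maps directly to SGN; in the fragment shader it
 * is expanded roughly as
 *   CMP TMP0, SRC,  {1},  {0}
 *   CMP TMP1, -SRC, {-1}, {0}
 *   ADD DST, TMP0, TMP1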
1367 */
1368 static boolean
1369 emit_ssg(struct svga_shader_emitter *emit,
1370 const struct tgsi_full_instruction *insn)
1371 {
1372 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1373 struct src_register src0 =
1374 translate_src_register(emit, &insn->Src[0] );
1375 SVGA3dShaderDestToken temp0 = get_temp( emit );
1376 SVGA3dShaderDestToken temp1 = get_temp( emit );
1377 struct src_register zero, one;
1378
1379 if (emit->unit == PIPE_SHADER_VERTEX) {
1380 /* SGN DST, SRC0, TMP0, TMP1 */
1381 return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0,
1382 src( temp0 ), src( temp1 ) );
1383 }
1384
1385 one = get_one_immediate(emit);
1386 zero = get_zero_immediate(emit);
1387
1388 /* CMP TMP0, SRC0, one, zero */
1389 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1390 writemask( temp0, dst.mask ), src0, one, zero ))
1391 return FALSE;
1392
1393 /* CMP TMP1, negate(SRC0), negate(one), zero */
1394 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1395 writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
1396 zero ))
1397 return FALSE;
1398
1399 /* ADD DST, TMP0, TMP1 */
1400 return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
1401 src( temp1 ) );
1402 }
1403
1404
1405 /**
1406 * Translate/emit TGSI SUB instruction as:
1407 * ADD DST, SRC0, negate(SRC1)
1408 */
1409 static boolean
1410 emit_sub(struct svga_shader_emitter *emit,
1411 const struct tgsi_full_instruction *insn)
1412 {
1413 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1414 struct src_register src0 = translate_src_register(
1415 emit, &insn->Src[0] );
1416 struct src_register src1 = translate_src_register(
1417 emit, &insn->Src[1] );
1418
1419 src1 = negate(src1);
1420
1421 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1422 src0, src1 ))
1423 return FALSE;
1424
1425 return TRUE;
1426 }
1427
1428
1429 /**
1430 * Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative).
1431 */
1432 static boolean
1433 emit_kill_if(struct svga_shader_emitter *emit,
1434 const struct tgsi_full_instruction *insn)
1435 {
1436 const struct tgsi_full_src_register *reg = &insn->Src[0];
1437 struct src_register src0, srcIn;
1438 const boolean special = (reg->Register.Absolute ||
1439 reg->Register.Negate ||
1440 reg->Register.Indirect ||
1441 reg->Register.SwizzleX != 0 ||
1442 reg->Register.SwizzleY != 1 ||
1443 reg->Register.SwizzleZ != 2 ||
1444 reg->Register.File != TGSI_FILE_TEMPORARY);
1445 SVGA3dShaderDestToken temp;
1446
1447 src0 = srcIn = translate_src_register( emit, reg );
1448
1449 if (special) {
1450 /* need a temp reg */
1451 temp = get_temp( emit );
1452 }
1453
1454 if (special) {
1455 /* move the source into a temp register */
1456 submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, src0);
1457
1458 src0 = src( temp );
1459 }
1460
1461 /* Do the texkill by checking if any of the XYZW components are < 0.
1462     * Note that ps_2_0 and later take XYZW into consideration, while ps_1_x
1463 * only used XYZ. The MSDN documentation about this is incorrect.
1464 */
1465 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) ))
1466 return FALSE;
1467
1468 return TRUE;
1469 }
1470
1471
1472 /**
1473 * Translate/emit unconditional kill instruction (usually found inside
1474 * an IF/ELSE/ENDIF block).
1475 */
1476 static boolean
1477 emit_kill(struct svga_shader_emitter *emit,
1478 const struct tgsi_full_instruction *insn)
1479 {
1480 SVGA3dShaderDestToken temp;
1481 struct src_register one = get_one_immediate(emit);
1482 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL );
1483
1484    /* texkill doesn't allow negation on the operand, so let's move
1485     * the negated {1} into a temp register */
1486 temp = get_temp( emit );
1487 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1488 negate( one ) ))
1489 return FALSE;
1490
1491 return submit_op0( emit, inst, temp );
1492 }
1493
1494
1495 /**
1496 * Test if r1 and r2 are the same register.
1497 */
1498 static boolean
1499 same_register(struct src_register r1, struct src_register r2)
1500 {
1501 return (r1.base.num == r2.base.num &&
1502 r1.base.type_upper == r2.base.type_upper &&
1503 r1.base.type_lower == r2.base.type_lower);
1504 }
1505
1506
1507
1508 /**
1509 * Implement conditionals by initializing destination reg to 'fail',
1510 * then set predicate reg with UFOP_SETP, then move 'pass' to dest
1511  * then set the predicate reg with SETP, then move 'pass' to dest
1512 *
1513 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1514 * MOV dst, fail
1515 * MOV dst, pass, p0
1516 */
1517 static boolean
1518 emit_conditional(struct svga_shader_emitter *emit,
1519 unsigned compare_func,
1520 SVGA3dShaderDestToken dst,
1521 struct src_register src0,
1522 struct src_register src1,
1523 struct src_register pass,
1524 struct src_register fail)
1525 {
1526 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1527 SVGA3dShaderInstToken setp_token;
1528
1529 switch (compare_func) {
1530 case PIPE_FUNC_NEVER:
1531 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1532 dst, fail );
1533 break;
1534 case PIPE_FUNC_LESS:
1535 setp_token = inst_token_setp(SVGA3DOPCOMP_LT);
1536 break;
1537 case PIPE_FUNC_EQUAL:
1538 setp_token = inst_token_setp(SVGA3DOPCOMP_EQ);
1539 break;
1540 case PIPE_FUNC_LEQUAL:
1541 setp_token = inst_token_setp(SVGA3DOPCOMP_LE);
1542 break;
1543 case PIPE_FUNC_GREATER:
1544 setp_token = inst_token_setp(SVGA3DOPCOMP_GT);
1545 break;
1546 case PIPE_FUNC_NOTEQUAL:
1547 setp_token = inst_token_setp(SVGA3DOPCOMPC_NE);
1548 break;
1549 case PIPE_FUNC_GEQUAL:
1550 setp_token = inst_token_setp(SVGA3DOPCOMP_GE);
1551 break;
1552 case PIPE_FUNC_ALWAYS:
1553 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1554 dst, pass );
1555 break;
1556 }
1557
1558 if (same_register(src(dst), pass)) {
1559 /* We'll get bad results if the dst and pass registers are the same
1560 * so use a temp register containing pass.
1561 */
1562 SVGA3dShaderDestToken temp = get_temp(emit);
1563 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, pass))
1564 return FALSE;
1565 pass = src(temp);
1566 }
1567
1568 /* SETP src0, COMPOP, src1 */
1569 if (!submit_op2( emit, setp_token, pred_reg,
1570 src0, src1 ))
1571 return FALSE;
1572
1573 /* MOV dst, fail */
1574 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), dst, fail))
1575 return FALSE;
1576
1577 /* MOV dst, pass (predicated)
1578 *
1579 * Note that the predicate reg (and possible modifiers) is passed
1580 * as the first source argument.
1581 */
1582 if (!submit_op2(emit,
1583 inst_token_predicated(SVGA3DOP_MOV), dst,
1584 src(pred_reg), pass))
1585 return FALSE;
1586
1587 return TRUE;
1588 }
1589
1590
1591 /**
1592  * Helper for emitting 'selection' commands.  Basically:
1593 * if (src0 OP src1)
1594 * dst = 1.0;
1595 * else
1596 * dst = 0.0;
1597 */
1598 static boolean
1599 emit_select(struct svga_shader_emitter *emit,
1600 unsigned compare_func,
1601 SVGA3dShaderDestToken dst,
1602 struct src_register src0,
1603 struct src_register src1 )
1604 {
1605 /* There are some SVGA instructions which implement some selects
1606 * directly, but they are only available in the vertex shader.
1607 */
1608 if (emit->unit == PIPE_SHADER_VERTEX) {
1609 switch (compare_func) {
1610 case PIPE_FUNC_GEQUAL:
1611 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1612 case PIPE_FUNC_LEQUAL:
1613 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1614 case PIPE_FUNC_GREATER:
1615 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1616 case PIPE_FUNC_LESS:
1617 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1618 default:
1619 break;
1620 }
1621 }
1622
1623 /* Otherwise, need to use the setp approach:
1624 */
1625 {
1626 struct src_register one, zero;
1627       /* zero immediate is {0, 0, 0, 0} */
1628 zero = get_zero_immediate(emit);
1629 one = get_one_immediate(emit);
1630
1631 return emit_conditional(emit, compare_func, dst, src0, src1, one, zero);
1632 }
1633 }
1634
1635
1636 /**
1637 * Translate/emit a TGSI SEQ, SNE, SLT, SGE, etc. instruction.
1638 */
1639 static boolean
1640 emit_select_op(struct svga_shader_emitter *emit,
1641 unsigned compare,
1642 const struct tgsi_full_instruction *insn)
1643 {
1644 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1645 struct src_register src0 = translate_src_register(
1646 emit, &insn->Src[0] );
1647 struct src_register src1 = translate_src_register(
1648 emit, &insn->Src[1] );
1649
1650 return emit_select( emit, compare, dst, src0, src1 );
1651 }
1652
1653
1654 /**
1655 * Translate TGSI CMP instruction. Component-wise:
1656 * dst = (src0 < 0.0) ? src1 : src2
1657 */
1658 static boolean
1659 emit_cmp(struct svga_shader_emitter *emit,
1660 const struct tgsi_full_instruction *insn)
1661 {
1662 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1663 const struct src_register src0 =
1664 translate_src_register(emit, &insn->Src[0] );
1665 const struct src_register src1 =
1666 translate_src_register(emit, &insn->Src[1] );
1667 const struct src_register src2 =
1668 translate_src_register(emit, &insn->Src[2] );
1669
1670 if (emit->unit == PIPE_SHADER_VERTEX) {
1671 struct src_register zero = get_zero_immediate(emit);
1672 /* We used to simulate CMP with SLT+LRP. But that didn't work when
1673 * src1 or src2 was Inf/NaN. In particular, GLSL sqrt(0) failed
1674 * because it involves a CMP to handle the 0 case.
1675 * Use a conditional expression instead.
1676 */
1677 return emit_conditional(emit, PIPE_FUNC_LESS, dst,
1678 src0, zero, src1, src2);
1679 }
1680 else {
1681 assert(emit->unit == PIPE_SHADER_FRAGMENT);
1682
1683 /* CMP DST, SRC0, SRC2, SRC1 */
1684 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst,
1685 src0, src2, src1);
1686 }
1687 }
1688
1689
1690 /**
1691 * Translate/emit 2-operand (coord, sampler) texture instructions.
1692 */
1693 static boolean
1694 emit_tex2(struct svga_shader_emitter *emit,
1695 const struct tgsi_full_instruction *insn,
1696 SVGA3dShaderDestToken dst)
1697 {
1698 SVGA3dShaderInstToken inst;
1699 struct src_register texcoord;
1700 struct src_register sampler;
1701 SVGA3dShaderDestToken tmp;
1702
1703 inst.value = 0;
1704
1705 switch (insn->Instruction.Opcode) {
1706 case TGSI_OPCODE_TEX:
1707 inst.op = SVGA3DOP_TEX;
1708 break;
1709 case TGSI_OPCODE_TXP:
1710 inst.op = SVGA3DOP_TEX;
1711 inst.control = SVGA3DOPCONT_PROJECT;
1712 break;
1713 case TGSI_OPCODE_TXB:
1714 inst.op = SVGA3DOP_TEX;
1715 inst.control = SVGA3DOPCONT_BIAS;
1716 break;
1717 case TGSI_OPCODE_TXL:
1718 inst.op = SVGA3DOP_TEXLDL;
1719 break;
1720 default:
1721 assert(0);
1722 return FALSE;
1723 }
1724
1725 texcoord = translate_src_register( emit, &insn->Src[0] );
1726 sampler = translate_src_register( emit, &insn->Src[1] );
1727
1728 if (emit->key.tex[sampler.base.num].unnormalized ||
1729 emit->dynamic_branching_level > 0)
1730 tmp = get_temp( emit );
1731
1732 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1733 * zero in that case.
1734 */
1735 if (emit->dynamic_branching_level > 0 &&
1736 inst.op == SVGA3DOP_TEX &&
1737 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1738 struct src_register zero = get_zero_immediate(emit);
1739
1740 /* MOV tmp, texcoord */
1741 if (!submit_op1( emit,
1742 inst_token( SVGA3DOP_MOV ),
1743 tmp,
1744 texcoord ))
1745 return FALSE;
1746
1747 /* MOV tmp.w, zero */
1748 if (!submit_op1( emit,
1749 inst_token( SVGA3DOP_MOV ),
1750 writemask( tmp, TGSI_WRITEMASK_W ),
1751 zero ))
1752 return FALSE;
1753
1754 texcoord = src( tmp );
1755 inst.op = SVGA3DOP_TEXLDL;
1756 }
1757
1758 /* Explicit normalization of texcoords:
1759 */
1760 if (emit->key.tex[sampler.base.num].unnormalized) {
1761 struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1762
1763 /* MUL tmp, SRC0, WH */
1764 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1765 tmp, texcoord, wh ))
1766 return FALSE;
1767
1768 texcoord = src( tmp );
1769 }
1770
1771 return submit_op2( emit, inst, dst, texcoord, sampler );
1772 }
1773
1774
1775 /**
1776 * Translate/emit 4-operand (coord, ddx, ddy, sampler) texture instructions.
1777 */
1778 static boolean
1779 emit_tex4(struct svga_shader_emitter *emit,
1780 const struct tgsi_full_instruction *insn,
1781 SVGA3dShaderDestToken dst )
1782 {
1783 SVGA3dShaderInstToken inst;
1784 struct src_register texcoord;
1785 struct src_register ddx;
1786 struct src_register ddy;
1787 struct src_register sampler;
1788
1789 texcoord = translate_src_register( emit, &insn->Src[0] );
1790 ddx = translate_src_register( emit, &insn->Src[1] );
1791 ddy = translate_src_register( emit, &insn->Src[2] );
1792 sampler = translate_src_register( emit, &insn->Src[3] );
1793
1794 inst.value = 0;
1795
1796 switch (insn->Instruction.Opcode) {
1797 case TGSI_OPCODE_TXD:
1798 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1799 break;
1800 default:
1801 assert(0);
1802 return FALSE;
1803 }
1804
1805 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1806 }
1807
1808
1809 /**
1810 * Emit texture swizzle code. We do this here since SVGA samplers don't
1811 * directly support swizzles.
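 *
 * Components that select the constant 0 or 1 (PIPE_SWIZZLE_ZERO/ONE) are
 * written from the common immediate; the remaining components are written
 * with a single swizzled MOV.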
1812 */
1813 static boolean
1814 emit_tex_swizzle(struct svga_shader_emitter *emit,
1815 SVGA3dShaderDestToken dst,
1816 struct src_register src,
1817 unsigned swizzle_x,
1818 unsigned swizzle_y,
1819 unsigned swizzle_z,
1820 unsigned swizzle_w)
1821 {
1822 const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
1823 unsigned srcSwizzle[4];
1824 unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
1825 unsigned i;
1826
1827 /* build writemasks and srcSwizzle terms */
1828 for (i = 0; i < 4; i++) {
1829 if (swizzleIn[i] == PIPE_SWIZZLE_ZERO) {
1830 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1831 zeroWritemask |= (1 << i);
1832 }
1833 else if (swizzleIn[i] == PIPE_SWIZZLE_ONE) {
1834 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1835 oneWritemask |= (1 << i);
1836 }
1837 else {
1838 srcSwizzle[i] = swizzleIn[i];
1839 srcWritemask |= (1 << i);
1840 }
1841 }
1842
1843 /* write x/y/z/w comps */
1844 if (dst.mask & srcWritemask) {
1845 if (!submit_op1(emit,
1846 inst_token(SVGA3DOP_MOV),
1847 writemask(dst, srcWritemask),
1848 swizzle(src,
1849 srcSwizzle[0],
1850 srcSwizzle[1],
1851 srcSwizzle[2],
1852 srcSwizzle[3])))
1853 return FALSE;
1854 }
1855
1856 /* write 0 comps */
1857 if (dst.mask & zeroWritemask) {
1858 if (!submit_op1(emit,
1859 inst_token(SVGA3DOP_MOV),
1860 writemask(dst, zeroWritemask),
1861 get_zero_immediate(emit)))
1862 return FALSE;
1863 }
1864
1865 /* write 1 comps */
1866 if (dst.mask & oneWritemask) {
1867 if (!submit_op1(emit,
1868 inst_token(SVGA3DOP_MOV),
1869 writemask(dst, oneWritemask),
1870 get_one_immediate(emit)))
1871 return FALSE;
1872 }
1873
1874 return TRUE;
1875 }
1876
1877
1878 /**
1879 * Translate/emit a TGSI texture sample instruction.
1880 */
1881 static boolean
1882 emit_tex(struct svga_shader_emitter *emit,
1883 const struct tgsi_full_instruction *insn)
1884 {
1885 SVGA3dShaderDestToken dst =
1886 translate_dst_register( emit, insn, 0 );
1887 struct src_register src0 =
1888 translate_src_register( emit, &insn->Src[0] );
1889 struct src_register src1 =
1890 translate_src_register( emit, &insn->Src[1] );
1891
1892 SVGA3dShaderDestToken tex_result;
1893 const unsigned unit = src1.base.num;
1894
1895 /* check for shadow samplers */
1896 boolean compare = (emit->key.tex[unit].compare_mode ==
1897 PIPE_TEX_COMPARE_R_TO_TEXTURE);
1898
1899 /* texture swizzle */
1900 boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
1901 emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
1902 emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
1903 emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
1904
1905 boolean saturate = insn->Instruction.Saturate;
1906
1907 /* If doing compare processing or tex swizzle or saturation, we need to put
1908 * the fetched color into a temporary so it can be used as a source later on.
1909 */
1910 if (compare || swizzle || saturate) {
1911 tex_result = get_temp( emit );
1912 }
1913 else {
1914 tex_result = dst;
1915 }
1916
1917 switch(insn->Instruction.Opcode) {
1918 case TGSI_OPCODE_TEX:
1919 case TGSI_OPCODE_TXB:
1920 case TGSI_OPCODE_TXP:
1921 case TGSI_OPCODE_TXL:
1922 if (!emit_tex2( emit, insn, tex_result ))
1923 return FALSE;
1924 break;
1925 case TGSI_OPCODE_TXD:
1926 if (!emit_tex4( emit, insn, tex_result ))
1927 return FALSE;
1928 break;
1929 default:
1930 assert(0);
1931 }
1932
1933 if (compare) {
1934 SVGA3dShaderDestToken dst2;
1935
1936 if (swizzle || saturate)
1937 dst2 = tex_result;
1938 else
1939 dst2 = dst;
1940
1941 if (dst.mask & TGSI_WRITEMASK_XYZ) {
1942 SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1943 /* When sampling a depth texture, the result of the comparison is in
1944 * the Y component.
1945 */
1946 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1947 struct src_register r_coord;
1948
1949 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
1950 /* Divide texcoord R by Q */
1951 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1952 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1953 scalar(src0, TGSI_SWIZZLE_W) ))
1954 return FALSE;
1955
1956 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1957 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1958 scalar(src0, TGSI_SWIZZLE_Z),
1959 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1960 return FALSE;
1961
1962 r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X);
1963 }
1964 else {
1965 r_coord = scalar(src0, TGSI_SWIZZLE_Z);
1966 }
1967
1968 /* Compare texture sample value against R component of texcoord */
1969 if (!emit_select(emit,
1970 emit->key.tex[unit].compare_func,
1971 writemask( dst2, TGSI_WRITEMASK_XYZ ),
1972 r_coord,
1973 tex_src_x))
1974 return FALSE;
1975 }
1976
1977 if (dst.mask & TGSI_WRITEMASK_W) {
1978 struct src_register one = get_one_immediate(emit);
1979
1980 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1981 writemask( dst2, TGSI_WRITEMASK_W ),
1982 one ))
1983 return FALSE;
1984 }
1985 }
1986
1987 if (saturate && !swizzle) {
1988 /* MOV_SAT real_dst, dst */
1989 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1990 return FALSE;
1991 }
1992 else if (swizzle) {
1993 /* swizzle from tex_result to dst (handles saturation too, if any) */
1994 emit_tex_swizzle(emit,
1995 dst, src(tex_result),
1996 emit->key.tex[unit].swizzle_r,
1997 emit->key.tex[unit].swizzle_g,
1998 emit->key.tex[unit].swizzle_b,
1999 emit->key.tex[unit].swizzle_a);
2000 }
2001
2002 return TRUE;
2003 }
2004
2005
2006 static boolean
2007 emit_bgnloop(struct svga_shader_emitter *emit,
2008 const struct tgsi_full_instruction *insn)
2009 {
2010 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
2011 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
2012 struct src_register const_int = get_loop_const( emit );
2013
2014 emit->dynamic_branching_level++;
2015
2016 return (emit_instruction( emit, inst ) &&
2017 emit_src( emit, loop_reg ) &&
2018 emit_src( emit, const_int ) );
2019 }
2020
2021
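/**
* Translate/emit TGSI ENDLOOP (closes the innermost LOOP).
*/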
2022 static boolean
2023 emit_endloop(struct svga_shader_emitter *emit,
2024 const struct tgsi_full_instruction *insn)
2025 {
2026 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
2027
2028 emit->dynamic_branching_level--;
2029
2030 return emit_instruction( emit, inst );
2031 }
2032
2033
2034 /**
2035 * Translate/emit TGSI BREAK (out of loop) instruction.
2036 */
2037 static boolean
2038 emit_brk(struct svga_shader_emitter *emit,
2039 const struct tgsi_full_instruction *insn)
2040 {
2041 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
2042 return emit_instruction( emit, inst );
2043 }
2044
2045
2046 /**
2047  * Emit a simple instruction which operates on one scalar value (not
2048 * a vector). Ex: LG2, RCP, RSQ.
2049 */
2050 static boolean
2051 emit_scalar_op1(struct svga_shader_emitter *emit,
2052 unsigned opcode,
2053 const struct tgsi_full_instruction *insn)
2054 {
2055 SVGA3dShaderInstToken inst;
2056 SVGA3dShaderDestToken dst;
2057 struct src_register src;
2058
2059 inst = inst_token( opcode );
2060 dst = translate_dst_register( emit, insn, 0 );
2061 src = translate_src_register( emit, &insn->Src[0] );
2062 src = scalar( src, TGSI_SWIZZLE_X );
2063
2064 return submit_op1( emit, inst, dst, src );
2065 }
2066
2067
2068 /**
2069 * Translate/emit a simple instruction (one which has no special-case
2070 * code) such as ADD, MUL, MIN, MAX.
2071 */
2072 static boolean
2073 emit_simple_instruction(struct svga_shader_emitter *emit,
2074 unsigned opcode,
2075 const struct tgsi_full_instruction *insn)
2076 {
2077 const struct tgsi_full_src_register *src = insn->Src;
2078 SVGA3dShaderInstToken inst;
2079 SVGA3dShaderDestToken dst;
2080
2081 inst = inst_token( opcode );
2082 dst = translate_dst_register( emit, insn, 0 );
2083
2084 switch (insn->Instruction.NumSrcRegs) {
2085 case 0:
2086 return submit_op0( emit, inst, dst );
2087 case 1:
2088 return submit_op1( emit, inst, dst,
2089 translate_src_register( emit, &src[0] ));
2090 case 2:
2091 return submit_op2( emit, inst, dst,
2092 translate_src_register( emit, &src[0] ),
2093 translate_src_register( emit, &src[1] ) );
2094 case 3:
2095 return submit_op3( emit, inst, dst,
2096 translate_src_register( emit, &src[0] ),
2097 translate_src_register( emit, &src[1] ),
2098 translate_src_register( emit, &src[2] ) );
2099 default:
2100 assert(0);
2101 return FALSE;
2102 }
2103 }
2104
2105
2106 /**
2107  * TGSI_OPCODE_MOV is only special-cased here to detect the
2108 * svga_fragment_shader::constant_color_output case.
2109 */
2110 static boolean
2111 emit_mov(struct svga_shader_emitter *emit,
2112 const struct tgsi_full_instruction *insn)
2113 {
2114 const struct tgsi_full_src_register *src = &insn->Src[0];
2115 const struct tgsi_full_dst_register *dst = &insn->Dst[0];
2116
2117 if (emit->unit == PIPE_SHADER_FRAGMENT &&
2118 dst->Register.File == TGSI_FILE_OUTPUT &&
2119 dst->Register.Index == 0 &&
2120 src->Register.File == TGSI_FILE_CONSTANT &&
2121 !src->Register.Indirect) {
2122 emit->constant_color_output = TRUE;
2123 }
2124
2125 return emit_simple_instruction(emit, SVGA3DOP_MOV, insn);
2126 }
2127
2128
2129 /**
2130 * Translate/emit TGSI DDX, DDY instructions.
2131 */
2132 static boolean
2133 emit_deriv(struct svga_shader_emitter *emit,
2134 const struct tgsi_full_instruction *insn )
2135 {
2136 if (emit->dynamic_branching_level > 0 &&
2137 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
2138 {
2139 SVGA3dShaderDestToken dst =
2140 translate_dst_register( emit, insn, 0 );
2141
2142       /* Deriv opcodes aren't valid inside dynamic branching; work around
2143        * this by zeroing out the destination.
2144 */
2145 if (!submit_op1(emit,
2146 inst_token( SVGA3DOP_MOV ),
2147 dst,
2148 get_zero_immediate(emit)))
2149 return FALSE;
2150
2151 return TRUE;
2152 }
2153 else {
2154 unsigned opcode;
2155 const struct tgsi_full_src_register *reg = &insn->Src[0];
2156 SVGA3dShaderInstToken inst;
2157 SVGA3dShaderDestToken dst;
2158 struct src_register src0;
2159
2160 switch (insn->Instruction.Opcode) {
2161 case TGSI_OPCODE_DDX:
2162 opcode = SVGA3DOP_DSX;
2163 break;
2164 case TGSI_OPCODE_DDY:
2165 opcode = SVGA3DOP_DSY;
2166 break;
2167 default:
2168 return FALSE;
2169 }
2170
2171 inst = inst_token( opcode );
2172 dst = translate_dst_register( emit, insn, 0 );
2173 src0 = translate_src_register( emit, reg );
2174
2175       /* We cannot use negate or abs modifiers on the source of a dsx/dsy instruction.
2176 */
2177 if (reg->Register.Absolute ||
2178 reg->Register.Negate) {
2179 SVGA3dShaderDestToken temp = get_temp( emit );
2180
2181 if (!emit_repl( emit, temp, &src0 ))
2182 return FALSE;
2183 }
2184
2185 return submit_op1( emit, inst, dst, src0 );
2186 }
2187 }
2188
2189
2190 /**
2191  * Translate/emit ARL (Address Register Load) instruction, which moves
2192  * a value into the special 'address' register.  It is used to implement
2193  * indirect/variable indexing into arrays.
2194 */
2195 static boolean
2196 emit_arl(struct svga_shader_emitter *emit,
2197 const struct tgsi_full_instruction *insn)
2198 {
2199 ++emit->current_arl;
2200 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2201 /* MOVA not present in pixel shader instruction set.
2202 * Ignore this instruction altogether since it is
2203 * only used for loop counters -- and for that
2204 * we reference aL directly.
2205 */
2206 return TRUE;
2207 }
2208 if (svga_arl_needs_adjustment( emit )) {
2209 return emit_fake_arl( emit, insn );
2210 } else {
2211 /* no need to adjust, just emit straight arl */
2212 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
2213 }
2214 }
2215
2216
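/**
* Translate/emit TGSI POW instruction: dst = pow(src0.x, src1.x),
* replicated to all written channels.
*/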
2217 static boolean
2218 emit_pow(struct svga_shader_emitter *emit,
2219 const struct tgsi_full_instruction *insn)
2220 {
2221 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2222 struct src_register src0 = translate_src_register(
2223 emit, &insn->Src[0] );
2224 struct src_register src1 = translate_src_register(
2225 emit, &insn->Src[1] );
2226 boolean need_tmp = FALSE;
2227
2228 /* POW can only output to a temporary */
2229 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
2230 need_tmp = TRUE;
2231
2232 /* POW src1 must not be the same register as dst */
2233 if (alias_src_dst( src1, dst ))
2234 need_tmp = TRUE;
2235
2236 /* it's a scalar op */
2237 src0 = scalar( src0, TGSI_SWIZZLE_X );
2238 src1 = scalar( src1, TGSI_SWIZZLE_X );
2239
2240 if (need_tmp) {
2241 SVGA3dShaderDestToken tmp =
2242 writemask(get_temp( emit ), TGSI_WRITEMASK_X );
2243
2244 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
2245 return FALSE;
2246
2247 return submit_op1(emit, inst_token( SVGA3DOP_MOV ),
2248 dst, scalar(src(tmp), 0) );
2249 }
2250 else {
2251 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
2252 }
2253 }
2254
2255
2256 /**
2257 * Translate/emit TGSI XPD (vector cross product) instruction.
2258 */
2259 static boolean
2260 emit_xpd(struct svga_shader_emitter *emit,
2261 const struct tgsi_full_instruction *insn)
2262 {
2263 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2264 const struct src_register src0 = translate_src_register(
2265 emit, &insn->Src[0] );
2266 const struct src_register src1 = translate_src_register(
2267 emit, &insn->Src[1] );
2268 boolean need_dst_tmp = FALSE;
2269
2270 /* XPD can only output to a temporary */
2271 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
2272 need_dst_tmp = TRUE;
2273
2274    /* The dst reg must not be the same as src0 or src1 */
2275 if (alias_src_dst(src0, dst) ||
2276 alias_src_dst(src1, dst))
2277 need_dst_tmp = TRUE;
2278
2279 if (need_dst_tmp) {
2280 SVGA3dShaderDestToken tmp = get_temp( emit );
2281
2282 /* Obey DX9 restrictions on mask:
2283 */
2284 tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
2285
2286 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
2287 return FALSE;
2288
2289 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
2290 return FALSE;
2291 }
2292 else {
2293 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
2294 return FALSE;
2295 }
2296
2297 /* Need to emit 1.0 to dst.w?
2298 */
2299 if (dst.mask & TGSI_WRITEMASK_W) {
2300 struct src_register one = get_one_immediate( emit );
2301
2302 if (!submit_op1(emit,
2303 inst_token( SVGA3DOP_MOV ),
2304 writemask(dst, TGSI_WRITEMASK_W),
2305 one))
2306 return FALSE;
2307 }
2308
2309 return TRUE;
2310 }
2311
2312
2313 /**
2314 * Emit a LRP (linear interpolation) instruction.
2315 */
2316 static boolean
2317 submit_lrp(struct svga_shader_emitter *emit,
2318 SVGA3dShaderDestToken dst,
2319 struct src_register src0,
2320 struct src_register src1,
2321 struct src_register src2)
2322 {
2323 SVGA3dShaderDestToken tmp;
2324 boolean need_dst_tmp = FALSE;
2325
2326 /* The dst reg must be a temporary, and not be the same as src0 or src2 */
2327 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2328 alias_src_dst(src0, dst) ||
2329 alias_src_dst(src2, dst))
2330 need_dst_tmp = TRUE;
2331
2332 if (need_dst_tmp) {
2333 tmp = get_temp( emit );
2334 tmp.mask = dst.mask;
2335 }
2336 else {
2337 tmp = dst;
2338 }
2339
2340 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
2341 return FALSE;
2342
2343 if (need_dst_tmp) {
2344 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
2345 return FALSE;
2346 }
2347
2348 return TRUE;
2349 }
2350
2351
2352 /**
2353 * Translate/emit LRP (Linear Interpolation) instruction.
2354 */
2355 static boolean
2356 emit_lrp(struct svga_shader_emitter *emit,
2357 const struct tgsi_full_instruction *insn)
2358 {
2359 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2360 const struct src_register src0 = translate_src_register(
2361 emit, &insn->Src[0] );
2362 const struct src_register src1 = translate_src_register(
2363 emit, &insn->Src[1] );
2364 const struct src_register src2 = translate_src_register(
2365 emit, &insn->Src[2] );
2366
2367 return submit_lrp(emit, dst, src0, src1, src2);
2368 }
2369
2370 /**
2371  * Translate/emit DST (distance vector) instruction.
2372 */
2373 static boolean
2374 emit_dst_insn(struct svga_shader_emitter *emit,
2375 const struct tgsi_full_instruction *insn)
2376 {
2377 if (emit->unit == PIPE_SHADER_VERTEX) {
2378 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
2379 */
2380 return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
2381 }
2382 else {
2383 /* result[0] = 1 * 1;
2384 * result[1] = a[1] * b[1];
2385 * result[2] = a[2] * 1;
2386 * result[3] = 1 * b[3];
2387 */
2388 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2389 SVGA3dShaderDestToken tmp;
2390 const struct src_register src0 = translate_src_register(
2391 emit, &insn->Src[0] );
2392 const struct src_register src1 = translate_src_register(
2393 emit, &insn->Src[1] );
2394 boolean need_tmp = FALSE;
2395
2396 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2397 alias_src_dst(src0, dst) ||
2398 alias_src_dst(src1, dst))
2399 need_tmp = TRUE;
2400
2401 if (need_tmp) {
2402 tmp = get_temp( emit );
2403 }
2404 else {
2405 tmp = dst;
2406 }
2407
2408 /* tmp.xw = 1.0
2409 */
2410 if (tmp.mask & TGSI_WRITEMASK_XW) {
2411 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2412 writemask(tmp, TGSI_WRITEMASK_XW ),
2413 get_one_immediate(emit)))
2414 return FALSE;
2415 }
2416
2417 /* tmp.yz = src0
2418 */
2419 if (tmp.mask & TGSI_WRITEMASK_YZ) {
2420 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2421 writemask(tmp, TGSI_WRITEMASK_YZ ),
2422 src0))
2423 return FALSE;
2424 }
2425
2426 /* tmp.yw = tmp * src1
2427 */
2428 if (tmp.mask & TGSI_WRITEMASK_YW) {
2429 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2430 writemask(tmp, TGSI_WRITEMASK_YW ),
2431 src(tmp),
2432 src1))
2433 return FALSE;
2434 }
2435
2436 /* dst = tmp
2437 */
2438 if (need_tmp) {
2439 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2440 dst,
2441 src(tmp)))
2442 return FALSE;
2443 }
2444 }
2445
2446 return TRUE;
2447 }
2448
2449
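/**
* Translate/emit TGSI EXP instruction:
*   dst.x = 2 ^ floor(src0.x)
*   dst.y = src0.x - floor(src0.x)
*   dst.z = 2 ^ src0.x   (partial precision)
*   dst.w = 1.0
*/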
2450 static boolean
2451 emit_exp(struct svga_shader_emitter *emit,
2452 const struct tgsi_full_instruction *insn)
2453 {
2454 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2455 struct src_register src0 =
2456 translate_src_register( emit, &insn->Src[0] );
2457 SVGA3dShaderDestToken fraction;
2458
2459 if (dst.mask & TGSI_WRITEMASK_Y)
2460 fraction = dst;
2461 else if (dst.mask & TGSI_WRITEMASK_X)
2462 fraction = get_temp( emit );
2463 else
2464 fraction.value = 0;
2465
2466    /* If x or y is being written, compute src0 - floor(src0); the fraction is needed for both.
2467     */
2468 if (dst.mask & TGSI_WRITEMASK_XY) {
2469 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2470 writemask( fraction, TGSI_WRITEMASK_Y ),
2471 src0 ))
2472 return FALSE;
2473 }
2474
2475 /* If x is being written, fill it with 2 ^ floor(src0).
2476 */
2477 if (dst.mask & TGSI_WRITEMASK_X) {
2478 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2479 writemask( dst, TGSI_WRITEMASK_X ),
2480 src0,
2481 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
2482 return FALSE;
2483
2484 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2485 writemask( dst, TGSI_WRITEMASK_X ),
2486 scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
2487 return FALSE;
2488
2489 if (!(dst.mask & TGSI_WRITEMASK_Y))
2490 release_temp( emit, fraction );
2491 }
2492
2493 /* If z is being written, fill it with 2 ^ src0 (partial precision).
2494 */
2495 if (dst.mask & TGSI_WRITEMASK_Z) {
2496 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
2497 writemask( dst, TGSI_WRITEMASK_Z ),
2498 src0 ) )
2499 return FALSE;
2500 }
2501
2502 /* If w is being written, fill it with one.
2503 */
2504 if (dst.mask & TGSI_WRITEMASK_W) {
2505 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2506 writemask(dst, TGSI_WRITEMASK_W),
2507 get_one_immediate(emit)))
2508 return FALSE;
2509 }
2510
2511 return TRUE;
2512 }
2513
2514
2515 /**
2516 * Translate/emit LIT (Lighting helper) instruction.
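 * LIT computes:
 *   dst.x = 1
 *   dst.y = (src.x > 0) ? src.x : 0
 *   dst.z = (src.x > 0) ? pow(src.y, src.w) : 0
 *   dst.w = 1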
2517 */
2518 static boolean
2519 emit_lit(struct svga_shader_emitter *emit,
2520 const struct tgsi_full_instruction *insn)
2521 {
2522 if (emit->unit == PIPE_SHADER_VERTEX) {
2523 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
2524 */
2525 return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
2526 }
2527 else {
2528       /* D3D vs. GL semantics can be fairly easily accommodated by
2529 * variations on this sequence.
2530 *
2531 * GL:
2532 * tmp.y = src.x
2533 * tmp.z = pow(src.y,src.w)
2534 * p0 = src0.xxxx > 0
2535 * result = zero.wxxw
2536 * (p0) result.yz = tmp
2537 *
2538 * D3D:
2539 * tmp.y = src.x
2540 * tmp.z = pow(src.y,src.w)
2541 * p0 = src0.xxyy > 0
2542 * result = zero.wxxw
2543 * (p0) result.yz = tmp
2544 *
2545 * Will implement the GL version for now.
2546 */
2547 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2548 SVGA3dShaderDestToken tmp = get_temp( emit );
2549 const struct src_register src0 = translate_src_register(
2550 emit, &insn->Src[0] );
2551
2552 /* tmp = pow(src.y, src.w)
2553 */
2554 if (dst.mask & TGSI_WRITEMASK_Z) {
2555 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
2556 tmp,
2557 scalar(src0, 1),
2558 scalar(src0, 3)))
2559 return FALSE;
2560 }
2561
2562 /* tmp.y = src.x
2563 */
2564 if (dst.mask & TGSI_WRITEMASK_Y) {
2565 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2566 writemask(tmp, TGSI_WRITEMASK_Y ),
2567 scalar(src0, 0)))
2568 return FALSE;
2569 }
2570
2571       /* Can't quite do this with emit_conditional() due to the extra
2572 * writemask on the predicated mov:
2573 */
2574 {
2575 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
2576 struct src_register predsrc;
2577
2578 /* D3D vs GL semantics:
2579 */
2580 if (0)
2581 predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
2582 else
2583 predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
2584
2585          /* SETP predsrc, GT, {0}.x  (predsrc is src0.xxxx for GL) */
2586 if (!submit_op2( emit,
2587 inst_token_setp(SVGA3DOPCOMP_GT),
2588 pred_reg,
2589 predsrc,
2590 get_zero_immediate(emit)))
2591 return FALSE;
2592
2593 /* MOV dst, fail */
2594 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
2595 get_immediate(emit, 1.0f, 0.0f, 0.0f, 1.0f)))
2596 return FALSE;
2597
2598 /* MOV dst.yz, tmp (predicated)
2599 *
2600 * Note that the predicate reg (and possible modifiers) is passed
2601 * as the first source argument.
2602 */
2603 if (dst.mask & TGSI_WRITEMASK_YZ) {
2604 if (!submit_op2( emit,
2605 inst_token_predicated(SVGA3DOP_MOV),
2606 writemask(dst, TGSI_WRITEMASK_YZ),
2607 src( pred_reg ), src( tmp ) ))
2608 return FALSE;
2609 }
2610 }
2611 }
2612
2613 return TRUE;
2614 }
2615
2616
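/**
* Translate/emit TGSI EX2 instruction: dst = 2 ^ src0.x, replicated to
* all written channels.
*/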
2617 static boolean
2618 emit_ex2(struct svga_shader_emitter *emit,
2619 const struct tgsi_full_instruction *insn)
2620 {
2621 SVGA3dShaderInstToken inst;
2622 SVGA3dShaderDestToken dst;
2623 struct src_register src0;
2624
2625 inst = inst_token( SVGA3DOP_EXP );
2626 dst = translate_dst_register( emit, insn, 0 );
2627 src0 = translate_src_register( emit, &insn->Src[0] );
2628 src0 = scalar( src0, TGSI_SWIZZLE_X );
2629
2630 if (dst.mask != TGSI_WRITEMASK_XYZW) {
2631 SVGA3dShaderDestToken tmp = get_temp( emit );
2632
2633 if (!submit_op1( emit, inst, tmp, src0 ))
2634 return FALSE;
2635
2636 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2637 dst,
2638 scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2639 }
2640
2641 return submit_op1( emit, inst, dst, src0 );
2642 }
2643
2644
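/**
* Translate/emit TGSI LOG instruction:
*   dst.x = floor(log2( abs(src0.x) ))
*   dst.y = abs(src0.x) / (2 ^ floor(log2( abs(src0.x) )))
*   dst.z = log2( abs(src0.x) )
*   dst.w = 1.0
*/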
2645 static boolean
2646 emit_log(struct svga_shader_emitter *emit,
2647 const struct tgsi_full_instruction *insn)
2648 {
2649 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2650 struct src_register src0 =
2651 translate_src_register( emit, &insn->Src[0] );
2652 SVGA3dShaderDestToken abs_tmp;
2653 struct src_register abs_src0;
2654 SVGA3dShaderDestToken log2_abs;
2655
2656 abs_tmp.value = 0;
2657
2658 if (dst.mask & TGSI_WRITEMASK_Z)
2659 log2_abs = dst;
2660 else if (dst.mask & TGSI_WRITEMASK_XY)
2661 log2_abs = get_temp( emit );
2662 else
2663 log2_abs.value = 0;
2664
2665    /* Compute log2( abs( src0 ) ) if x, y or z is being written (x and y need it too).
2666     */
2667 if (dst.mask & TGSI_WRITEMASK_XYZ) {
2668 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2669 abs_src0 = src0;
2670 else {
2671 abs_tmp = get_temp( emit );
2672
2673 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2674 abs_tmp,
2675 src0 ) )
2676 return FALSE;
2677
2678 abs_src0 = src( abs_tmp );
2679 }
2680
2681 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2682
2683 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2684 writemask( log2_abs, TGSI_WRITEMASK_Z ),
2685 abs_src0 ) )
2686 return FALSE;
2687 }
2688
2689 if (dst.mask & TGSI_WRITEMASK_XY) {
2690 SVGA3dShaderDestToken floor_log2;
2691
2692 if (dst.mask & TGSI_WRITEMASK_X)
2693 floor_log2 = dst;
2694 else
2695 floor_log2 = get_temp( emit );
2696
2697 /* If x is being written, fill it with floor( log2( abs( src0 ) ) ).
2698 */
2699 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2700 writemask( floor_log2, TGSI_WRITEMASK_X ),
2701 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2702 return FALSE;
2703
2704 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2705 writemask( floor_log2, TGSI_WRITEMASK_X ),
2706 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2707 negate( src( floor_log2 ) ) ) )
2708 return FALSE;
2709
2710 /* If y is being written, fill it with
2711 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2712 */
2713 if (dst.mask & TGSI_WRITEMASK_Y) {
2714 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2715 writemask( dst, TGSI_WRITEMASK_Y ),
2716 negate( scalar( src( floor_log2 ),
2717 TGSI_SWIZZLE_X ) ) ) )
2718 return FALSE;
2719
2720 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2721 writemask( dst, TGSI_WRITEMASK_Y ),
2722 src( dst ),
2723 abs_src0 ) )
2724 return FALSE;
2725 }
2726
2727 if (!(dst.mask & TGSI_WRITEMASK_X))
2728 release_temp( emit, floor_log2 );
2729
2730 if (!(dst.mask & TGSI_WRITEMASK_Z))
2731 release_temp( emit, log2_abs );
2732 }
2733
2734    if ((dst.mask & TGSI_WRITEMASK_XYZ) && src0.base.srcMod &&
2735 src0.base.srcMod != SVGA3DSRCMOD_ABS)
2736 release_temp( emit, abs_tmp );
2737
2738 /* If w is being written, fill it with one.
2739 */
2740 if (dst.mask & TGSI_WRITEMASK_W) {
2741 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2742 writemask(dst, TGSI_WRITEMASK_W),
2743 get_one_immediate(emit)))
2744 return FALSE;
2745 }
2746
2747 return TRUE;
2748 }
2749
2750
2751 /**
2752 * Translate TGSI TRUNC or ROUND instruction.
2753 * We need to truncate toward zero. Ex: trunc(-1.9) = -1
2754 * Different approaches are needed for VS versus PS.
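* For example, the ROUND path below computes round(-2.3) as
* t0 = abs(-2.3) + 0.5 = 2.8, t1 = t0 - frc(t0) = 2.0, then multiplies
* by the sign of the source to give -2.0.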
2755 */
2756 static boolean
2757 emit_trunc_round(struct svga_shader_emitter *emit,
2758 const struct tgsi_full_instruction *insn,
2759 boolean round)
2760 {
2761 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
2762 const struct src_register src0 =
2763 translate_src_register(emit, &insn->Src[0] );
2764 SVGA3dShaderDestToken t1 = get_temp(emit);
2765
2766 if (round) {
2767 SVGA3dShaderDestToken t0 = get_temp(emit);
2768 struct src_register half = get_half_immediate(emit);
2769
2770 /* t0 = abs(src0) + 0.5 */
2771 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0,
2772 absolute(src0), half))
2773 return FALSE;
2774
2775 /* t1 = fract(t0) */
2776 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0)))
2777 return FALSE;
2778
2779 /* t1 = t0 - t1 */
2780 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0),
2781 negate(src(t1))))
2782 return FALSE;
2783 }
2784 else {
2785 /* trunc */
2786
2787 /* t1 = fract(abs(src0)) */
2788 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0)))
2789 return FALSE;
2790
2791 /* t1 = abs(src0) - t1 */
2792 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0),
2793 negate(src(t1))))
2794 return FALSE;
2795 }
2796
2797 /*
2798 * Now we need to multiply t1 by the sign of the original value.
2799 */
2800 if (emit->unit == PIPE_SHADER_VERTEX) {
2801 /* For VS: use SGN instruction */
2802 /* Need two extra/dummy registers: */
2803 SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit),
2804 t4 = get_temp(emit);
2805
2806 /* t2 = sign(src0) */
2807 if (!submit_op3(emit, inst_token(SVGA3DOP_SGN), t2, src0,
2808 src(t3), src(t4)))
2809 return FALSE;
2810
2811 /* dst = t1 * t2 */
2812 if (!submit_op2(emit, inst_token(SVGA3DOP_MUL), dst, src(t1), src(t2)))
2813 return FALSE;
2814 }
2815 else {
2816 /* For FS: Use CMP instruction */
2817 return submit_op3(emit, inst_token( SVGA3DOP_CMP ), dst,
2818 src0, src(t1), negate(src(t1)));
2819 }
2820
2821 return TRUE;
2822 }
2823
2824
2825 /**
2826 * Translate/emit "begin subroutine" instruction/marker/label.
2827 */
2828 static boolean
2829 emit_bgnsub(struct svga_shader_emitter *emit,
2830 unsigned position,
2831 const struct tgsi_full_instruction *insn)
2832 {
2833 unsigned i;
2834
2835 /* Note that we've finished the main function and are now emitting
2836 * subroutines. This affects how we terminate the generated
2837 * shader.
2838 */
2839 emit->in_main_func = FALSE;
2840
2841 for (i = 0; i < emit->nr_labels; i++) {
2842 if (emit->label[i] == position) {
2843 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2844 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2845 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2846 }
2847 }
2848
2849 assert(0);
2850 return TRUE;
2851 }
2852
2853
2854 /**
2855 * Translate/emit subroutine call instruction.
2856 */
2857 static boolean
2858 emit_call(struct svga_shader_emitter *emit,
2859 const struct tgsi_full_instruction *insn)
2860 {
2861 unsigned position = insn->Label.Label;
2862 unsigned i;
2863
2864 for (i = 0; i < emit->nr_labels; i++) {
2865 if (emit->label[i] == position)
2866 break;
2867 }
2868
2869 if (emit->nr_labels == Elements(emit->label))
2870 return FALSE;
2871
2872 if (i == emit->nr_labels) {
2873 emit->label[i] = position;
2874 emit->nr_labels++;
2875 }
2876
2877 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2878 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2879 }
2880
2881
2882 /**
2883 * Called at the end of the shader. Actually, emit special "fix-up"
2884 * code for the vertex/fragment shader.
2885 */
2886 static boolean
2887 emit_end(struct svga_shader_emitter *emit)
2888 {
2889 if (emit->unit == PIPE_SHADER_VERTEX) {
2890 return emit_vs_postamble( emit );
2891 }
2892 else {
2893 return emit_ps_postamble( emit );
2894 }
2895 }
2896
2897
2898 /**
2899 * Translate any TGSI instruction to SVGA.
2900 */
2901 static boolean
2902 svga_emit_instruction(struct svga_shader_emitter *emit,
2903 unsigned position,
2904 const struct tgsi_full_instruction *insn)
2905 {
2906 switch (insn->Instruction.Opcode) {
2907
2908 case TGSI_OPCODE_ARL:
2909 return emit_arl( emit, insn );
2910
2911 case TGSI_OPCODE_TEX:
2912 case TGSI_OPCODE_TXB:
2913 case TGSI_OPCODE_TXP:
2914 case TGSI_OPCODE_TXL:
2915 case TGSI_OPCODE_TXD:
2916 return emit_tex( emit, insn );
2917
2918 case TGSI_OPCODE_DDX:
2919 case TGSI_OPCODE_DDY:
2920 return emit_deriv( emit, insn );
2921
2922 case TGSI_OPCODE_BGNSUB:
2923 return emit_bgnsub( emit, position, insn );
2924
2925 case TGSI_OPCODE_ENDSUB:
2926 return TRUE;
2927
2928 case TGSI_OPCODE_CAL:
2929 return emit_call( emit, insn );
2930
2931 case TGSI_OPCODE_FLR:
2932 return emit_floor( emit, insn );
2933
2934 case TGSI_OPCODE_TRUNC:
2935 return emit_trunc_round( emit, insn, FALSE );
2936
2937 case TGSI_OPCODE_ROUND:
2938 return emit_trunc_round( emit, insn, TRUE );
2939
2940 case TGSI_OPCODE_CEIL:
2941 return emit_ceil( emit, insn );
2942
2943 case TGSI_OPCODE_CMP:
2944 return emit_cmp( emit, insn );
2945
2946 case TGSI_OPCODE_DIV:
2947 return emit_div( emit, insn );
2948
2949 case TGSI_OPCODE_DP2:
2950 return emit_dp2( emit, insn );
2951
2952 case TGSI_OPCODE_DPH:
2953 return emit_dph( emit, insn );
2954
2955 case TGSI_OPCODE_COS:
2956 return emit_cos( emit, insn );
2957
2958 case TGSI_OPCODE_SIN:
2959 return emit_sin( emit, insn );
2960
2961 case TGSI_OPCODE_SCS:
2962 return emit_sincos( emit, insn );
2963
2964 case TGSI_OPCODE_END:
2965 /* TGSI always finishes the main func with an END */
2966 return emit_end( emit );
2967
2968 case TGSI_OPCODE_KILL_IF:
2969 return emit_kill_if( emit, insn );
2970
2971 /* Selection opcodes. The underlying language is fairly
2972 * non-orthogonal about these.
2973 */
2974 case TGSI_OPCODE_SEQ:
2975 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2976
2977 case TGSI_OPCODE_SNE:
2978 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2979
2980 case TGSI_OPCODE_SGT:
2981 return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2982
2983 case TGSI_OPCODE_SGE:
2984 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2985
2986 case TGSI_OPCODE_SLT:
2987 return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2988
2989 case TGSI_OPCODE_SLE:
2990 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2991
2992 case TGSI_OPCODE_SUB:
2993 return emit_sub( emit, insn );
2994
2995 case TGSI_OPCODE_POW:
2996 return emit_pow( emit, insn );
2997
2998 case TGSI_OPCODE_EX2:
2999 return emit_ex2( emit, insn );
3000
3001 case TGSI_OPCODE_EXP:
3002 return emit_exp( emit, insn );
3003
3004 case TGSI_OPCODE_LOG:
3005 return emit_log( emit, insn );
3006
3007 case TGSI_OPCODE_LG2:
3008 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
3009
3010 case TGSI_OPCODE_RSQ:
3011 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
3012
3013 case TGSI_OPCODE_RCP:
3014 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
3015
3016 case TGSI_OPCODE_CONT:
3017 /* not expected (we return PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED = 0) */
3018 return FALSE;
3019
3020 case TGSI_OPCODE_RET:
3021 /* This is a noop -- we tell mesa that we can't support RET
3022 * within a function (early return), so this will always be
3023 * followed by an ENDSUB.
3024 */
3025 return TRUE;
3026
3027 /* These aren't actually used by any of the frontends we care
3028 * about:
3029 */
3030 case TGSI_OPCODE_CLAMP:
3031 case TGSI_OPCODE_AND:
3032 case TGSI_OPCODE_OR:
3033 case TGSI_OPCODE_I2F:
3034 case TGSI_OPCODE_NOT:
3035 case TGSI_OPCODE_SHL:
3036 case TGSI_OPCODE_ISHR:
3037 case TGSI_OPCODE_XOR:
3038 return FALSE;
3039
3040 case TGSI_OPCODE_IF:
3041 return emit_if( emit, insn );
3042 case TGSI_OPCODE_ELSE:
3043 return emit_else( emit, insn );
3044 case TGSI_OPCODE_ENDIF:
3045 return emit_endif( emit, insn );
3046
3047 case TGSI_OPCODE_BGNLOOP:
3048 return emit_bgnloop( emit, insn );
3049 case TGSI_OPCODE_ENDLOOP:
3050 return emit_endloop( emit, insn );
3051 case TGSI_OPCODE_BRK:
3052 return emit_brk( emit, insn );
3053
3054 case TGSI_OPCODE_XPD:
3055 return emit_xpd( emit, insn );
3056
3057 case TGSI_OPCODE_KILL:
3058 return emit_kill( emit, insn );
3059
3060 case TGSI_OPCODE_DST:
3061 return emit_dst_insn( emit, insn );
3062
3063 case TGSI_OPCODE_LIT:
3064 return emit_lit( emit, insn );
3065
3066 case TGSI_OPCODE_LRP:
3067 return emit_lrp( emit, insn );
3068
3069 case TGSI_OPCODE_SSG:
3070 return emit_ssg( emit, insn );
3071
3072 case TGSI_OPCODE_MOV:
3073 return emit_mov( emit, insn );
3074
3075 default:
3076 {
3077 unsigned opcode = translate_opcode(insn->Instruction.Opcode);
3078
3079 if (opcode == SVGA3DOP_LAST_INST)
3080 return FALSE;
3081
3082 if (!emit_simple_instruction( emit, opcode, insn ))
3083 return FALSE;
3084 }
3085 }
3086
3087 return TRUE;
3088 }
3089
3090
3091 /**
3092 * Translate/emit a TGSI IMMEDIATE declaration.
3093 * An immediate vector is a constant that's hard-coded into the shader.
3094 */
3095 static boolean
3096 svga_emit_immediate(struct svga_shader_emitter *emit,
3097 const struct tgsi_full_immediate *imm)
3098 {
3099 static const float id[4] = {0,0,0,1};
3100 float value[4];
3101 unsigned i;
3102
3103 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
3104 for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
3105 float f = imm->u[i].Float;
3106 value[i] = util_is_inf_or_nan(f) ? 0.0f : f;
3107 }
3108
3109    /* If the immediate has fewer than four values, fill in the remaining
3110 * positions from id={0,0,0,1}.
3111 */
3112 for ( ; i < 4; i++ )
3113 value[i] = id[i];
3114
3115 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
3116 emit->imm_start + emit->internal_imm_count++,
3117 value[0], value[1], value[2], value[3]);
3118 }
3119
3120
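/**
* Allocate the next hardware float constant, emit a DEF for it with the
* given values and return a src_register which references it.
*/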
3121 static boolean
3122 make_immediate(struct svga_shader_emitter *emit,
3123 float a, float b, float c, float d,
3124 struct src_register *out )
3125 {
3126 unsigned idx = emit->nr_hw_float_const++;
3127
3128 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
3129 idx, a, b, c, d ))
3130 return FALSE;
3131
3132 *out = src_register( SVGA3DREG_CONST, idx );
3133
3134 return TRUE;
3135 }
3136
3137
3138 /**
3139 * Emit special VS instructions at top of shader.
3140 */
3141 static boolean
3142 emit_vs_preamble(struct svga_shader_emitter *emit)
3143 {
3144 if (!emit->key.vs.need_prescale) {
3145 if (!make_immediate( emit, 0, 0, .5, .5,
3146 &emit->imm_0055))
3147 return FALSE;
3148 }
3149
3150 return TRUE;
3151 }
3152
3153
3154 /**
3155 * Emit special PS instructions at top of shader.
3156 */
3157 static boolean
3158 emit_ps_preamble(struct svga_shader_emitter *emit)
3159 {
3160 if (emit->ps_reads_pos && emit->info.reads_z) {
3161 /*
3162        * Assemble the position from various bits of inputs. Depth and W are
3163        * passed in a texcoord because D3D's vPos does not hold Z or W.
3164        * Also fix up the perspective interpolation.
3165 *
3166 * temp_pos.xy = vPos.xy
3167 * temp_pos.w = rcp(texcoord1.w);
3168 * temp_pos.z = texcoord1.z * temp_pos.w;
3169 */
3170 if (!submit_op1( emit,
3171 inst_token(SVGA3DOP_MOV),
3172 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ),
3173 emit->ps_true_pos ))
3174 return FALSE;
3175
3176 if (!submit_op1( emit,
3177 inst_token(SVGA3DOP_RCP),
3178 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ),
3179 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) ))
3180 return FALSE;
3181
3182 if (!submit_op2( emit,
3183 inst_token(SVGA3DOP_MUL),
3184 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ),
3185 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ),
3186 scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) ))
3187 return FALSE;
3188 }
3189
3190 return TRUE;
3191 }
3192
3193
3194 /**
3195 * Emit special PS instructions at end of shader.
3196 */
3197 static boolean
3198 emit_ps_postamble(struct svga_shader_emitter *emit)
3199 {
3200 unsigned i;
3201
3202 /* PS oDepth is incredibly fragile and it's very hard to catch the
3203 * types of usage that break it during shader emit. Easier just to
3204 * redirect the main program to a temporary and then only touch
3205 * oDepth with a hand-crafted MOV below.
3206 */
3207 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
3208 if (!submit_op1( emit,
3209 inst_token(SVGA3DOP_MOV),
3210 emit->true_pos,
3211 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
3212 return FALSE;
3213 }
3214
3215 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
3216 if (SVGA3dShaderGetRegType(emit->true_color_output[i].value) != 0) {
3217 /* Potentially override output colors with white for XOR
3218 * logicop workaround.
3219 */
3220 if (emit->unit == PIPE_SHADER_FRAGMENT &&
3221 emit->key.fs.white_fragments) {
3222 struct src_register one = get_one_immediate(emit);
3223
3224 if (!submit_op1( emit,
3225 inst_token(SVGA3DOP_MOV),
3226 emit->true_color_output[i],
3227 one ))
3228 return FALSE;
3229 }
3230 else if (emit->unit == PIPE_SHADER_FRAGMENT &&
3231 i < emit->key.fs.write_color0_to_n_cbufs) {
3232 /* Write temp color output [0] to true output [i] */
3233 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV),
3234 emit->true_color_output[i],
3235 src(emit->temp_color_output[0]))) {
3236 return FALSE;
3237 }
3238 }
3239 else {
3240 if (!submit_op1( emit,
3241 inst_token(SVGA3DOP_MOV),
3242 emit->true_color_output[i],
3243 src(emit->temp_color_output[i]) ))
3244 return FALSE;
3245 }
3246 }
3247 }
3248
3249 return TRUE;
3250 }
3251
3252
3253 /**
3254 * Emit special VS instructions at end of shader.
3255 */
3256 static boolean
3257 emit_vs_postamble(struct svga_shader_emitter *emit)
3258 {
3259 /* PSIZ output is incredibly fragile and it's very hard to catch
3260 * the types of usage that break it during shader emit. Easier
3261 * just to redirect the main program to a temporary and then only
3262 * touch PSIZ with a hand-crafted MOV below.
3263 */
3264 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
3265 if (!submit_op1( emit,
3266 inst_token(SVGA3DOP_MOV),
3267 emit->true_psiz,
3268 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
3269 return FALSE;
3270 }
3271
3272 /* Need to perform various manipulations on vertex position to cope
3273 * with the different GL and D3D clip spaces.
3274 */
3275 if (emit->key.vs.need_prescale) {
3276 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3277 SVGA3dShaderDestToken depth = emit->depth_pos;
3278 SVGA3dShaderDestToken pos = emit->true_pos;
3279 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
3280 struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
3281 offset + 0 );
3282 struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
3283 offset + 1 );
3284
3285 if (!submit_op1( emit,
3286 inst_token(SVGA3DOP_MOV),
3287 writemask(depth, TGSI_WRITEMASK_W),
3288 scalar(src(temp_pos), TGSI_SWIZZLE_W) ))
3289 return FALSE;
3290
3291 /* MUL temp_pos.xyz, temp_pos, prescale.scale
3292 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
3293 * --> Note that prescale.trans.w == 0
3294 */
3295 if (!submit_op2( emit,
3296 inst_token(SVGA3DOP_MUL),
3297 writemask(temp_pos, TGSI_WRITEMASK_XYZ),
3298 src(temp_pos),
3299 prescale_scale ))
3300 return FALSE;
3301
3302 if (!submit_op3( emit,
3303 inst_token(SVGA3DOP_MAD),
3304 pos,
3305 swizzle(src(temp_pos), 3, 3, 3, 3),
3306 prescale_trans,
3307 src(temp_pos)))
3308 return FALSE;
3309
3310 /* Also write to depth value */
3311 if (!submit_op3( emit,
3312 inst_token(SVGA3DOP_MAD),
3313 writemask(depth, TGSI_WRITEMASK_Z),
3314 swizzle(src(temp_pos), 3, 3, 3, 3),
3315 prescale_trans,
3316 src(temp_pos) ))
3317 return FALSE;
3318 }
3319 else {
3320 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3321 SVGA3dShaderDestToken depth = emit->depth_pos;
3322 SVGA3dShaderDestToken pos = emit->true_pos;
3323 struct src_register imm_0055 = emit->imm_0055;
3324
3325 /* Adjust GL clipping coordinate space to hardware (D3D-style):
3326 *
3327 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
3328 * MOV result.position, temp_pos
3329 */
3330 if (!submit_op2( emit,
3331 inst_token(SVGA3DOP_DP4),
3332 writemask(temp_pos, TGSI_WRITEMASK_Z),
3333 imm_0055,
3334 src(temp_pos) ))
3335 return FALSE;
3336
3337 if (!submit_op1( emit,
3338 inst_token(SVGA3DOP_MOV),
3339 pos,
3340 src(temp_pos) ))
3341 return FALSE;
3342
3343 /* Move the manipulated depth into the extra texcoord reg */
3344 if (!submit_op1( emit,
3345 inst_token(SVGA3DOP_MOV),
3346 writemask(depth, TGSI_WRITEMASK_ZW),
3347 src(temp_pos) ))
3348 return FALSE;
3349 }
3350
3351 return TRUE;
3352 }
3353
3354
3355 /**
3356 * For the pixel shader: emit the code which chooses the front
3357 * or back face color depending on triangle orientation.
3358 * This happens at the top of the fragment shader.
3359 *
3360 * 0: IF VFACE :4
3361 * 1: COLOR = FrontColor;
3362 * 2: ELSE
3363 * 3: COLOR = BackColor;
3364 * 4: ENDIF
3365 */
3366 static boolean
3367 emit_light_twoside(struct svga_shader_emitter *emit)
3368 {
3369 struct src_register vface, zero;
3370 struct src_register front[2];
3371 struct src_register back[2];
3372 SVGA3dShaderDestToken color[2];
3373 int count = emit->internal_color_count;
3374 unsigned i;
3375 SVGA3dShaderInstToken if_token;
3376
3377 if (count == 0)
3378 return TRUE;
3379
3380 vface = get_vface( emit );
3381 zero = get_zero_immediate(emit);
3382
3383 /* Can't use get_temp() to allocate the color reg as such
3384 * temporaries will be reclaimed after each instruction by the call
3385 * to reset_temp_regs().
3386 */
3387 for (i = 0; i < count; i++) {
3388 color[i] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ );
3389 front[i] = emit->input_map[emit->internal_color_idx[i]];
3390
3391 /* Back is always the next input:
3392 */
3393 back[i] = front[i];
3394 back[i].base.num = front[i].base.num + 1;
3395
3396 /* Reassign the input_map to the actual front-face color:
3397 */
3398 emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
3399 }
3400
3401 if_token = inst_token( SVGA3DOP_IFC );
3402
3403 if (emit->key.fs.front_ccw)
3404 if_token.control = SVGA3DOPCOMP_LT;
3405 else
3406 if_token.control = SVGA3DOPCOMP_GT;
3407
3408 if (!(emit_instruction( emit, if_token ) &&
3409 emit_src( emit, vface ) &&
3410 emit_src( emit, zero ) ))
3411 return FALSE;
3412
3413 for (i = 0; i < count; i++) {
3414 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
3415 return FALSE;
3416 }
3417
3418 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
3419 return FALSE;
3420
3421 for (i = 0; i < count; i++) {
3422 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
3423 return FALSE;
3424 }
3425
3426 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
3427 return FALSE;
3428
3429 return TRUE;
3430 }
3431
3432
3433 /**
3434 * Emit special setup code for the front/back face register in the FS.
3435 * 0: SETP_GT TEMP, VFACE, 0
3436 * where TEMP is a fake frontface register
3437 */
3438 static boolean
3439 emit_frontface(struct svga_shader_emitter *emit)
3440 {
3441 struct src_register vface;
3442 SVGA3dShaderDestToken temp;
3443 struct src_register pass, fail;
3444
3445 vface = get_vface( emit );
3446
3447 /* Can't use get_temp() to allocate the fake frontface reg as such
3448 * temporaries will be reclaimed after each instruction by the call
3449 * to reset_temp_regs().
3450 */
3451 temp = dst_register( SVGA3DREG_TEMP,
3452 emit->nr_hw_temp++ );
3453
3454 if (emit->key.fs.front_ccw) {
3455 pass = get_zero_immediate(emit);
3456 fail = get_one_immediate(emit);
3457 } else {
3458 pass = get_one_immediate(emit);
3459 fail = get_zero_immediate(emit);
3460 }
3461
3462 if (!emit_conditional(emit, PIPE_FUNC_GREATER,
3463 temp, vface, get_zero_immediate(emit),
3464 pass, fail))
3465 return FALSE;
3466
3467    /* Reassign the input_map entry to the fake front-face register:
3468 */
3469 emit->input_map[emit->internal_frontface_idx] = src(temp);
3470
3471 return TRUE;
3472 }
3473
3474
3475 /**
3476 * Emit code to invert the T component of the incoming texture coordinate.
3477 * This is used for drawing point sprites when
3478 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
3479 */
3480 static boolean
3481 emit_inverted_texcoords(struct svga_shader_emitter *emit)
3482 {
3483 unsigned inverted_texcoords = emit->inverted_texcoords;
3484
3485 while (inverted_texcoords) {
3486 const unsigned unit = ffs(inverted_texcoords) - 1;
3487
3488 assert(emit->inverted_texcoords & (1 << unit));
3489
3490 assert(unit < Elements(emit->ps_true_texcoord));
3491
3492 assert(unit < Elements(emit->ps_inverted_texcoord_input));
3493
3494 assert(emit->ps_inverted_texcoord_input[unit]
3495 < Elements(emit->input_map));
3496
3497 /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
3498 if (!submit_op3(emit,
3499 inst_token(SVGA3DOP_MAD),
3500 dst(emit->ps_inverted_texcoord[unit]),
3501 emit->ps_true_texcoord[unit],
3502 get_immediate(emit, 1.0f, -1.0f, 1.0f, 1.0f),
3503 get_immediate(emit, 0.0f, 1.0f, 0.0f, 0.0f)))
3504 return FALSE;
3505
3506 /* Reassign the input_map entry to the new texcoord register */
3507 emit->input_map[emit->ps_inverted_texcoord_input[unit]] =
3508 emit->ps_inverted_texcoord[unit];
3509
3510 inverted_texcoords &= ~(1 << unit);
3511 }
3512
3513 return TRUE;
3514 }
3515
3516
3517 /**
3518 * Emit code to adjust vertex shader inputs/attributes:
3519 * - Change range from [0,1] to [-1,1] (for normalized byte/short attribs).
3520 * - Set attrib W component = 1.
3521 */
3522 static boolean
3523 emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
3524 {
3525 unsigned adjust_mask = (emit->key.vs.adjust_attrib_range |
3526 emit->key.vs.adjust_attrib_w_1);
3527
3528 while (adjust_mask) {
3529 /* Adjust vertex attrib range and/or set W component = 1 */
3530 const unsigned index = u_bit_scan(&adjust_mask);
3531 struct src_register tmp;
3532
3533 /* allocate a temp reg */
3534 tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
3535 emit->nr_hw_temp++;
3536
3537 if (emit->key.vs.adjust_attrib_range & (1 << index)) {
3538 /* The vertex input/attribute is supposed to be a signed value in
3539 * the range [-1,1] but we actually fetched/converted it to the
3540 * range [0,1]. This most likely happens when the app specifies a
3541 * signed byte attribute but we interpreted it as unsigned bytes.
3542 * See also svga_translate_vertex_format().
3543 *
3544 * Here, we emit some extra instructions to adjust
3545 * the attribute values from [0,1] to [-1,1].
3546 *
3547 * The adjustment we implement is:
3548 * new_attrib = attrib * 2.0;
3549 * if (attrib >= 0.5)
3550 * new_attrib = new_attrib - 2.0;
3551 * This isn't exactly right (it's off by a bit or so) but close enough.
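* For example, a signed byte of -64 arrives as 192/255 ~= 0.753;
* 0.753 * 2.0 - 2.0 = -0.494, close to the exact value -64/127 ~= -0.504.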
3552 */
3553 SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0);
3554
3555 /* tmp = attrib * 2.0 */
3556 if (!submit_op2(emit,
3557 inst_token(SVGA3DOP_MUL),
3558 dst(tmp),
3559 emit->input_map[index],
3560 get_two_immediate(emit)))
3561 return FALSE;
3562
3563 /* pred = (attrib >= 0.5) */
3564 if (!submit_op2(emit,
3565 inst_token_setp(SVGA3DOPCOMP_GE),
3566 pred_reg,
3567 emit->input_map[index], /* vert attrib */
3568 get_half_immediate(emit))) /* 0.5 */
3569 return FALSE;
3570
3571 /* sub(pred) tmp, tmp, 2.0 */
3572 if (!submit_op3(emit,
3573 inst_token_predicated(SVGA3DOP_SUB),
3574 dst(tmp),
3575 src(pred_reg),
3576 tmp,
3577 get_two_immediate(emit)))
3578 return FALSE;
3579 }
3580 else {
3581 /* just copy the vertex input attrib to the temp register */
3582 if (!submit_op1(emit,
3583 inst_token(SVGA3DOP_MOV),
3584 dst(tmp),
3585 emit->input_map[index]))
3586 return FALSE;
3587 }
3588
3589 if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) {
3590 /* move 1 into W position of tmp */
3591 if (!submit_op1(emit,
3592 inst_token(SVGA3DOP_MOV),
3593 writemask(dst(tmp), TGSI_WRITEMASK_W),
3594 get_one_immediate(emit)))
3595 return FALSE;
3596 }
3597
3598 /* Reassign the input_map entry to the new tmp register */
3599 emit->input_map[index] = tmp;
3600 }
3601
3602 return TRUE;
3603 }
3604
3605
3606 /**
3607 * Determine if we need to create the "common" immediate value which is
3608 * used for generating useful vector constants such as {0,0,0,0} and
3609 * {1,1,1,1}.
3610 * We could just do this all the time except that we want to conserve
3611 * registers whenever possible.
3612 */
3613 static boolean
3614 needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
3615 {
3616 unsigned i;
3617
3618 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3619 if (emit->key.fs.light_twoside)
3620 return TRUE;
3621
3622 if (emit->key.fs.white_fragments)
3623 return TRUE;
3624
3625 if (emit->emit_frontface)
3626 return TRUE;
3627
3628 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
3629 emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 ||
3630 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
3631 return TRUE;
3632
3633 if (emit->inverted_texcoords)
3634 return TRUE;
3635
3636 /* look for any PIPE_SWIZZLE_ZERO/ONE terms */
3637 for (i = 0; i < emit->key.num_textures; i++) {
3638 if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
3639 emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
3640 emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
3641 emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
3642 return TRUE;
3643 }
3644
3645 for (i = 0; i < emit->key.num_textures; i++) {
3646 if (emit->key.tex[i].compare_mode
3647 == PIPE_TEX_COMPARE_R_TO_TEXTURE)
3648 return TRUE;
3649 }
3650 }
3651 else if (emit->unit == PIPE_SHADER_VERTEX) {
3652 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
3653 return TRUE;
3654 if (emit->key.vs.adjust_attrib_range ||
3655 emit->key.vs.adjust_attrib_w_1)
3656 return TRUE;
3657 }
3658
3659 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
3660 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
3661 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
3662 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
3663 emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 ||
3664 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
3665 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
3666 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
3667 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
3668 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
3669 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
3670 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
3671 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
3672 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
3673 emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1)
3674 return TRUE;
3675
3676 return FALSE;
3677 }
3678
3679
3680 /**
3681 * Do we need to create a looping constant?
3682 */
3683 static boolean
3684 needs_to_create_loop_const(const struct svga_shader_emitter *emit)
3685 {
3686 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
3687 }
3688
3689
3690 static boolean
3691 needs_to_create_arl_consts(const struct svga_shader_emitter *emit)
3692 {
3693 return (emit->num_arl_consts > 0);
3694 }
3695
3696
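/**
* Remember the most negative relative constant index seen for the given
* ARL so that compensating constants can be set up later (see
* create_arl_consts()).
*/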
3697 static boolean
3698 pre_parse_add_indirect( struct svga_shader_emitter *emit,
3699 int num, int current_arl)
3700 {
3701 unsigned i;
3702 assert(num < 0);
3703
3704 for (i = 0; i < emit->num_arl_consts; ++i) {
3705 if (emit->arl_consts[i].arl_num == current_arl)
3706 break;
3707 }
3708 /* new entry */
3709 if (emit->num_arl_consts == i) {
3710 ++emit->num_arl_consts;
3711 }
3712 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
3713 num :
3714 emit->arl_consts[i].number;
3715 emit->arl_consts[i].arl_num = current_arl;
3716 return TRUE;
3717 }
3718
3719
3720 static boolean
3721 pre_parse_instruction( struct svga_shader_emitter *emit,
3722 const struct tgsi_full_instruction *insn,
3723 int current_arl)
3724 {
3725 if (insn->Src[0].Register.Indirect &&
3726 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
3727 const struct tgsi_full_src_register *reg = &insn->Src[0];
3728 if (reg->Register.Index < 0) {
3729 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3730 }
3731 }
3732
3733 if (insn->Src[1].Register.Indirect &&
3734 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
3735 const struct tgsi_full_src_register *reg = &insn->Src[1];
3736 if (reg->Register.Index < 0) {
3737 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3738 }
3739 }
3740
3741 if (insn->Src[2].Register.Indirect &&
3742 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
3743 const struct tgsi_full_src_register *reg = &insn->Src[2];
3744 if (reg->Register.Index < 0) {
3745 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3746 }
3747 }
3748
3749 return TRUE;
3750 }
3751
3752
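/**
* Pre-scan the shader tokens for indirect (ARL-relative) source registers
* with negative constant indices (see pre_parse_add_indirect()).
*/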
3753 static boolean
3754 pre_parse_tokens( struct svga_shader_emitter *emit,
3755 const struct tgsi_token *tokens )
3756 {
3757 struct tgsi_parse_context parse;
3758 int current_arl = 0;
3759
3760 tgsi_parse_init( &parse, tokens );
3761
3762 while (!tgsi_parse_end_of_tokens( &parse )) {
3763 tgsi_parse_token( &parse );
3764 switch (parse.FullToken.Token.Type) {
3765 case TGSI_TOKEN_TYPE_IMMEDIATE:
3766 case TGSI_TOKEN_TYPE_DECLARATION:
3767 break;
3768 case TGSI_TOKEN_TYPE_INSTRUCTION:
3769 if (parse.FullToken.FullInstruction.Instruction.Opcode ==
3770 TGSI_OPCODE_ARL) {
3771 ++current_arl;
3772 }
3773 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
3774 current_arl ))
3775 return FALSE;
3776 break;
3777 default:
3778 break;
3779 }
3780
3781 }
3782 return TRUE;
3783 }
3784
3785
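/**
* Emit the helper constants (common immediates, loop constant, ARL
* constants) and the unit-specific setup code which must precede the
* first translated instruction.
*/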
3786 static boolean
3787 svga_shader_emit_helpers(struct svga_shader_emitter *emit)
3788 {
3789 if (needs_to_create_common_immediate( emit )) {
3790 create_common_immediate( emit );
3791 }
3792 if (needs_to_create_loop_const( emit )) {
3793 create_loop_const( emit );
3794 }
3795 if (needs_to_create_arl_consts( emit )) {
3796 create_arl_consts( emit );
3797 }
3798
3799 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3800 if (!emit_ps_preamble( emit ))
3801 return FALSE;
3802
3803 if (emit->key.fs.light_twoside) {
3804 if (!emit_light_twoside( emit ))
3805 return FALSE;
3806 }
3807 if (emit->emit_frontface) {
3808 if (!emit_frontface( emit ))
3809 return FALSE;
3810 }
3811 if (emit->inverted_texcoords) {
3812 if (!emit_inverted_texcoords( emit ))
3813 return FALSE;
3814 }
3815 }
3816 else {
3817 assert(emit->unit == PIPE_SHADER_VERTEX);
3818       if (emit->key.vs.adjust_attrib_range ||
3819           emit->key.vs.adjust_attrib_w_1) {
3820          if (!emit_adjusted_vertex_attribs(emit))
3821             return FALSE;
3822       }
3823 }
3824 }
3825
3826 return TRUE;
3827 }
3828
3829
3830 /**
3831  * This is the main entry point into the TGSI instruction translator.
3832 * Translate TGSI shader tokens into an SVGA shader.
3833 */
3834 boolean
3835 svga_shader_emit_instructions(struct svga_shader_emitter *emit,
3836 const struct tgsi_token *tokens)
3837 {
3838 struct tgsi_parse_context parse;
3839 const struct tgsi_token *new_tokens = NULL;
3840 boolean ret = TRUE;
3841 boolean helpers_emitted = FALSE;
3842 unsigned line_nr = 0;
3843
3844 if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
3845 unsigned unit;
3846
3847 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
3848 TGSI_FILE_INPUT);
3849
3850 if (new_tokens) {
3851 /* Setup texture state for stipple */
3852 emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
3853 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
3854 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
3855 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
3856 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
3857
3858 emit->pstipple_sampler_unit = unit;
3859
3860 tokens = new_tokens;
3861 }
3862 }
3863
3864 tgsi_parse_init( &parse, tokens );
3865 emit->internal_imm_count = 0;
3866
3867 if (emit->unit == PIPE_SHADER_VERTEX) {
3868 ret = emit_vs_preamble( emit );
3869 if (!ret)
3870 goto done;
3871 }
3872
3873 pre_parse_tokens(emit, tokens);
3874
3875 while (!tgsi_parse_end_of_tokens( &parse )) {
3876 tgsi_parse_token( &parse );
3877
3878 switch (parse.FullToken.Token.Type) {
3879 case TGSI_TOKEN_TYPE_IMMEDIATE:
3880 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
3881 if (!ret)
3882 goto done;
3883 break;
3884
3885 case TGSI_TOKEN_TYPE_DECLARATION:
3886 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
3887 if (!ret)
3888 goto done;
3889 break;
3890
3891 case TGSI_TOKEN_TYPE_INSTRUCTION:
3892 if (!helpers_emitted) {
3893 if (!svga_shader_emit_helpers( emit ))
3894 goto done;
3895 helpers_emitted = TRUE;
3896 }
3897 ret = svga_emit_instruction( emit,
3898 line_nr++,
3899 &parse.FullToken.FullInstruction );
3900 if (!ret)
3901 goto done;
3902 break;
3903 default:
3904 break;
3905 }
3906
3907 reset_temp_regs( emit );
3908 }
3909
3910    /* Need to terminate the current subroutine. The hardware requires
3911     * the final sub-routine to end with RET, followed by the shader's
3912     * END token.
3913 */
3914 if (!emit->in_main_func) {
3915 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
3916 if (!ret)
3917 goto done;
3918 }
3919
3920 assert(emit->dynamic_branching_level == 0);
3921
3922 /* Need to terminate the whole shader:
3923 */
3924 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
3925 if (!ret)
3926 goto done;
3927
3928 done:
3929 tgsi_parse_free( &parse );
3930 if (new_tokens) {
3931 tgsi_free_tokens(new_tokens);
3932 }
3933
3934 return ret;
3935 }