svga: test register W component in emit_kil()
[mesa.git] / src / gallium / drivers / svga / svga_tgsi_insn.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_parse.h"
30 #include "util/u_memory.h"
31 #include "util/u_math.h"
32
33 #include "svga_tgsi_emit.h"
34 #include "svga_context.h"
35
36
37 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
38 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
39
40
41
42
43 static unsigned
44 translate_opcode(
45 uint opcode )
46 {
47 switch (opcode) {
48 case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
49 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
50 case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC;
51 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
52 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
53 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
54 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
55 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
56 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
57 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
58 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
59 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
60 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
61 case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM;
62 default:
63 debug_printf("Unknown opcode %u\n", opcode);
64 assert( 0 );
65 return SVGA3DOP_LAST_INST;
66 }
67 }
68
69
70 static unsigned translate_file( unsigned file )
71 {
72 switch (file) {
73 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
74 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
75 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
76 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
77 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
78 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
79 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
80 default:
81 assert( 0 );
82 return SVGA3DREG_TEMP;
83 }
84 }
85
86
87
88
89
90
91 static SVGA3dShaderDestToken
92 translate_dst_register( struct svga_shader_emitter *emit,
93 const struct tgsi_full_instruction *insn,
94 unsigned idx )
95 {
96 const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
97 SVGA3dShaderDestToken dest;
98
99 switch (reg->Register.File) {
100 case TGSI_FILE_OUTPUT:
101 /* Output registers encode semantic information in their name.
102 * Need to look up a table built at decl time:
103 */
104 dest = emit->output_map[reg->Register.Index];
105 break;
106
107 default:
108 dest = dst_register( translate_file( reg->Register.File ),
109 reg->Register.Index );
110 break;
111 }
112
113 dest.mask = reg->Register.WriteMask;
114 assert(dest.mask);
115
116 if (insn->Instruction.Saturate)
117 dest.dstMod = SVGA3DDSTMOD_SATURATE;
118
119 return dest;
120 }
121
122
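/* Compose a swizzle on top of src's existing swizzle: each requested
 * x/y/z/w selector picks a component out of the swizzle the register
 * already carries, so chained swizzles behave as expected.
 */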
123 static struct src_register
124 swizzle( struct src_register src,
125 int x,
126 int y,
127 int z,
128 int w )
129 {
130 x = (src.base.swizzle >> (x * 2)) & 0x3;
131 y = (src.base.swizzle >> (y * 2)) & 0x3;
132 z = (src.base.swizzle >> (z * 2)) & 0x3;
133 w = (src.base.swizzle >> (w * 2)) & 0x3;
134
135 src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w);
136
137 return src;
138 }
139
140 static struct src_register
141 scalar( struct src_register src,
142 int comp )
143 {
144 return swizzle( src, comp, comp, comp, comp );
145 }
146
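/* Helpers for the "fake ARL" path: the shader scan records constants that
 * feed ARL in emit->arl_consts. When the current ARL matches one of them,
 * relative constant addressing is adjusted by that value (see
 * translate_src_register() and emit_fake_arl()).
 */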
147 static INLINE boolean
148 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
149 {
150 int i;
151
152 for (i = 0; i < emit->num_arl_consts; ++i) {
153 if (emit->arl_consts[i].arl_num == emit->current_arl)
154 return TRUE;
155 }
156 return FALSE;
157 }
158
159 static INLINE int
160 svga_arl_adjustment( const struct svga_shader_emitter *emit )
161 {
162 int i;
163
164 for (i = 0; i < emit->num_arl_consts; ++i) {
165 if (emit->arl_consts[i].arl_num == emit->current_arl)
166 return emit->arl_consts[i].number;
167 }
168 return 0;
169 }
170
171 static struct src_register
172 translate_src_register( const struct svga_shader_emitter *emit,
173 const struct tgsi_full_src_register *reg )
174 {
175 struct src_register src;
176
177 switch (reg->Register.File) {
178 case TGSI_FILE_INPUT:
179 /* Input registers are referred to by their semantic name rather
180 * than by index. Use the mapping built up from the decls:
181 */
182 src = emit->input_map[reg->Register.Index];
183 break;
184
185 case TGSI_FILE_IMMEDIATE:
186 /* Immediates are appended after TGSI constants in the D3D
187 * constant buffer.
188 */
189 src = src_register( translate_file( reg->Register.File ),
190 reg->Register.Index +
191 emit->imm_start );
192 break;
193
194 default:
195 src = src_register( translate_file( reg->Register.File ),
196 reg->Register.Index );
197
198 break;
199 }
200
201 /* Indirect addressing.
202 */
203 if (reg->Register.Indirect) {
204 if (emit->unit == PIPE_SHADER_FRAGMENT) {
205 /* Pixel shaders have only loop registers for relative
206 * addressing into inputs. Ignore the redundant address
207 * register; the contents of aL should be in sync with it.
208 */
209 if (reg->Register.File == TGSI_FILE_INPUT) {
210 src.base.relAddr = 1;
211 src.indirect = src_token(SVGA3DREG_LOOP, 0);
212 }
213 }
214 else {
215 /* Constant buffers only.
216 */
217 if (reg->Register.File == TGSI_FILE_CONSTANT) {
218 /* we shift the offset towards the minimum */
219 if (svga_arl_needs_adjustment( emit )) {
220 src.base.num -= svga_arl_adjustment( emit );
221 }
222 src.base.relAddr = 1;
223
224 /* Not really sure what should go in the second token:
225 */
226 src.indirect = src_token( SVGA3DREG_ADDR,
227 reg->Indirect.Index );
228
229 src.indirect.swizzle = SWIZZLE_XXXX;
230 }
231 }
232 }
233
234 src = swizzle( src,
235 reg->Register.SwizzleX,
236 reg->Register.SwizzleY,
237 reg->Register.SwizzleZ,
238 reg->Register.SwizzleW );
239
240 /* src.mod isn't a bitfield, unfortunately:
241 * See tgsi_util_get_full_src_register_sign_mode for implementation details.
242 */
243 if (reg->Register.Absolute) {
244 if (reg->Register.Negate)
245 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
246 else
247 src.base.srcMod = SVGA3DSRCMOD_ABS;
248 }
249 else {
250 if (reg->Register.Negate)
251 src.base.srcMod = SVGA3DSRCMOD_NEG;
252 else
253 src.base.srcMod = SVGA3DSRCMOD_NONE;
254 }
255
256 return src;
257 }
258
259
260 /*
261 * Allocate and return a new temporary register.
262 */
263 static INLINE SVGA3dShaderDestToken
264 get_temp( struct svga_shader_emitter *emit )
265 {
266 int i = emit->nr_hw_temp + emit->internal_temp_count++;
267
268 return dst_register( SVGA3DREG_TEMP, i );
269 }
270
271 /* Release a single temp. Currently only effective if it was the last
272 * allocated temp, otherwise release will be delayed until the next
273 * call to reset_temp_regs().
274 */
275 static INLINE void
276 release_temp( struct svga_shader_emitter *emit,
277 SVGA3dShaderDestToken temp )
278 {
279 if (temp.num == emit->internal_temp_count - 1)
280 emit->internal_temp_count--;
281 }
282
283 static void reset_temp_regs( struct svga_shader_emitter *emit )
284 {
285 emit->internal_temp_count = 0;
286 }
287
288
289 /* Replace the src with the temporary specified in the dst, but copying
290 * only the necessary channels, and preserving the original swizzle (which is
291 * important given that several opcodes have constraints in the allowed
292 * swizzles).
293 */
294 static boolean emit_repl( struct svga_shader_emitter *emit,
295 SVGA3dShaderDestToken dst,
296 struct src_register *src0)
297 {
298 unsigned src0_swizzle;
299 unsigned chan;
300
301 assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP);
302
303 src0_swizzle = src0->base.swizzle;
304
305 dst.mask = 0;
306 for (chan = 0; chan < 4; ++chan) {
307 unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3;
308 dst.mask |= 1 << swizzle;
309 }
310 assert(dst.mask);
311
312 src0->base.swizzle = SVGA3DSWIZZLE_NONE;
313
314 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 ))
315 return FALSE;
316
317 *src0 = src( dst );
318 src0->base.swizzle = src0_swizzle;
319
320 return TRUE;
321 }
322
323
324 static boolean submit_op0( struct svga_shader_emitter *emit,
325 SVGA3dShaderInstToken inst,
326 SVGA3dShaderDestToken dest )
327 {
328 return (emit_instruction( emit, inst ) &&
329 emit_dst( emit, dest ));
330 }
331
332 static boolean submit_op1( struct svga_shader_emitter *emit,
333 SVGA3dShaderInstToken inst,
334 SVGA3dShaderDestToken dest,
335 struct src_register src0 )
336 {
337 return emit_op1( emit, inst, dest, src0 );
338 }
339
340
341 /* SVGA shaders may not refer to >1 constant register in a single
342 * instruction. This function checks for that usage and inserts a
343 * move to temporary if detected.
344 *
345 * The same applies to input registers -- at most a single input
346 * register may be read by any instruction.
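 * For example, "ADD dst, c0, c1" is rewritten here as "MOV tmp, c0"
 * followed by "ADD dst, tmp, c1"; emit_repl() copies only the channels
 * that src0's swizzle actually reads.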
347 */
348 static boolean submit_op2( struct svga_shader_emitter *emit,
349 SVGA3dShaderInstToken inst,
350 SVGA3dShaderDestToken dest,
351 struct src_register src0,
352 struct src_register src1 )
353 {
354 SVGA3dShaderDestToken temp;
355 SVGA3dShaderRegType type0, type1;
356 boolean need_temp = FALSE;
357
358 temp.value = 0;
359 type0 = SVGA3dShaderGetRegType( src0.base.value );
360 type1 = SVGA3dShaderGetRegType( src1.base.value );
361
362 if (type0 == SVGA3DREG_CONST &&
363 type1 == SVGA3DREG_CONST &&
364 src0.base.num != src1.base.num)
365 need_temp = TRUE;
366
367 if (type0 == SVGA3DREG_INPUT &&
368 type1 == SVGA3DREG_INPUT &&
369 src0.base.num != src1.base.num)
370 need_temp = TRUE;
371
372 if (need_temp) {
373 temp = get_temp( emit );
374
375 if (!emit_repl( emit, temp, &src0 ))
376 return FALSE;
377 }
378
379 if (!emit_op2( emit, inst, dest, src0, src1 ))
380 return FALSE;
381
382 if (need_temp)
383 release_temp( emit, temp );
384
385 return TRUE;
386 }
387
388
389 /* SVGA shaders may not refer to >1 constant register in a single
390 * instruction. This function checks for that usage and inserts a
391 * move to temporary if detected.
392 */
393 static boolean submit_op3( struct svga_shader_emitter *emit,
394 SVGA3dShaderInstToken inst,
395 SVGA3dShaderDestToken dest,
396 struct src_register src0,
397 struct src_register src1,
398 struct src_register src2 )
399 {
400 SVGA3dShaderDestToken temp0;
401 SVGA3dShaderDestToken temp1;
402 boolean need_temp0 = FALSE;
403 boolean need_temp1 = FALSE;
404 SVGA3dShaderRegType type0, type1, type2;
405
406 temp0.value = 0;
407 temp1.value = 0;
408 type0 = SVGA3dShaderGetRegType( src0.base.value );
409 type1 = SVGA3dShaderGetRegType( src1.base.value );
410 type2 = SVGA3dShaderGetRegType( src2.base.value );
411
412 if (inst.op != SVGA3DOP_SINCOS) {
413 if (type0 == SVGA3DREG_CONST &&
414 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
415 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
416 need_temp0 = TRUE;
417
418 if (type1 == SVGA3DREG_CONST &&
419 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
420 need_temp1 = TRUE;
421 }
422
423 if (type0 == SVGA3DREG_INPUT &&
424 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
425 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
426 need_temp0 = TRUE;
427
428 if (type1 == SVGA3DREG_INPUT &&
429 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
430 need_temp1 = TRUE;
431
432 if (need_temp0) {
433 temp0 = get_temp( emit );
434
435 if (!emit_repl( emit, temp0, &src0 ))
436 return FALSE;
437 }
438
439 if (need_temp1) {
440 temp1 = get_temp( emit );
441
442 if (!emit_repl( emit, temp1, &src1 ))
443 return FALSE;
444 }
445
446 if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
447 return FALSE;
448
449 if (need_temp1)
450 release_temp( emit, temp1 );
451 if (need_temp0)
452 release_temp( emit, temp0 );
453 return TRUE;
454 }
455
456
457
458
459 /* SVGA shaders may not refer to >1 constant register in a single
460 * instruction. This function checks for that usage and inserts a
461 * move to temporary if detected.
462 */
463 static boolean submit_op4( struct svga_shader_emitter *emit,
464 SVGA3dShaderInstToken inst,
465 SVGA3dShaderDestToken dest,
466 struct src_register src0,
467 struct src_register src1,
468 struct src_register src2,
469 struct src_register src3)
470 {
471 SVGA3dShaderDestToken temp0;
472 SVGA3dShaderDestToken temp3;
473 boolean need_temp0 = FALSE;
474 boolean need_temp3 = FALSE;
475 SVGA3dShaderRegType type0, type1, type2, type3;
476
477 temp0.value = 0;
478 temp3.value = 0;
479 type0 = SVGA3dShaderGetRegType( src0.base.value );
480 type1 = SVGA3dShaderGetRegType( src1.base.value );
481 type2 = SVGA3dShaderGetRegType( src2.base.value );
482 type3 = SVGA3dShaderGetRegType( src3.base.value );
483
484 /* Make life a little easier - this is only used by the TXD
485 * instruction, which always has a sampler (never a constant/input
486 * reg) in its second slot:
487 */
488 assert(type1 == SVGA3DREG_SAMPLER);
489
490 if (type0 == SVGA3DREG_CONST &&
491 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
492 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
493 need_temp0 = TRUE;
494
495 if (type3 == SVGA3DREG_CONST &&
496 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
497 need_temp3 = TRUE;
498
499 if (type0 == SVGA3DREG_INPUT &&
500 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
501 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
502 need_temp0 = TRUE;
503
504 if (type3 == SVGA3DREG_INPUT &&
505 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
506 need_temp3 = TRUE;
507
508 if (need_temp0) {
509 temp0 = get_temp( emit );
510
511 if (!emit_repl( emit, temp0, &src0 ))
512 return FALSE;
513 }
514
515 if (need_temp3) {
516 temp3 = get_temp( emit );
517
518 if (!emit_repl( emit, temp3, &src3 ))
519 return FALSE;
520 }
521
522 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
523 return FALSE;
524
525 if (need_temp3)
526 release_temp( emit, temp3 );
527 if (need_temp0)
528 release_temp( emit, temp0 );
529 return TRUE;
530 }
531
532
533 static boolean alias_src_dst( struct src_register src,
534 SVGA3dShaderDestToken dst )
535 {
536 if (src.base.num != dst.num)
537 return FALSE;
538
539 if (SVGA3dShaderGetRegType(dst.value) !=
540 SVGA3dShaderGetRegType(src.base.value))
541 return FALSE;
542
543 return TRUE;
544 }
545
546
547 static boolean submit_lrp(struct svga_shader_emitter *emit,
548 SVGA3dShaderDestToken dst,
549 struct src_register src0,
550 struct src_register src1,
551 struct src_register src2)
552 {
553 SVGA3dShaderDestToken tmp;
554 boolean need_dst_tmp = FALSE;
555
556 /* The dst reg must be a temporary, and not be the same as src0 or src2 */
557 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
558 alias_src_dst(src0, dst) ||
559 alias_src_dst(src2, dst))
560 need_dst_tmp = TRUE;
561
562 if (need_dst_tmp) {
563 tmp = get_temp( emit );
564 tmp.mask = dst.mask;
565 }
566 else {
567 tmp = dst;
568 }
569
570 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
571 return FALSE;
572
573 if (need_dst_tmp) {
574 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
575 return FALSE;
576 }
577
578 return TRUE;
579 }
580
581
582 static boolean emit_def_const( struct svga_shader_emitter *emit,
583 SVGA3dShaderConstType type,
584 unsigned idx,
585 float a,
586 float b,
587 float c,
588 float d )
589 {
590 SVGA3DOpDefArgs def;
591 SVGA3dShaderInstToken opcode;
592
593 switch (type) {
594 case SVGA3D_CONST_TYPE_FLOAT:
595 opcode = inst_token( SVGA3DOP_DEF );
596 def.dst = dst_register( SVGA3DREG_CONST, idx );
597 def.constValues[0] = a;
598 def.constValues[1] = b;
599 def.constValues[2] = c;
600 def.constValues[3] = d;
601 break;
602 case SVGA3D_CONST_TYPE_INT:
603 opcode = inst_token( SVGA3DOP_DEFI );
604 def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
605 def.constIValues[0] = (int)a;
606 def.constIValues[1] = (int)b;
607 def.constIValues[2] = (int)c;
608 def.constIValues[3] = (int)d;
609 break;
610 default:
611 assert(0);
612 opcode = inst_token( SVGA3DOP_NOP );
613 break;
614 }
615
616 if (!emit_instruction(emit, opcode) ||
617 !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
618 return FALSE;
619
620 return TRUE;
621 }
622
623 static INLINE boolean
624 create_zero_immediate( struct svga_shader_emitter *emit )
625 {
626 unsigned idx = emit->nr_hw_float_const++;
627
628 /* Emit the constant (0, 0, -1, 1) and use swizzling to generate
629 * other useful vectors.
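 * E.g. a .xxxw swizzle yields {0, 0, 0, 1} (get_zero_immediate) and
 * .wwwz yields {1, 1, 1, -1} (get_pos_neg_one_immediate).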
630 */
631 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
632 idx, 0, 0, -1, 1 ))
633 return FALSE;
634
635 emit->zero_immediate_idx = idx;
636 emit->created_zero_immediate = TRUE;
637
638 return TRUE;
639 }
640
641 static INLINE boolean
642 create_loop_const( struct svga_shader_emitter *emit )
643 {
644 unsigned idx = emit->nr_hw_int_const++;
645
646 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
647 255, /* iteration count */
648 0, /* initial value */
649 1, /* step size */
650 0 /* not used, must be 0 */))
651 return FALSE;
652
653 emit->loop_const_idx = idx;
654 emit->created_loop_const = TRUE;
655
656 return TRUE;
657 }
658
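/* Emit the two constant vectors that the SM2.0 SINCOS macro-op expects as
 * extra operands; the values appear to match the standard D3D SINCOS
 * coefficient constants (Taylor-series terms).
 */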
659 static INLINE boolean
660 create_sincos_consts( struct svga_shader_emitter *emit )
661 {
662 unsigned idx = emit->nr_hw_float_const++;
663
664 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
665 -1.5500992e-006f,
666 -2.1701389e-005f,
667 0.0026041667f,
668 0.00026041668f ))
669 return FALSE;
670
671 emit->sincos_consts_idx = idx;
672 idx = emit->nr_hw_float_const++;
673
674 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
675 -0.020833334f,
676 -0.12500000f,
677 1.0f,
678 0.50000000f ))
679 return FALSE;
680
681 emit->created_sincos_consts = TRUE;
682
683 return TRUE;
684 }
685
686 static INLINE boolean
687 create_arl_consts( struct svga_shader_emitter *emit )
688 {
689 int i;
690
691 for (i = 0; i < emit->num_arl_consts; i += 4) {
692 int j;
693 unsigned idx = emit->nr_hw_float_const++;
694 float vals[4];
695 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
696 vals[j] = emit->arl_consts[i + j].number;
697 emit->arl_consts[i + j].idx = idx;
698 switch (j) {
699 case 0:
700 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
701 break;
702 case 1:
703 emit->arl_consts[i + 1].swizzle = TGSI_SWIZZLE_Y;
704 break;
705 case 2:
706 emit->arl_consts[i + 2].swizzle = TGSI_SWIZZLE_Z;
707 break;
708 case 3:
709 emit->arl_consts[i + 3].swizzle = TGSI_SWIZZLE_W;
710 break;
711 }
712 }
713 while (j < 4)
714 vals[j++] = 0;
715
716 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
717 vals[0], vals[1],
718 vals[2], vals[3]))
719 return FALSE;
720 }
721
722 return TRUE;
723 }
724
725 static INLINE struct src_register
726 get_vface( struct svga_shader_emitter *emit )
727 {
728 assert(emit->emitted_vface);
729 return src_register(SVGA3DREG_MISCTYPE,
730 SVGA3DMISCREG_FACE);
731 }
732
733 /* returns {0, 0, 0, 1} immediate */
734 static INLINE struct src_register
735 get_zero_immediate( struct svga_shader_emitter *emit )
736 {
737 assert(emit->created_zero_immediate);
738 assert(emit->zero_immediate_idx >= 0);
739 return swizzle(src_register( SVGA3DREG_CONST,
740 emit->zero_immediate_idx),
741 0, 0, 0, 3);
742 }
743
744 /* returns {1, 1, 1, -1} immediate */
745 static INLINE struct src_register
746 get_pos_neg_one_immediate( struct svga_shader_emitter *emit )
747 {
748 assert(emit->created_zero_immediate);
749 assert(emit->zero_immediate_idx >= 0);
750 return swizzle(src_register( SVGA3DREG_CONST,
751 emit->zero_immediate_idx),
752 3, 3, 3, 2);
753 }
754
755 /* returns the loop const */
756 static INLINE struct src_register
757 get_loop_const( struct svga_shader_emitter *emit )
758 {
759 assert(emit->created_loop_const);
760 assert(emit->loop_const_idx >= 0);
761 return src_register( SVGA3DREG_CONSTINT,
762 emit->loop_const_idx );
763 }
764
765 /* returns a sincos const */
766 static INLINE struct src_register
767 get_sincos_const( struct svga_shader_emitter *emit,
768 unsigned index )
769 {
770 assert(emit->created_sincos_consts);
771 assert(emit->sincos_consts_idx >= 0);
772 assert(index == 0 || index == 1);
773 return src_register( SVGA3DREG_CONST,
774 emit->sincos_consts_idx + index );
775 }
776
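/* Return a scalar src register pointing at the constant slot/component
 * that holds the value of the constant feeding the current ARL
 * (recorded in emit->arl_consts).
 */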
777 static INLINE struct src_register
778 get_fake_arl_const( struct svga_shader_emitter *emit )
779 {
780 struct src_register reg;
781 int idx = 0, swizzle = 0, i;
782
783 for (i = 0; i < emit->num_arl_consts; ++ i) {
784 if (emit->arl_consts[i].arl_num == emit->current_arl) {
785 idx = emit->arl_consts[i].idx;
786 swizzle = emit->arl_consts[i].swizzle;
787 }
788 }
789
790 reg = src_register( SVGA3DREG_CONST, idx );
791 return scalar(reg, swizzle);
792 }
793
794 static INLINE struct src_register
795 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
796 {
797 int idx;
798 struct src_register reg;
799
800 /* the width/height indexes start right after constants */
801 idx = emit->key.fkey.tex[sampler_num].width_height_idx +
802 emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
803
804 reg = src_register( SVGA3DREG_CONST, idx );
805 return reg;
806 }
807
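/* Emulate ARL when the address comes from a known constant: add the
 * recorded constant value to the source, then load the address register
 * with MOVA. Used together with the index adjustment done in
 * translate_src_register().
 */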
808 static boolean emit_fake_arl(struct svga_shader_emitter *emit,
809 const struct tgsi_full_instruction *insn)
810 {
811 const struct src_register src0 = translate_src_register(
812 emit, &insn->Src[0] );
813 struct src_register src1 = get_fake_arl_const( emit );
814 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
815 SVGA3dShaderDestToken tmp = get_temp( emit );
816
817 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
818 return FALSE;
819
820 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
821 src1))
822 return FALSE;
823
824 /* replicate the original swizzle */
825 src1 = src(tmp);
826 src1.base.swizzle = src0.base.swizzle;
827
828 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
829 dst, src1 );
830 }
831
832 static boolean emit_if(struct svga_shader_emitter *emit,
833 const struct tgsi_full_instruction *insn)
834 {
835 struct src_register src0 = translate_src_register(
836 emit, &insn->Src[0] );
837 struct src_register zero = get_zero_immediate( emit );
838 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
839
840 if_token.control = SVGA3DOPCOMPC_NE;
841 zero = scalar(zero, TGSI_SWIZZLE_X);
842
843 if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) {
844 /*
845 * Max different constant registers readable per IFC instruction is 1.
846 */
847
848 SVGA3dShaderDestToken tmp = get_temp( emit );
849
850 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
851 return FALSE;
852
853 src0 = scalar(src( tmp ), TGSI_SWIZZLE_X);
854 }
855
856 emit->dynamic_branching_level++;
857
858 return (emit_instruction( emit, if_token ) &&
859 emit_src( emit, src0 ) &&
860 emit_src( emit, zero ) );
861 }
862
863 static boolean emit_endif(struct svga_shader_emitter *emit,
864 const struct tgsi_full_instruction *insn)
865 {
866 emit->dynamic_branching_level--;
867
868 return (emit_instruction( emit,
869 inst_token( SVGA3DOP_ENDIF )));
870 }
871
872 static boolean emit_else(struct svga_shader_emitter *emit,
873 const struct tgsi_full_instruction *insn)
874 {
875 return (emit_instruction( emit,
876 inst_token( SVGA3DOP_ELSE )));
877 }
878
879 /* Translate the following TGSI FLR instruction.
880 * FLR DST, SRC
881 * To the following SVGA3D instruction sequence.
882 * FRC TMP, SRC
883 * SUB DST, SRC, TMP
884 */
885 static boolean emit_floor(struct svga_shader_emitter *emit,
886 const struct tgsi_full_instruction *insn )
887 {
888 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
889 const struct src_register src0 = translate_src_register(
890 emit, &insn->Src[0] );
891 SVGA3dShaderDestToken temp = get_temp( emit );
892
893 /* FRC TMP, SRC */
894 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
895 return FALSE;
896
897 /* SUB DST, SRC, TMP */
898 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
899 negate( src( temp ) ) ))
900 return FALSE;
901
902 return TRUE;
903 }
904
905
906 /* Translate the following TGSI CMP instruction.
907 * CMP DST, SRC0, SRC1, SRC2
908 * To the following SVGA3D instruction sequence.
909 * CMP DST, SRC0, SRC2, SRC1
910 */
911 static boolean emit_cmp(struct svga_shader_emitter *emit,
912 const struct tgsi_full_instruction *insn )
913 {
914 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
915 const struct src_register src0 = translate_src_register(
916 emit, &insn->Src[0] );
917 const struct src_register src1 = translate_src_register(
918 emit, &insn->Src[1] );
919 const struct src_register src2 = translate_src_register(
920 emit, &insn->Src[2] );
921
922 if (emit->unit == PIPE_SHADER_VERTEX) {
923 SVGA3dShaderDestToken temp = get_temp(emit);
924 struct src_register zero = scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X);
925
926 /* Since vertex shaders don't support the CMP instruction,
927 * simulate it with SLT and LRP instructions.
928 * SLT TMP, SRC0, 0.0
929 * LRP DST, TMP, SRC1, SRC2
930 */
931 if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero))
932 return FALSE;
933 return submit_lrp(emit, dst, src(temp), src1, src2);
934 }
935
936 /* CMP DST, SRC0, SRC2, SRC1 */
937 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
938 }
939
940
941
942 /* Translate the following TGSI DIV instruction.
943 * DIV DST.xy, SRC0, SRC1
944 * To the following SVGA3D instruction sequence.
945 * RCP TMP.x, SRC1.xxxx
946 * RCP TMP.y, SRC1.yyyy
947 * MUL DST.xy, SRC0, TMP
948 */
949 static boolean emit_div(struct svga_shader_emitter *emit,
950 const struct tgsi_full_instruction *insn )
951 {
952 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
953 const struct src_register src0 = translate_src_register(
954 emit, &insn->Src[0] );
955 const struct src_register src1 = translate_src_register(
956 emit, &insn->Src[1] );
957 SVGA3dShaderDestToken temp = get_temp( emit );
958 int i;
959
960 /* For each enabled element, perform a RCP instruction. Note that
961 * RCP is scalar in SVGA3D:
962 */
963 for (i = 0; i < 4; i++) {
964 unsigned channel = 1 << i;
965 if (dst.mask & channel) {
966 /* RCP TMP.?, SRC1.???? */
967 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
968 writemask(temp, channel),
969 scalar(src1, i) ))
970 return FALSE;
971 }
972 }
973
974 /* Then multiply them out with a single mul:
975 *
976 * MUL DST, SRC0, TMP
977 */
978 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
979 src( temp ) ))
980 return FALSE;
981
982 return TRUE;
983 }
984
985 /* Translate the following TGSI DP2 instruction.
986 * DP2 DST, SRC1, SRC2
987 * To the following SVGA3D instruction sequence.
988 * MUL TMP, SRC1, SRC2
989 * ADD DST, TMP.xxxx, TMP.yyyy
990 */
991 static boolean emit_dp2(struct svga_shader_emitter *emit,
992 const struct tgsi_full_instruction *insn )
993 {
994 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
995 const struct src_register src0 = translate_src_register(
996 emit, &insn->Src[0] );
997 const struct src_register src1 = translate_src_register(
998 emit, &insn->Src[1] );
999 SVGA3dShaderDestToken temp = get_temp( emit );
1000 struct src_register temp_src0, temp_src1;
1001
1002 /* MUL TMP, SRC1, SRC2 */
1003 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
1004 return FALSE;
1005
1006 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1007 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1008
1009 /* ADD DST, TMP.xxxx, TMP.yyyy */
1010 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1011 temp_src0, temp_src1 ))
1012 return FALSE;
1013
1014 return TRUE;
1015 }
1016
1017
1018 /* Translate the following TGSI DPH instruction.
1019 * DPH DST, SRC1, SRC2
1020 * To the following SVGA3D instruction sequence.
1021 * DP3 TMP, SRC1, SRC2
1022 * ADD DST, TMP, SRC2.wwww
1023 */
1024 static boolean emit_dph(struct svga_shader_emitter *emit,
1025 const struct tgsi_full_instruction *insn )
1026 {
1027 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1028 const struct src_register src0 = translate_src_register(
1029 emit, &insn->Src[0] );
1030 struct src_register src1 = translate_src_register(
1031 emit, &insn->Src[1] );
1032 SVGA3dShaderDestToken temp = get_temp( emit );
1033
1034 /* DP3 TMP, SRC1, SRC2 */
1035 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
1036 return FALSE;
1037
1038 src1 = scalar(src1, TGSI_SWIZZLE_W);
1039
1040 /* ADD DST, TMP, SRC2.wwww */
1041 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1042 src( temp ), src1 ))
1043 return FALSE;
1044
1045 return TRUE;
1046 }
1047
1048 /* Translate the following TGSI NRM instruction.
1049 * NRM DST, SRC
1050 * To the following SVGA3D instruction sequence.
1051 * DP3 TMP, SRC, SRC
1052 * RSQ TMP, TMP
1053 * MUL DST, SRC, TMP
1054 */
1055 static boolean emit_nrm(struct svga_shader_emitter *emit,
1056 const struct tgsi_full_instruction *insn )
1057 {
1058 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1059 const struct src_register src0 = translate_src_register(
1060 emit, &insn->Src[0] );
1061 SVGA3dShaderDestToken temp = get_temp( emit );
1062
1063 /* DP3 TMP, SRC, SRC */
1064 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 ))
1065 return FALSE;
1066
1067 /* RSQ TMP, TMP */
1068 if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp )))
1069 return FALSE;
1070
1071 /* MUL DST, SRC, TMP */
1072 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst,
1073 src0, src( temp )))
1074 return FALSE;
1075
1076 return TRUE;
1077
1078 }
1079
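/* Emit the SINCOS instruction. On SM3.0 it takes just dst and a scalar
 * source; on SM2.0 the two coefficient constants from
 * create_sincos_consts() must be passed as extra operands.
 */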
1080 static boolean do_emit_sincos(struct svga_shader_emitter *emit,
1081 SVGA3dShaderDestToken dst,
1082 struct src_register src0)
1083 {
1084 src0 = scalar(src0, TGSI_SWIZZLE_X);
1085
1086 if (emit->use_sm30) {
1087 return submit_op1( emit, inst_token( SVGA3DOP_SINCOS ),
1088 dst, src0 );
1089 } else {
1090 struct src_register const1 = get_sincos_const( emit, 0 );
1091 struct src_register const2 = get_sincos_const( emit, 1 );
1092
1093 return submit_op3( emit, inst_token( SVGA3DOP_SINCOS ),
1094 dst, src0, const1, const2 );
1095 }
1096 }
1097
1098 static boolean emit_sincos(struct svga_shader_emitter *emit,
1099 const struct tgsi_full_instruction *insn)
1100 {
1101 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1102 struct src_register src0 = translate_src_register(
1103 emit, &insn->Src[0] );
1104 SVGA3dShaderDestToken temp = get_temp( emit );
1105
1106 /* SCS TMP SRC */
1107 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
1108 return FALSE;
1109
1110 /* MOV DST TMP */
1111 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
1112 return FALSE;
1113
1114 return TRUE;
1115 }
1116
1117 /*
1118 * SCS TMP SRC
1119 * MOV DST TMP.yyyy
1120 */
1121 static boolean emit_sin(struct svga_shader_emitter *emit,
1122 const struct tgsi_full_instruction *insn )
1123 {
1124 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1125 struct src_register src0 = translate_src_register(
1126 emit, &insn->Src[0] );
1127 SVGA3dShaderDestToken temp = get_temp( emit );
1128
1129 /* SCS TMP SRC */
1130 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1131 return FALSE;
1132
1133 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1134
1135 /* MOV DST TMP.yyyy */
1136 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1137 return FALSE;
1138
1139 return TRUE;
1140 }
1141
1142 /*
1143 * SCS TMP SRC
1144 * MOV DST TMP.xxxx
1145 */
1146 static boolean emit_cos(struct svga_shader_emitter *emit,
1147 const struct tgsi_full_instruction *insn )
1148 {
1149 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1150 struct src_register src0 = translate_src_register(
1151 emit, &insn->Src[0] );
1152 SVGA3dShaderDestToken temp = get_temp( emit );
1153
1154 /* SCS TMP SRC */
1155 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1156 return FALSE;
1157
1158 src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1159
1160 /* MOV DST TMP.xxxx */
1161 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1162 return FALSE;
1163
1164 return TRUE;
1165 }
1166
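/* Translate TGSI SSG (set sign). Vertex shaders use the native SGN
 * instruction (which needs two scratch temps); pixel shaders build the
 * result as CMP(src,1,0) + CMP(-src,-1,0), i.e. +1, -1 or 0.
 */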
1167 static boolean emit_ssg(struct svga_shader_emitter *emit,
1168 const struct tgsi_full_instruction *insn )
1169 {
1170 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1171 struct src_register src0 = translate_src_register(
1172 emit, &insn->Src[0] );
1173 SVGA3dShaderDestToken temp0 = get_temp( emit );
1174 SVGA3dShaderDestToken temp1 = get_temp( emit );
1175 struct src_register zero, one;
1176
1177 if (emit->unit == PIPE_SHADER_VERTEX) {
1178 /* SGN DST, SRC0, TMP0, TMP1 */
1179 return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0,
1180 src( temp0 ), src( temp1 ) );
1181 }
1182
1183 zero = get_zero_immediate( emit );
1184 one = scalar( zero, TGSI_SWIZZLE_W );
1185 zero = scalar( zero, TGSI_SWIZZLE_X );
1186
1187 /* CMP TMP0, SRC0, one, zero */
1188 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1189 writemask( temp0, dst.mask ), src0, one, zero ))
1190 return FALSE;
1191
1192 /* CMP TMP1, negate(SRC0), negate(one), zero */
1193 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1194 writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
1195 zero ))
1196 return FALSE;
1197
1198 /* ADD DST, TMP0, TMP1 */
1199 return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
1200 src( temp1 ) );
1201 }
1202
1203 /*
1204 * ADD DST, SRC0, negate(SRC1)
1205 */
1206 static boolean emit_sub(struct svga_shader_emitter *emit,
1207 const struct tgsi_full_instruction *insn)
1208 {
1209 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1210 struct src_register src0 = translate_src_register(
1211 emit, &insn->Src[0] );
1212 struct src_register src1 = translate_src_register(
1213 emit, &insn->Src[1] );
1214
1215 src1 = negate(src1);
1216
1217 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1218 src0, src1 ))
1219 return FALSE;
1220
1221 return TRUE;
1222 }
1223
1224
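/* Translate TGSI KIL: discard the fragment if any selected component is
 * negative. SVGA3D TEXKILL only examines the x/y/z components of its
 * register, so if the source's W component is not already replicated
 * into x/y/z by the swizzle, a second TEXKILL is emitted on src.wwww.
 */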
1225 static boolean emit_kil(struct svga_shader_emitter *emit,
1226 const struct tgsi_full_instruction *insn )
1227 {
1228 const struct tgsi_full_src_register *reg = &insn->Src[0];
1229 struct src_register src0, srcIn;
1230 /* is the W component tested in another position? */
1231 const boolean w_tested = (reg->Register.SwizzleW == reg->Register.SwizzleX ||
1232 reg->Register.SwizzleW == reg->Register.SwizzleY ||
1233 reg->Register.SwizzleW == reg->Register.SwizzleZ);
1234 const boolean special = (reg->Register.Absolute ||
1235 reg->Register.Negate ||
1236 reg->Register.Indirect ||
1237 reg->Register.SwizzleX != 0 ||
1238 reg->Register.SwizzleY != 1 ||
1239 reg->Register.SwizzleZ != 2 ||
1240 reg->Register.File != TGSI_FILE_TEMPORARY);
1241 SVGA3dShaderDestToken temp;
1242
1243 src0 = srcIn = translate_src_register( emit, reg );
1244
1245 if (special || !w_tested) {
1246 /* need a temp reg */
1247 temp = get_temp( emit );
1248 }
1249
1250 if (special) {
1251 /* move the source into a temp register */
1252 submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1253 writemask( temp, TGSI_WRITEMASK_XYZ ),
1254 src0 );
1255
1256 src0 = src( temp );
1257 }
1258
1259 /* do the texkill (on the xyz components) */
1260 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) ))
1261 return FALSE;
1262
1263 if (!w_tested) {
1264 /* need to emit a second texkill to test the W component */
1265 /* put src.wwww into temp register */
1266 if (!submit_op1(emit,
1267 inst_token( SVGA3DOP_MOV ),
1268 writemask( temp, TGSI_WRITEMASK_XYZ ),
1269 scalar(srcIn, TGSI_SWIZZLE_W)))
1270 return FALSE;
1271
1272 /* second texkill */
1273 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), temp ))
1274 return FALSE;
1275 }
1276
1277 return TRUE;
1278 }
1279
1280
1281 /* The Mesa state tracker always emits KILP as an unconditional
1282 * KIL. */
1283 static boolean emit_kilp(struct svga_shader_emitter *emit,
1284 const struct tgsi_full_instruction *insn )
1285 {
1286 SVGA3dShaderInstToken inst;
1287 SVGA3dShaderDestToken temp;
1288 struct src_register one = scalar( get_zero_immediate( emit ),
1289 TGSI_SWIZZLE_W );
1290
1291 inst = inst_token( SVGA3DOP_TEXKILL );
1292
1293 /* texkill doesn't allow negation on the operand, so let's move
1294 * the negation of {1} into a temp register */
1295 temp = get_temp( emit );
1296 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1297 negate( one ) ))
1298 return FALSE;
1299
1300 return submit_op0( emit, inst, temp );
1301 }
1302
1303 /* Implement conditionals by initializing destination reg to 'fail',
1304 * then set predicate reg with UFOP_SETP, then move 'pass' to dest
1305 * based on predicate reg.
1306 *
1307 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1308 * MOV dst, fail
1309 * MOV dst, pass, p0
1310 */
1311 static boolean
1312 emit_conditional(struct svga_shader_emitter *emit,
1313 unsigned compare_func,
1314 SVGA3dShaderDestToken dst,
1315 struct src_register src0,
1316 struct src_register src1,
1317 struct src_register pass,
1318 struct src_register fail)
1319 {
1320 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1321 SVGA3dShaderInstToken setp_token, mov_token;
1322 setp_token = inst_token( SVGA3DOP_SETP );
1323
1324 switch (compare_func) {
1325 case PIPE_FUNC_NEVER:
1326 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1327 dst, fail );
1328 break;
1329 case PIPE_FUNC_LESS:
1330 setp_token.control = SVGA3DOPCOMP_LT;
1331 break;
1332 case PIPE_FUNC_EQUAL:
1333 setp_token.control = SVGA3DOPCOMP_EQ;
1334 break;
1335 case PIPE_FUNC_LEQUAL:
1336 setp_token.control = SVGA3DOPCOMP_LE;
1337 break;
1338 case PIPE_FUNC_GREATER:
1339 setp_token.control = SVGA3DOPCOMP_GT;
1340 break;
1341 case PIPE_FUNC_NOTEQUAL:
1342 setp_token.control = SVGA3DOPCOMPC_NE;
1343 break;
1344 case PIPE_FUNC_GEQUAL:
1345 setp_token.control = SVGA3DOPCOMP_GE;
1346 break;
1347 case PIPE_FUNC_ALWAYS:
1348 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1349 dst, pass );
1350 break;
1351 }
1352
1353 /* SETP src0, COMPOP, src1 */
1354 if (!submit_op2( emit, setp_token, pred_reg,
1355 src0, src1 ))
1356 return FALSE;
1357
1358 mov_token = inst_token( SVGA3DOP_MOV );
1359
1360 /* MOV dst, fail */
1361 if (!submit_op1( emit, mov_token, dst,
1362 fail ))
1363 return FALSE;
1364
1365 /* MOV dst, pass (predicated)
1366 *
1367 * Note that the predicate reg (and possible modifiers) is passed
1368 * as the first source argument.
1369 */
1370 mov_token.predicated = 1;
1371 if (!submit_op2( emit, mov_token, dst,
1372 src( pred_reg ), pass ))
1373 return FALSE;
1374
1375 return TRUE;
1376 }
1377
1378
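/* Emit code that sets dst to 1.0 where compare_func(src0, src1) passes
 * and 0.0 where it fails. Vertex shaders can use SGE/SLT directly for
 * some functions; otherwise fall back to the predicated sequence in
 * emit_conditional().
 */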
1379 static boolean
1380 emit_select(struct svga_shader_emitter *emit,
1381 unsigned compare_func,
1382 SVGA3dShaderDestToken dst,
1383 struct src_register src0,
1384 struct src_register src1 )
1385 {
1386 /* There are some SVGA instructions which implement some selects
1387 * directly, but they are only available in the vertex shader.
1388 */
1389 if (emit->unit == PIPE_SHADER_VERTEX) {
1390 switch (compare_func) {
1391 case PIPE_FUNC_GEQUAL:
1392 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1393 case PIPE_FUNC_LEQUAL:
1394 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1395 case PIPE_FUNC_GREATER:
1396 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1397 case PIPE_FUNC_LESS:
1398 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1399 default:
1400 break;
1401 }
1402 }
1403
1404
1405 /* Otherwise, need to use the setp approach:
1406 */
1407 {
1408 struct src_register one, zero;
1409 /* zero immediate is 0,0,0,1 */
1410 zero = get_zero_immediate( emit );
1411 one = scalar( zero, TGSI_SWIZZLE_W );
1412 zero = scalar( zero, TGSI_SWIZZLE_X );
1413
1414 return emit_conditional(
1415 emit,
1416 compare_func,
1417 dst,
1418 src0,
1419 src1,
1420 one, zero);
1421 }
1422 }
1423
1424
1425 static boolean emit_select_op(struct svga_shader_emitter *emit,
1426 unsigned compare,
1427 const struct tgsi_full_instruction *insn)
1428 {
1429 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1430 struct src_register src0 = translate_src_register(
1431 emit, &insn->Src[0] );
1432 struct src_register src1 = translate_src_register(
1433 emit, &insn->Src[1] );
1434
1435 return emit_select( emit, compare, dst, src0, src1 );
1436 }
1437
1438
1439 /* Translate two-operand texture instructions (TEX, TXP, TXB, TXL) to SVGA3D representation.
1440 */
1441 static boolean emit_tex2(struct svga_shader_emitter *emit,
1442 const struct tgsi_full_instruction *insn,
1443 SVGA3dShaderDestToken dst )
1444 {
1445 SVGA3dShaderInstToken inst;
1446 struct src_register texcoord;
1447 struct src_register sampler;
1448 SVGA3dShaderDestToken tmp;
1449
1450 inst.value = 0;
1451
1452 switch (insn->Instruction.Opcode) {
1453 case TGSI_OPCODE_TEX:
1454 inst.op = SVGA3DOP_TEX;
1455 break;
1456 case TGSI_OPCODE_TXP:
1457 inst.op = SVGA3DOP_TEX;
1458 inst.control = SVGA3DOPCONT_PROJECT;
1459 break;
1460 case TGSI_OPCODE_TXB:
1461 inst.op = SVGA3DOP_TEX;
1462 inst.control = SVGA3DOPCONT_BIAS;
1463 break;
1464 case TGSI_OPCODE_TXL:
1465 inst.op = SVGA3DOP_TEXLDL;
1466 break;
1467 default:
1468 assert(0);
1469 return FALSE;
1470 }
1471
1472 texcoord = translate_src_register( emit, &insn->Src[0] );
1473 sampler = translate_src_register( emit, &insn->Src[1] );
1474
1475 if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
1476 emit->dynamic_branching_level > 0)
1477 tmp = get_temp( emit );
1478
1479 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1480 * zero in that case.
1481 */
1482 if (emit->dynamic_branching_level > 0 &&
1483 inst.op == SVGA3DOP_TEX &&
1484 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1485 struct src_register zero = get_zero_immediate( emit );
1486
1487 /* MOV tmp, texcoord */
1488 if (!submit_op1( emit,
1489 inst_token( SVGA3DOP_MOV ),
1490 tmp,
1491 texcoord ))
1492 return FALSE;
1493
1494 /* MOV tmp.w, zero */
1495 if (!submit_op1( emit,
1496 inst_token( SVGA3DOP_MOV ),
1497 writemask( tmp, TGSI_WRITEMASK_W ),
1498 scalar( zero, TGSI_SWIZZLE_X )))
1499 return FALSE;
1500
1501 texcoord = src( tmp );
1502 inst.op = SVGA3DOP_TEXLDL;
1503 }
1504
1505 /* Explicit normalization of texcoords:
1506 */
1507 if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
1508 struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1509
1510 /* MUL tmp, SRC0, WH */
1511 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1512 tmp, texcoord, wh ))
1513 return FALSE;
1514
1515 texcoord = src( tmp );
1516 }
1517
1518 return submit_op2( emit, inst, dst, texcoord, sampler );
1519 }
1520
1521
1522
1523
1524 /* Translate four-operand texture instructions (TXD) to SVGA3D representation.
1525 */
1526 static boolean emit_tex4(struct svga_shader_emitter *emit,
1527 const struct tgsi_full_instruction *insn,
1528 SVGA3dShaderDestToken dst )
1529 {
1530 SVGA3dShaderInstToken inst;
1531 struct src_register texcoord;
1532 struct src_register ddx;
1533 struct src_register ddy;
1534 struct src_register sampler;
1535
1536 texcoord = translate_src_register( emit, &insn->Src[0] );
1537 ddx = translate_src_register( emit, &insn->Src[1] );
1538 ddy = translate_src_register( emit, &insn->Src[2] );
1539 sampler = translate_src_register( emit, &insn->Src[3] );
1540
1541 inst.value = 0;
1542
1543 switch (insn->Instruction.Opcode) {
1544 case TGSI_OPCODE_TXD:
1545 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1546 break;
1547 default:
1548 assert(0);
1549 return FALSE;
1550 }
1551
1552 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1553 }
1554
1555
1556 /**
1557 * Emit texture swizzle code.
1558 */
1559 static boolean emit_tex_swizzle( struct svga_shader_emitter *emit,
1560 SVGA3dShaderDestToken dst,
1561 struct src_register src,
1562 unsigned swizzle_x,
1563 unsigned swizzle_y,
1564 unsigned swizzle_z,
1565 unsigned swizzle_w)
1566 {
1567 const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
1568 unsigned srcSwizzle[4];
1569 unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
1570 int i;
1571
1572 /* build writemasks and srcSwizzle terms */
1573 for (i = 0; i < 4; i++) {
1574 if (swizzleIn[i] == PIPE_SWIZZLE_ZERO) {
1575 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1576 zeroWritemask |= (1 << i);
1577 }
1578 else if (swizzleIn[i] == PIPE_SWIZZLE_ONE) {
1579 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1580 oneWritemask |= (1 << i);
1581 }
1582 else {
1583 srcSwizzle[i] = swizzleIn[i];
1584 srcWritemask |= (1 << i);
1585 }
1586 }
1587
1588 /* write x/y/z/w comps */
1589 if (dst.mask & srcWritemask) {
1590 if (!submit_op1(emit,
1591 inst_token(SVGA3DOP_MOV),
1592 writemask(dst, srcWritemask),
1593 swizzle(src,
1594 srcSwizzle[0],
1595 srcSwizzle[1],
1596 srcSwizzle[2],
1597 srcSwizzle[3])))
1598 return FALSE;
1599 }
1600
1601 /* write 0 comps */
1602 if (dst.mask & zeroWritemask) {
1603 if (!submit_op1(emit,
1604 inst_token(SVGA3DOP_MOV),
1605 writemask(dst, zeroWritemask),
1606 scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X)))
1607 return FALSE;
1608 }
1609
1610 /* write 1 comps */
1611 if (dst.mask & oneWritemask) {
1612 if (!submit_op1(emit,
1613 inst_token(SVGA3DOP_MOV),
1614 writemask(dst, oneWritemask),
1615 scalar(get_zero_immediate(emit), TGSI_SWIZZLE_W)))
1616 return FALSE;
1617 }
1618
1619 return TRUE;
1620 }
1621
1622
1623 static boolean emit_tex(struct svga_shader_emitter *emit,
1624 const struct tgsi_full_instruction *insn )
1625 {
1626 SVGA3dShaderDestToken dst =
1627 translate_dst_register( emit, insn, 0 );
1628 struct src_register src0 =
1629 translate_src_register( emit, &insn->Src[0] );
1630 struct src_register src1 =
1631 translate_src_register( emit, &insn->Src[1] );
1632
1633 SVGA3dShaderDestToken tex_result;
1634 const unsigned unit = src1.base.num;
1635
1636 /* check for shadow samplers */
1637 boolean compare = (emit->key.fkey.tex[unit].compare_mode ==
1638 PIPE_TEX_COMPARE_R_TO_TEXTURE);
1639
1640 /* texture swizzle */
1641 boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
1642 emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
1643 emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
1644 emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
1645
1646 /* If doing compare processing or tex swizzle, need to put fetched color into
1647 * a temporary so it can be used as a source later on.
1648 */
1649 if (compare ||
1650 swizzle ||
1651 (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) ) {
1652 tex_result = get_temp( emit );
1653 }
1654 else {
1655 tex_result = dst;
1656 }
1657
1658 switch(insn->Instruction.Opcode) {
1659 case TGSI_OPCODE_TEX:
1660 case TGSI_OPCODE_TXB:
1661 case TGSI_OPCODE_TXP:
1662 case TGSI_OPCODE_TXL:
1663 if (!emit_tex2( emit, insn, tex_result ))
1664 return FALSE;
1665 break;
1666 case TGSI_OPCODE_TXD:
1667 if (!emit_tex4( emit, insn, tex_result ))
1668 return FALSE;
1669 break;
1670 default:
1671 assert(0);
1672 }
1673
1674
1675 if (compare) {
1676 SVGA3dShaderDestToken dst2;
1677
1678 if (swizzle)
1679 dst2 = tex_result;
1680 else
1681 dst2 = dst;
1682
1683 if (dst.mask & TGSI_WRITEMASK_XYZ) {
1684 SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1685 /* When sampling a depth texture, the result of the comparison is in
1686 * the Y component.
1687 */
1688 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1689 struct src_register r_coord;
1690
1691 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
1692 /* Divide texcoord R by Q */
1693 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1694 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1695 scalar(src0, TGSI_SWIZZLE_W) ))
1696 return FALSE;
1697
1698 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1699 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1700 scalar(src0, TGSI_SWIZZLE_Z),
1701 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1702 return FALSE;
1703
1704 r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X);
1705 }
1706 else {
1707 r_coord = scalar(src0, TGSI_SWIZZLE_Z);
1708 }
1709
1710 /* Compare texture sample value against R component of texcoord */
1711 if (!emit_select(emit,
1712 emit->key.fkey.tex[unit].compare_func,
1713 writemask( dst2, TGSI_WRITEMASK_XYZ ),
1714 r_coord,
1715 tex_src_x))
1716 return FALSE;
1717 }
1718
1719 if (dst.mask & TGSI_WRITEMASK_W) {
1720 struct src_register one =
1721 scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W );
1722
1723 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1724 writemask( dst2, TGSI_WRITEMASK_W ),
1725 one ))
1726 return FALSE;
1727 }
1728 }
1729
1730 if (swizzle) {
1731 /* swizzle from tex_result to dst */
1732 emit_tex_swizzle(emit,
1733 dst, src(tex_result),
1734 emit->key.fkey.tex[unit].swizzle_r,
1735 emit->key.fkey.tex[unit].swizzle_g,
1736 emit->key.fkey.tex[unit].swizzle_b,
1737 emit->key.fkey.tex[unit].swizzle_a);
1738 }
1739
1740 if (!emit->use_sm30 &&
1741 dst.mask != TGSI_WRITEMASK_XYZW &&
1742 !compare &&
1743 !swizzle) {
1744 /* Pre-SM3.0, a TEX instruction can't have a writemask. Apply it as a
1745 * separate MOV step here.
1746 */
1747 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1748 return FALSE;
1749 }
1750
1751 return TRUE;
1752 }
1753
1754 static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
1755 const struct tgsi_full_instruction *insn )
1756 {
1757 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
1758 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
1759 struct src_register const_int = get_loop_const( emit );
1760
1761 emit->dynamic_branching_level++;
1762
1763 return (emit_instruction( emit, inst ) &&
1764 emit_src( emit, loop_reg ) &&
1765 emit_src( emit, const_int ) );
1766 }
1767
1768 static boolean emit_endloop2( struct svga_shader_emitter *emit,
1769 const struct tgsi_full_instruction *insn )
1770 {
1771 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
1772
1773 emit->dynamic_branching_level--;
1774
1775 return emit_instruction( emit, inst );
1776 }
1777
1778 static boolean emit_brk( struct svga_shader_emitter *emit,
1779 const struct tgsi_full_instruction *insn )
1780 {
1781 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
1782 return emit_instruction( emit, inst );
1783 }
1784
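/* Emit a single-operand scalar SVGA3D instruction: the source is forced
 * to a .xxxx swizzle since these opcodes read only one component.
 */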
1785 static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
1786 unsigned opcode,
1787 const struct tgsi_full_instruction *insn )
1788 {
1789 SVGA3dShaderInstToken inst;
1790 SVGA3dShaderDestToken dst;
1791 struct src_register src;
1792
1793 inst = inst_token( opcode );
1794 dst = translate_dst_register( emit, insn, 0 );
1795 src = translate_src_register( emit, &insn->Src[0] );
1796 src = scalar( src, TGSI_SWIZZLE_X );
1797
1798 return submit_op1( emit, inst, dst, src );
1799 }
1800
1801
1802 static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
1803 unsigned opcode,
1804 const struct tgsi_full_instruction *insn )
1805 {
1806 const struct tgsi_full_src_register *src = insn->Src;
1807 SVGA3dShaderInstToken inst;
1808 SVGA3dShaderDestToken dst;
1809
1810 inst = inst_token( opcode );
1811 dst = translate_dst_register( emit, insn, 0 );
1812
1813 switch (insn->Instruction.NumSrcRegs) {
1814 case 0:
1815 return submit_op0( emit, inst, dst );
1816 case 1:
1817 return submit_op1( emit, inst, dst,
1818 translate_src_register( emit, &src[0] ));
1819 case 2:
1820 return submit_op2( emit, inst, dst,
1821 translate_src_register( emit, &src[0] ),
1822 translate_src_register( emit, &src[1] ) );
1823 case 3:
1824 return submit_op3( emit, inst, dst,
1825 translate_src_register( emit, &src[0] ),
1826 translate_src_register( emit, &src[1] ),
1827 translate_src_register( emit, &src[2] ) );
1828 default:
1829 assert(0);
1830 return FALSE;
1831 }
1832 }
1833
1834
1835 static boolean emit_deriv(struct svga_shader_emitter *emit,
1836 const struct tgsi_full_instruction *insn )
1837 {
1838 if (emit->dynamic_branching_level > 0 &&
1839 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
1840 {
1841 struct src_register zero = get_zero_immediate( emit );
1842 SVGA3dShaderDestToken dst =
1843 translate_dst_register( emit, insn, 0 );
1844
1845 /* Deriv opcodes not valid inside dynamic branching, workaround
1846 * by zeroing out the destination.
1847 */
1848 if (!submit_op1(emit,
1849 inst_token( SVGA3DOP_MOV ),
1850 dst,
1851 scalar(zero, TGSI_SWIZZLE_X)))
1852 return FALSE;
1853
1854 return TRUE;
1855 }
1856 else {
1857 unsigned opcode;
1858 const struct tgsi_full_src_register *reg = &insn->Src[0];
1859 SVGA3dShaderInstToken inst;
1860 SVGA3dShaderDestToken dst;
1861 struct src_register src0;
1862
1863 switch (insn->Instruction.Opcode) {
1864 case TGSI_OPCODE_DDX:
1865 opcode = SVGA3DOP_DSX;
1866 break;
1867 case TGSI_OPCODE_DDY:
1868 opcode = SVGA3DOP_DSY;
1869 break;
1870 default:
1871 return FALSE;
1872 }
1873
1874 inst = inst_token( opcode );
1875 dst = translate_dst_register( emit, insn, 0 );
1876 src0 = translate_src_register( emit, reg );
1877
1878 /* We cannot use negate or abs on source to dsx/dsy instruction.
1879 */
1880 if (reg->Register.Absolute ||
1881 reg->Register.Negate) {
1882 SVGA3dShaderDestToken temp = get_temp( emit );
1883
1884 if (!emit_repl( emit, temp, &src0 ))
1885 return FALSE;
1886 }
1887
1888 return submit_op1( emit, inst, dst, src0 );
1889 }
1890 }
1891
1892 static boolean emit_arl(struct svga_shader_emitter *emit,
1893 const struct tgsi_full_instruction *insn)
1894 {
1895 ++emit->current_arl;
1896 if (emit->unit == PIPE_SHADER_FRAGMENT) {
1897 /* MOVA not present in pixel shader instruction set.
1898 * Ignore this instruction altogether since it is
1899 * only used for loop counters -- and for that
1900 * we reference aL directly.
1901 */
1902 return TRUE;
1903 }
1904 if (svga_arl_needs_adjustment( emit )) {
1905 return emit_fake_arl( emit, insn );
1906 } else {
1907 /* no need to adjust, just emit straight arl */
1908 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
1909 }
1910 }
1911
1912 static boolean emit_pow(struct svga_shader_emitter *emit,
1913 const struct tgsi_full_instruction *insn)
1914 {
1915 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1916 struct src_register src0 = translate_src_register(
1917 emit, &insn->Src[0] );
1918 struct src_register src1 = translate_src_register(
1919 emit, &insn->Src[1] );
1920 boolean need_tmp = FALSE;
1921
1922 /* POW can only output to a temporary */
1923 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
1924 need_tmp = TRUE;
1925
1926 /* POW src1 must not be the same register as dst */
1927 if (alias_src_dst( src1, dst ))
1928 need_tmp = TRUE;
1929
1930 /* it's a scalar op */
1931 src0 = scalar( src0, TGSI_SWIZZLE_X );
1932 src1 = scalar( src1, TGSI_SWIZZLE_X );
1933
1934 if (need_tmp) {
1935 SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X );
1936
1937 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
1938 return FALSE;
1939
1940 return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) );
1941 }
1942 else {
1943 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
1944 }
1945 }
1946
1947 static boolean emit_xpd(struct svga_shader_emitter *emit,
1948 const struct tgsi_full_instruction *insn)
1949 {
1950 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1951 const struct src_register src0 = translate_src_register(
1952 emit, &insn->Src[0] );
1953 const struct src_register src1 = translate_src_register(
1954 emit, &insn->Src[1] );
1955 boolean need_dst_tmp = FALSE;
1956
1957 /* XPD can only output to a temporary */
1958 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
1959 need_dst_tmp = TRUE;
1960
1961 /* The dst reg must not be the same as src0 or src1*/
1962 if (alias_src_dst(src0, dst) ||
1963 alias_src_dst(src1, dst))
1964 need_dst_tmp = TRUE;
1965
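   /* Illustrative expansion (assuming the usual DX9 CRS restrictions):
    *   CRS tmp.xyz, src0, src1     -- or CRS dst.xyz when no temp is needed
    *   MOV dst,     tmp            -- only when a temp was used
    *   MOV dst.w,   1.0            -- only when dst.w is written (see below)
    */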
1966 if (need_dst_tmp) {
1967 SVGA3dShaderDestToken tmp = get_temp( emit );
1968
1969 /* Obey DX9 restrictions on mask:
1970 */
1971 tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
1972
1973 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
1974 return FALSE;
1975
1976 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
1977 return FALSE;
1978 }
1979 else {
1980 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
1981 return FALSE;
1982 }
1983
1984 /* Need to emit 1.0 to dst.w?
1985 */
1986 if (dst.mask & TGSI_WRITEMASK_W) {
1987 struct src_register zero = get_zero_immediate( emit );
1988
1989 if (!submit_op1(emit,
1990 inst_token( SVGA3DOP_MOV ),
1991 writemask(dst, TGSI_WRITEMASK_W),
1992 zero))
1993 return FALSE;
1994 }
1995
1996 return TRUE;
1997 }
1998
1999
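/* TGSI LRP computes dst = src0 * src1 + (1 - src0) * src2; the expansion
 * into SVGA3D instructions (and any source/destination aliasing
 * workarounds) is handled by submit_lrp().
 */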
2000 static boolean emit_lrp(struct svga_shader_emitter *emit,
2001 const struct tgsi_full_instruction *insn)
2002 {
2003 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2004 const struct src_register src0 = translate_src_register(
2005 emit, &insn->Src[0] );
2006 const struct src_register src1 = translate_src_register(
2007 emit, &insn->Src[1] );
2008 const struct src_register src2 = translate_src_register(
2009 emit, &insn->Src[2] );
2010
2011 return submit_lrp(emit, dst, src0, src1, src2);
2012 }
2013
2014
2015 static boolean emit_dst_insn(struct svga_shader_emitter *emit,
2016 const struct tgsi_full_instruction *insn )
2017 {
2018 if (emit->unit == PIPE_SHADER_VERTEX) {
2019 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
2020 */
2021 return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
2022 }
2023 else {
2024
2025 /* result[0] = 1 * 1;
2026 * result[1] = a[1] * b[1];
2027 * result[2] = a[2] * 1;
2028 * result[3] = 1 * b[3];
2029 */
2030
2031 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2032 SVGA3dShaderDestToken tmp;
2033 const struct src_register src0 = translate_src_register(
2034 emit, &insn->Src[0] );
2035 const struct src_register src1 = translate_src_register(
2036 emit, &insn->Src[1] );
2037 struct src_register zero = get_zero_immediate( emit );
2038 boolean need_tmp = FALSE;
2039
2040 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2041 alias_src_dst(src0, dst) ||
2042 alias_src_dst(src1, dst))
2043 need_tmp = TRUE;
2044
2045 if (need_tmp) {
2046 tmp = get_temp( emit );
2047 }
2048 else {
2049 tmp = dst;
2050 }
2051
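      /* Illustrative emitted sequence (subject to the write mask):
       *   MOV tmp.xw, {0,0,0,1}.w     -- 1.0
       *   MOV tmp.yz, src0
       *   MUL tmp.yw, tmp, src1
       *   MOV dst,    tmp             -- only when a temporary was used
       */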
2052 /* tmp.xw = 1.0
2053 */
2054 if (tmp.mask & TGSI_WRITEMASK_XW) {
2055 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2056 writemask(tmp, TGSI_WRITEMASK_XW ),
2057 scalar( zero, 3 )))
2058 return FALSE;
2059 }
2060
2061 /* tmp.yz = src0
2062 */
2063 if (tmp.mask & TGSI_WRITEMASK_YZ) {
2064 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2065 writemask(tmp, TGSI_WRITEMASK_YZ ),
2066 src0))
2067 return FALSE;
2068 }
2069
2070 /* tmp.yw = tmp * src1
2071 */
2072 if (tmp.mask & TGSI_WRITEMASK_YW) {
2073 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2074 writemask(tmp, TGSI_WRITEMASK_YW ),
2075 src(tmp),
2076 src1))
2077 return FALSE;
2078 }
2079
2080 /* dst = tmp
2081 */
2082 if (need_tmp) {
2083 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2084 dst,
2085 src(tmp)))
2086 return FALSE;
2087 }
2088 }
2089
2090 return TRUE;
2091 }
2092
2093
2094 static boolean emit_exp(struct svga_shader_emitter *emit,
2095 const struct tgsi_full_instruction *insn)
2096 {
2097 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2098 struct src_register src0 =
2099 translate_src_register( emit, &insn->Src[0] );
2100 struct src_register zero = get_zero_immediate( emit );
2101 SVGA3dShaderDestToken fraction;
2102
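   /* For reference, TGSI EXP expects (per the classic ARB_vertex_program
    * definition):
    *   dst.x = 2 ^ floor(src0.x)
    *   dst.y = src0.x - floor(src0.x)
    *   dst.z = 2 ^ src0.x            (reduced precision is acceptable)
    *   dst.w = 1.0
    * Only components present in the write mask are actually computed.
    */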
2103 if (dst.mask & TGSI_WRITEMASK_Y)
2104 fraction = dst;
2105 else if (dst.mask & TGSI_WRITEMASK_X)
2106 fraction = get_temp( emit );
2107 else
2108 fraction.value = 0;
2109
2110    /* If x or y is being written, compute src0 - floor(src0); the x
2111     * result below also needs this fraction. */
2112 if (dst.mask & TGSI_WRITEMASK_XY) {
2113 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2114 writemask( fraction, TGSI_WRITEMASK_Y ),
2115 src0 ))
2116 return FALSE;
2117 }
2118
2119 /* If x is being written, fill it with 2 ^ floor(src0).
2120 */
2121 if (dst.mask & TGSI_WRITEMASK_X) {
2122 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2123 writemask( dst, TGSI_WRITEMASK_X ),
2124 src0,
2125 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
2126 return FALSE;
2127
2128 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2129 writemask( dst, TGSI_WRITEMASK_X ),
2130 scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
2131 return FALSE;
2132
2133 if (!(dst.mask & TGSI_WRITEMASK_Y))
2134 release_temp( emit, fraction );
2135 }
2136
2137 /* If z is being written, fill it with 2 ^ src0 (partial precision).
2138 */
2139 if (dst.mask & TGSI_WRITEMASK_Z) {
2140 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
2141 writemask( dst, TGSI_WRITEMASK_Z ),
2142 src0 ) )
2143 return FALSE;
2144 }
2145
2146 /* If w is being written, fill it with one.
2147 */
2148 if (dst.mask & TGSI_WRITEMASK_W) {
2149 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2150 writemask(dst, TGSI_WRITEMASK_W),
2151 scalar( zero, TGSI_SWIZZLE_W ) ))
2152 return FALSE;
2153 }
2154
2155 return TRUE;
2156 }
2157
2158 static boolean emit_lit(struct svga_shader_emitter *emit,
2159 const struct tgsi_full_instruction *insn )
2160 {
2161 if (emit->unit == PIPE_SHADER_VERTEX) {
2162 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
2163 */
2164 return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
2165 }
2166 else {
2167
2168       /* D3D vs. GL semantics can be fairly easily accommodated by
2169 * variations on this sequence.
2170 *
2171 * GL:
2172 * tmp.y = src.x
2173 * tmp.z = pow(src.y,src.w)
2174 * p0 = src0.xxxx > 0
2175 * result = zero.wxxw
2176 * (p0) result.yz = tmp
2177 *
2178 * D3D:
2179 * tmp.y = src.x
2180 * tmp.z = pow(src.y,src.w)
2181 * p0 = src0.xxyy > 0
2182 * result = zero.wxxw
2183 * (p0) result.yz = tmp
2184 *
2185 * Will implement the GL version for now.
2186 */
2187
2188 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2189 SVGA3dShaderDestToken tmp = get_temp( emit );
2190 const struct src_register src0 = translate_src_register(
2191 emit, &insn->Src[0] );
2192 struct src_register zero = get_zero_immediate( emit );
2193
2194 /* tmp = pow(src.y, src.w)
2195 */
2196 if (dst.mask & TGSI_WRITEMASK_Z) {
2197 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
2198 tmp,
2199 scalar(src0, 1),
2200 scalar(src0, 3)))
2201 return FALSE;
2202 }
2203
2204 /* tmp.y = src.x
2205 */
2206 if (dst.mask & TGSI_WRITEMASK_Y) {
2207 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2208 writemask(tmp, TGSI_WRITEMASK_Y ),
2209 scalar(src0, 0)))
2210 return FALSE;
2211 }
2212
2213       /* Can't quite do this with emit_conditional() due to the extra
2214 * writemask on the predicated mov:
2215 */
2216 {
2217 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
2218 SVGA3dShaderInstToken setp_token, mov_token;
2219 struct src_register predsrc;
2220
2221 setp_token = inst_token( SVGA3DOP_SETP );
2222 mov_token = inst_token( SVGA3DOP_MOV );
2223
2224 setp_token.control = SVGA3DOPCOMP_GT;
2225
2226 /* D3D vs GL semantics:
2227 */
2228 if (0)
2229 predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
2230 else
2231 predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
2232
2233          /* SETP predsrc, GT, {0}.x   (src0.xxxx for GL, src0.xxyy for D3D) */
2234 if (!submit_op2( emit, setp_token, pred_reg,
2235 predsrc,
2236 swizzle(zero, 0, 0, 0, 0) ))
2237 return FALSE;
2238
2239 /* MOV dst, fail */
2240 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
2241 swizzle(zero, 3, 0, 0, 3 )))
2242 return FALSE;
2243
2244 /* MOV dst.yz, tmp (predicated)
2245 *
2246 * Note that the predicate reg (and possible modifiers) is passed
2247 * as the first source argument.
2248 */
2249 if (dst.mask & TGSI_WRITEMASK_YZ) {
2250 mov_token.predicated = 1;
2251 if (!submit_op2( emit, mov_token,
2252 writemask(dst, TGSI_WRITEMASK_YZ),
2253 src( pred_reg ), src( tmp ) ))
2254 return FALSE;
2255 }
2256 }
2257 }
2258
2259 return TRUE;
2260 }
2261
2262
2263
2264
2265 static boolean emit_ex2( struct svga_shader_emitter *emit,
2266 const struct tgsi_full_instruction *insn )
2267 {
2268 SVGA3dShaderInstToken inst;
2269 SVGA3dShaderDestToken dst;
2270 struct src_register src0;
2271
2272 inst = inst_token( SVGA3DOP_EXP );
2273 dst = translate_dst_register( emit, insn, 0 );
2274 src0 = translate_src_register( emit, &insn->Src[0] );
2275 src0 = scalar( src0, TGSI_SWIZZLE_X );
2276
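   /* EXP computes a scalar 2^x.  When the destination write mask is not
    * .xyzw, the result is computed into a temporary and its .x component
    * is then MOVed into the masked destination.
    */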
2277 if (dst.mask != TGSI_WRITEMASK_XYZW) {
2278 SVGA3dShaderDestToken tmp = get_temp( emit );
2279
2280 if (!submit_op1( emit, inst, tmp, src0 ))
2281 return FALSE;
2282
2283 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2284 dst,
2285 scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2286 }
2287
2288 return submit_op1( emit, inst, dst, src0 );
2289 }
2290
2291
2292 static boolean emit_log(struct svga_shader_emitter *emit,
2293 const struct tgsi_full_instruction *insn)
2294 {
2295 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2296 struct src_register src0 =
2297 translate_src_register( emit, &insn->Src[0] );
2298 struct src_register zero = get_zero_immediate( emit );
2299 SVGA3dShaderDestToken abs_tmp;
2300 struct src_register abs_src0;
2301 SVGA3dShaderDestToken log2_abs;
2302
2303 abs_tmp.value = 0;
2304
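   /* For reference, TGSI LOG expects (per the classic ARB_vertex_program
    * definition):
    *   dst.x = floor(log2(|src0.x|))
    *   dst.y = |src0.x| / 2 ^ floor(log2(|src0.x|))
    *   dst.z = log2(|src0.x|)
    *   dst.w = 1.0
    * Only components present in the write mask are actually computed.
    */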
2305 if (dst.mask & TGSI_WRITEMASK_Z)
2306 log2_abs = dst;
2307 else if (dst.mask & TGSI_WRITEMASK_XY)
2308 log2_abs = get_temp( emit );
2309 else
2310 log2_abs.value = 0;
2311
2312    /* If x, y or z is being written, compute log2( abs( src0 ) ); the x
2313     * and y results below are derived from it. */
2314 if (dst.mask & TGSI_WRITEMASK_XYZ) {
2315 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2316 abs_src0 = src0;
2317 else {
2318 abs_tmp = get_temp( emit );
2319
2320 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2321 abs_tmp,
2322 src0 ) )
2323 return FALSE;
2324
2325 abs_src0 = src( abs_tmp );
2326 }
2327
2328 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2329
2330 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2331 writemask( log2_abs, TGSI_WRITEMASK_Z ),
2332 abs_src0 ) )
2333 return FALSE;
2334 }
2335
2336 if (dst.mask & TGSI_WRITEMASK_XY) {
2337 SVGA3dShaderDestToken floor_log2;
2338
2339 if (dst.mask & TGSI_WRITEMASK_X)
2340 floor_log2 = dst;
2341 else
2342 floor_log2 = get_temp( emit );
2343
2344       /* Compute floor( log2( abs( src0 ) ) ) as log2_abs - frc( log2_abs );
2345        * it goes to dst.x if x is written, otherwise to a temp (y needs it too). */
2346 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2347 writemask( floor_log2, TGSI_WRITEMASK_X ),
2348 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2349 return FALSE;
2350
2351 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2352 writemask( floor_log2, TGSI_WRITEMASK_X ),
2353 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2354 negate( src( floor_log2 ) ) ) )
2355 return FALSE;
2356
2357 /* If y is being written, fill it with
2358 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2359 */
2360 if (dst.mask & TGSI_WRITEMASK_Y) {
2361 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2362 writemask( dst, TGSI_WRITEMASK_Y ),
2363 negate( scalar( src( floor_log2 ),
2364 TGSI_SWIZZLE_X ) ) ) )
2365 return FALSE;
2366
2367 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2368 writemask( dst, TGSI_WRITEMASK_Y ),
2369 src( dst ),
2370 abs_src0 ) )
2371 return FALSE;
2372 }
2373
2374 if (!(dst.mask & TGSI_WRITEMASK_X))
2375 release_temp( emit, floor_log2 );
2376
2377 if (!(dst.mask & TGSI_WRITEMASK_Z))
2378 release_temp( emit, log2_abs );
2379 }
2380
2381 if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod &&
2382 src0.base.srcMod != SVGA3DSRCMOD_ABS)
2383 release_temp( emit, abs_tmp );
2384
2385 /* If w is being written, fill it with one.
2386 */
2387 if (dst.mask & TGSI_WRITEMASK_W) {
2388 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2389 writemask(dst, TGSI_WRITEMASK_W),
2390 scalar( zero, TGSI_SWIZZLE_W ) ))
2391 return FALSE;
2392 }
2393
2394 return TRUE;
2395 }
2396
2397
2398 static boolean emit_bgnsub( struct svga_shader_emitter *emit,
2399 unsigned position,
2400 const struct tgsi_full_instruction *insn )
2401 {
2402 unsigned i;
2403
2404 /* Note that we've finished the main function and are now emitting
2405 * subroutines. This affects how we terminate the generated
2406 * shader.
2407 */
2408 emit->in_main_func = FALSE;
2409
2410 for (i = 0; i < emit->nr_labels; i++) {
2411 if (emit->label[i] == position) {
2412 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2413 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2414 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2415 }
2416 }
2417
2418 assert(0);
2419 return TRUE;
2420 }
2421
2422 static boolean emit_call( struct svga_shader_emitter *emit,
2423 const struct tgsi_full_instruction *insn )
2424 {
2425 unsigned position = insn->Label.Label;
2426 unsigned i;
2427
2428 for (i = 0; i < emit->nr_labels; i++) {
2429 if (emit->label[i] == position)
2430 break;
2431 }
2432
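   /* Reuse an existing label slot if this subroutine has been called
    * before; otherwise allocate a new one (bounded by the fixed-size
    * emit->label[] array) so that CALL and the matching LABEL emitted in
    * emit_bgnsub() refer to the same index.
    */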
2433 if (emit->nr_labels == Elements(emit->label))
2434 return FALSE;
2435
2436 if (i == emit->nr_labels) {
2437 emit->label[i] = position;
2438 emit->nr_labels++;
2439 }
2440
2441 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2442 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2443 }
2444
2445
2446 static boolean emit_end( struct svga_shader_emitter *emit )
2447 {
2448 if (emit->unit == PIPE_SHADER_VERTEX) {
2449 return emit_vs_postamble( emit );
2450 }
2451 else {
2452 return emit_ps_postamble( emit );
2453 }
2454 }
2455
2456
2457
2458 static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
2459 unsigned position,
2460 const struct tgsi_full_instruction *insn )
2461 {
2462 switch (insn->Instruction.Opcode) {
2463
2464 case TGSI_OPCODE_ARL:
2465 return emit_arl( emit, insn );
2466
2467 case TGSI_OPCODE_TEX:
2468 case TGSI_OPCODE_TXB:
2469 case TGSI_OPCODE_TXP:
2470 case TGSI_OPCODE_TXL:
2471 case TGSI_OPCODE_TXD:
2472 return emit_tex( emit, insn );
2473
2474 case TGSI_OPCODE_DDX:
2475 case TGSI_OPCODE_DDY:
2476 return emit_deriv( emit, insn );
2477
2478 case TGSI_OPCODE_BGNSUB:
2479 return emit_bgnsub( emit, position, insn );
2480
2481 case TGSI_OPCODE_ENDSUB:
2482 return TRUE;
2483
2484 case TGSI_OPCODE_CAL:
2485 return emit_call( emit, insn );
2486
2487 case TGSI_OPCODE_FLR:
2488 case TGSI_OPCODE_TRUNC: /* should be TRUNC, not FLR */
2489 return emit_floor( emit, insn );
2490
2491 case TGSI_OPCODE_CMP:
2492 return emit_cmp( emit, insn );
2493
2494 case TGSI_OPCODE_DIV:
2495 return emit_div( emit, insn );
2496
2497 case TGSI_OPCODE_DP2:
2498 return emit_dp2( emit, insn );
2499
2500 case TGSI_OPCODE_DPH:
2501 return emit_dph( emit, insn );
2502
2503 case TGSI_OPCODE_NRM:
2504 return emit_nrm( emit, insn );
2505
2506 case TGSI_OPCODE_COS:
2507 return emit_cos( emit, insn );
2508
2509 case TGSI_OPCODE_SIN:
2510 return emit_sin( emit, insn );
2511
2512 case TGSI_OPCODE_SCS:
2513 return emit_sincos( emit, insn );
2514
2515 case TGSI_OPCODE_END:
2516 /* TGSI always finishes the main func with an END */
2517 return emit_end( emit );
2518
2519 case TGSI_OPCODE_KIL:
2520 return emit_kil( emit, insn );
2521
2522 /* Selection opcodes. The underlying language is fairly
2523 * non-orthogonal about these.
2524 */
2525 case TGSI_OPCODE_SEQ:
2526 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2527
2528 case TGSI_OPCODE_SNE:
2529 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2530
2531 case TGSI_OPCODE_SGT:
2532 return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2533
2534 case TGSI_OPCODE_SGE:
2535 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2536
2537 case TGSI_OPCODE_SLT:
2538 return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2539
2540 case TGSI_OPCODE_SLE:
2541 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2542
2543 case TGSI_OPCODE_SUB:
2544 return emit_sub( emit, insn );
2545
2546 case TGSI_OPCODE_POW:
2547 return emit_pow( emit, insn );
2548
2549 case TGSI_OPCODE_EX2:
2550 return emit_ex2( emit, insn );
2551
2552 case TGSI_OPCODE_EXP:
2553 return emit_exp( emit, insn );
2554
2555 case TGSI_OPCODE_LOG:
2556 return emit_log( emit, insn );
2557
2558 case TGSI_OPCODE_LG2:
2559 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
2560
2561 case TGSI_OPCODE_RSQ:
2562 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
2563
2564 case TGSI_OPCODE_RCP:
2565 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
2566
2567 case TGSI_OPCODE_CONT:
2568 case TGSI_OPCODE_RET:
2569 /* This is a noop -- we tell mesa that we can't support RET
2570 * within a function (early return), so this will always be
2571 * followed by an ENDSUB.
2572 */
2573 return TRUE;
2574
2575 /* These aren't actually used by any of the frontends we care
2576 * about:
2577 */
2578 case TGSI_OPCODE_CLAMP:
2579 case TGSI_OPCODE_ROUND:
2580 case TGSI_OPCODE_AND:
2581 case TGSI_OPCODE_OR:
2582 case TGSI_OPCODE_I2F:
2583 case TGSI_OPCODE_NOT:
2584 case TGSI_OPCODE_SHL:
2585 case TGSI_OPCODE_ISHR:
2586 case TGSI_OPCODE_XOR:
2587 return FALSE;
2588
2589 case TGSI_OPCODE_IF:
2590 return emit_if( emit, insn );
2591 case TGSI_OPCODE_ELSE:
2592 return emit_else( emit, insn );
2593 case TGSI_OPCODE_ENDIF:
2594 return emit_endif( emit, insn );
2595
2596 case TGSI_OPCODE_BGNLOOP:
2597 return emit_bgnloop2( emit, insn );
2598 case TGSI_OPCODE_ENDLOOP:
2599 return emit_endloop2( emit, insn );
2600 case TGSI_OPCODE_BRK:
2601 return emit_brk( emit, insn );
2602
2603 case TGSI_OPCODE_XPD:
2604 return emit_xpd( emit, insn );
2605
2606 case TGSI_OPCODE_KILP:
2607 return emit_kilp( emit, insn );
2608
2609 case TGSI_OPCODE_DST:
2610 return emit_dst_insn( emit, insn );
2611
2612 case TGSI_OPCODE_LIT:
2613 return emit_lit( emit, insn );
2614
2615 case TGSI_OPCODE_LRP:
2616 return emit_lrp( emit, insn );
2617
2618 case TGSI_OPCODE_SSG:
2619 return emit_ssg( emit, insn );
2620
2621 default: {
2622 unsigned opcode = translate_opcode(insn->Instruction.Opcode);
2623
2624 if (opcode == SVGA3DOP_LAST_INST)
2625 return FALSE;
2626
2627 if (!emit_simple_instruction( emit, opcode, insn ))
2628 return FALSE;
2629 }
2630 }
2631
2632 return TRUE;
2633 }
2634
2635
2636 static boolean svga_emit_immediate( struct svga_shader_emitter *emit,
2637 struct tgsi_full_immediate *imm)
2638 {
2639 static const float id[4] = {0,0,0,1};
2640 float value[4];
2641 unsigned i;
2642
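   /* TGSI immediates may supply fewer than four components; the missing
    * ones are padded from the identity vector {0,0,0,1} so the hardware
    * constant is always a full float4.
    */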
2643 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
2644 for (i = 0; i < imm->Immediate.NrTokens - 1; i++)
2645 value[i] = imm->u[i].Float;
2646
2647 for ( ; i < 4; i++ )
2648 value[i] = id[i];
2649
2650 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2651 emit->imm_start + emit->internal_imm_count++,
2652 value[0], value[1], value[2], value[3]);
2653 }
2654
2655 static boolean make_immediate( struct svga_shader_emitter *emit,
2656 float a,
2657 float b,
2658 float c,
2659 float d,
2660 struct src_register *out )
2661 {
2662 unsigned idx = emit->nr_hw_float_const++;
2663
2664 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2665 idx, a, b, c, d ))
2666 return FALSE;
2667
2668 *out = src_register( SVGA3DREG_CONST, idx );
2669
2670 return TRUE;
2671 }
2672
2673 static boolean emit_vs_preamble( struct svga_shader_emitter *emit )
2674 {
2675 if (!emit->key.vkey.need_prescale) {
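      /* The {0, 0, 0.5, 0.5} constant is only needed by the non-prescale
       * path of emit_vs_postamble(), where a DP4 against it remaps the GL
       * clip-space Z range ([-w,w]) to the D3D convention ([0,w]).
       */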
2676 if (!make_immediate( emit, 0, 0, .5, .5,
2677 &emit->imm_0055))
2678 return FALSE;
2679 }
2680
2681 return TRUE;
2682 }
2683
2684 static boolean emit_ps_preamble( struct svga_shader_emitter *emit )
2685 {
2686 unsigned i;
2687
2688 /* For SM20, need to initialize the temporaries we're using to hold
2689 * color outputs to some value. Shaders which don't set all of
2690 * these values are likely to be rejected by the DX9 runtime.
2691 */
2692 if (!emit->use_sm30) {
2693 struct src_register zero = get_zero_immediate( emit );
2694 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2695 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2696
2697 if (!submit_op1( emit,
2698 inst_token(SVGA3DOP_MOV),
2699 emit->temp_col[i],
2700 zero ))
2701 return FALSE;
2702 }
2703 }
2704 } else if (emit->ps_reads_pos && emit->info.reads_z) {
2705 /*
2706 * Assemble the position from various bits of inputs. Depth and W are
2707     * passed in a texcoord because D3D's vPos does not hold Z or W.
2708     * Also fix up the perspective interpolation.
2709 *
2710 * temp_pos.xy = vPos.xy
2711 * temp_pos.w = rcp(texcoord1.w);
2712 * temp_pos.z = texcoord1.z * temp_pos.w;
2713 */
2714 if (!submit_op1( emit,
2715 inst_token(SVGA3DOP_MOV),
2716 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ),
2717 emit->ps_true_pos ))
2718 return FALSE;
2719
2720 if (!submit_op1( emit,
2721 inst_token(SVGA3DOP_RCP),
2722 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ),
2723 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) ))
2724 return FALSE;
2725
2726 if (!submit_op2( emit,
2727 inst_token(SVGA3DOP_MUL),
2728 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ),
2729 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ),
2730 scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) ))
2731 return FALSE;
2732 }
2733
2734 return TRUE;
2735 }
2736
2737 static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
2738 {
2739 unsigned i;
2740
2741 /* PS oDepth is incredibly fragile and it's very hard to catch the
2742 * types of usage that break it during shader emit. Easier just to
2743 * redirect the main program to a temporary and then only touch
2744 * oDepth with a hand-crafted MOV below.
2745 */
2746 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
2747
2748 if (!submit_op1( emit,
2749 inst_token(SVGA3DOP_MOV),
2750 emit->true_pos,
2751 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
2752 return FALSE;
2753 }
2754
2755 /* Similarly for SM20 color outputs... Luckily SM30 isn't so
2756 * fragile.
2757 */
2758 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2759 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2760
2761 /* Potentially override output colors with white for XOR
2762 * logicop workaround.
2763 */
2764 if (emit->unit == PIPE_SHADER_FRAGMENT &&
2765 emit->key.fkey.white_fragments) {
2766
2767 struct src_register one = scalar( get_zero_immediate( emit ),
2768 TGSI_SWIZZLE_W );
2769
2770 if (!submit_op1( emit,
2771 inst_token(SVGA3DOP_MOV),
2772 emit->true_col[i],
2773 one ))
2774 return FALSE;
2775 }
2776 else {
2777 if (!submit_op1( emit,
2778 inst_token(SVGA3DOP_MOV),
2779 emit->true_col[i],
2780 src(emit->temp_col[i]) ))
2781 return FALSE;
2782 }
2783 }
2784 }
2785
2786 return TRUE;
2787 }
2788
2789 static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
2790 {
2791 /* PSIZ output is incredibly fragile and it's very hard to catch
2792 * the types of usage that break it during shader emit. Easier
2793 * just to redirect the main program to a temporary and then only
2794 * touch PSIZ with a hand-crafted MOV below.
2795 */
2796 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
2797
2798 if (!submit_op1( emit,
2799 inst_token(SVGA3DOP_MOV),
2800 emit->true_psiz,
2801 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
2802 return FALSE;
2803 }
2804
2805 /* Need to perform various manipulations on vertex position to cope
2806 * with the different GL and D3D clip spaces.
2807 */
2808 if (emit->key.vkey.need_prescale) {
2809 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2810 SVGA3dShaderDestToken depth = emit->depth_pos;
2811 SVGA3dShaderDestToken pos = emit->true_pos;
2812 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
2813 struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
2814 offset + 0 );
2815 struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
2816 offset + 1 );
2817
2818 if (!submit_op1( emit,
2819 inst_token(SVGA3DOP_MOV),
2820 writemask(depth, TGSI_WRITEMASK_W),
2821 scalar(src(temp_pos), TGSI_SWIZZLE_W) ))
2822 return FALSE;
2823
2824 /* MUL temp_pos.xyz, temp_pos, prescale.scale
2825 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
2826 * --> Note that prescale.trans.w == 0
2827 */
2828 if (!submit_op2( emit,
2829 inst_token(SVGA3DOP_MUL),
2830 writemask(temp_pos, TGSI_WRITEMASK_XYZ),
2831 src(temp_pos),
2832 prescale_scale ))
2833 return FALSE;
2834
2835 if (!submit_op3( emit,
2836 inst_token(SVGA3DOP_MAD),
2837 pos,
2838 swizzle(src(temp_pos), 3, 3, 3, 3),
2839 prescale_trans,
2840 src(temp_pos)))
2841 return FALSE;
2842
2843 /* Also write to depth value */
2844 if (!submit_op3( emit,
2845 inst_token(SVGA3DOP_MAD),
2846 writemask(depth, TGSI_WRITEMASK_Z),
2847 swizzle(src(temp_pos), 3, 3, 3, 3),
2848 prescale_trans,
2849 src(temp_pos) ))
2850 return FALSE;
2851 }
2852 else {
2853 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2854 SVGA3dShaderDestToken depth = emit->depth_pos;
2855 SVGA3dShaderDestToken pos = emit->true_pos;
2856 struct src_register imm_0055 = emit->imm_0055;
2857
2858 /* Adjust GL clipping coordinate space to hardware (D3D-style):
2859 *
2860 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
2861 * MOV result.position, temp_pos
2862 */
2863 if (!submit_op2( emit,
2864 inst_token(SVGA3DOP_DP4),
2865 writemask(temp_pos, TGSI_WRITEMASK_Z),
2866 imm_0055,
2867 src(temp_pos) ))
2868 return FALSE;
2869
2870 if (!submit_op1( emit,
2871 inst_token(SVGA3DOP_MOV),
2872 pos,
2873 src(temp_pos) ))
2874 return FALSE;
2875
2876 /* Move the manipulated depth into the extra texcoord reg */
2877 if (!submit_op1( emit,
2878 inst_token(SVGA3DOP_MOV),
2879 writemask(depth, TGSI_WRITEMASK_ZW),
2880 src(temp_pos) ))
2881 return FALSE;
2882 }
2883
2884 return TRUE;
2885 }
2886
2887 /*
2888 0: IF VFACE :4
2889 1: COLOR = FrontColor;
2890 2: ELSE
2891 3: COLOR = BackColor;
2892 4: ENDIF
2893 */
2894 static boolean emit_light_twoside( struct svga_shader_emitter *emit )
2895 {
2896 struct src_register vface, zero;
2897 struct src_register front[2];
2898 struct src_register back[2];
2899 SVGA3dShaderDestToken color[2];
2900 int count = emit->internal_color_count;
2901 int i;
2902 SVGA3dShaderInstToken if_token;
2903
2904 if (count == 0)
2905 return TRUE;
2906
2907 vface = get_vface( emit );
2908 zero = get_zero_immediate( emit );
2909
2910 /* Can't use get_temp() to allocate the color reg as such
2911 * temporaries will be reclaimed after each instruction by the call
2912 * to reset_temp_regs().
2913 */
2914 for (i = 0; i < count; i++) {
2915 color[i] = dst_register( SVGA3DREG_TEMP,
2916 emit->nr_hw_temp++ );
2917
2918 front[i] = emit->input_map[emit->internal_color_idx[i]];
2919
2920 /* Back is always the next input:
2921 */
2922 back[i] = front[i];
2923 back[i].base.num = front[i].base.num + 1;
2924
2925 /* Reassign the input_map to the actual front-face color:
2926 */
2927 emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
2928 }
2929
2930 if_token = inst_token( SVGA3DOP_IFC );
2931
2932 if (emit->key.fkey.front_ccw)
2933 if_token.control = SVGA3DOPCOMP_LT;
2934 else
2935 if_token.control = SVGA3DOPCOMP_GT;
2936
2937 zero = scalar(zero, TGSI_SWIZZLE_X);
2938
2939 if (!(emit_instruction( emit, if_token ) &&
2940 emit_src( emit, vface ) &&
2941 emit_src( emit, zero ) ))
2942 return FALSE;
2943
2944 for (i = 0; i < count; i++) {
2945 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
2946 return FALSE;
2947 }
2948
2949 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
2950 return FALSE;
2951
2952 for (i = 0; i < count; i++) {
2953 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
2954 return FALSE;
2955 }
2956
2957 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
2958 return FALSE;
2959
2960 return TRUE;
2961 }
2962
2963 /*
2964 0: SETP_GT TEMP, VFACE, 0
2965 where TEMP is a fake frontface register
2966 */
2967 static boolean emit_frontface( struct svga_shader_emitter *emit )
2968 {
2969 struct src_register vface, zero;
2970 SVGA3dShaderDestToken temp;
2971 struct src_register pass, fail;
2972
2973 vface = get_vface( emit );
2974 zero = get_zero_immediate( emit );
2975
2976 /* Can't use get_temp() to allocate the fake frontface reg as such
2977 * temporaries will be reclaimed after each instruction by the call
2978 * to reset_temp_regs().
2979 */
2980 temp = dst_register( SVGA3DREG_TEMP,
2981 emit->nr_hw_temp++ );
2982
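   /* The zero/one immediate appears to hold {0,0,0,1}, so .w supplies the
    * "is front-facing" value (1.0) and .x the back-facing value (0.0);
    * front_ccw merely swaps which one is selected on pass vs. fail.
    */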
2983 if (emit->key.fkey.front_ccw) {
2984 pass = scalar( zero, TGSI_SWIZZLE_X );
2985 fail = scalar( zero, TGSI_SWIZZLE_W );
2986 } else {
2987 pass = scalar( zero, TGSI_SWIZZLE_W );
2988 fail = scalar( zero, TGSI_SWIZZLE_X );
2989 }
2990
2991 if (!emit_conditional(emit, PIPE_FUNC_GREATER,
2992 temp, vface, scalar( zero, TGSI_SWIZZLE_X ),
2993 pass, fail))
2994 return FALSE;
2995
2996 /* Reassign the input_map to the actual front-face color:
2997 */
2998 emit->input_map[emit->internal_frontface_idx] = src(temp);
2999
3000 return TRUE;
3001 }
3002
3003
3004 /**
3005 * Emit code to invert the T component of the incoming texture coordinate.
3006 * This is used for drawing point sprites when
3007 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
3008 */
3009 static boolean emit_inverted_texcoords( struct svga_shader_emitter *emit )
3010 {
3011 struct src_register zero = get_zero_immediate(emit);
3012 struct src_register pos_neg_one = get_pos_neg_one_immediate( emit );
3013 unsigned inverted_texcoords = emit->inverted_texcoords;
3014
3015 while (inverted_texcoords) {
3016 const unsigned unit = ffs(inverted_texcoords) - 1;
3017
3018 assert(emit->inverted_texcoords & (1 << unit));
3019
3020 assert(unit < Elements(emit->ps_true_texcoord));
3021
3022 assert(unit < Elements(emit->ps_inverted_texcoord_input));
3023
3024 assert(emit->ps_inverted_texcoord_input[unit]
3025 < Elements(emit->input_map));
3026
3027 /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
3028 if (!submit_op3(emit,
3029 inst_token(SVGA3DOP_MAD),
3030 dst(emit->ps_inverted_texcoord[unit]),
3031 emit->ps_true_texcoord[unit],
3032 swizzle(pos_neg_one, 0, 3, 0, 0), /* (1, -1, 1, 1) */
3033 swizzle(zero, 0, 3, 0, 0))) /* (0, 1, 0, 0) */
3034 return FALSE;
3035
3036 /* Reassign the input_map entry to the new texcoord register */
3037 emit->input_map[emit->ps_inverted_texcoord_input[unit]] =
3038 emit->ps_inverted_texcoord[unit];
3039
3040 inverted_texcoords &= ~(1 << unit);
3041 }
3042
3043 return TRUE;
3044 }
3045
3046
3047 static INLINE boolean
3048 needs_to_create_zero( struct svga_shader_emitter *emit )
3049 {
3050 int i;
3051
3052 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3053 if (!emit->use_sm30)
3054 return TRUE;
3055
3056 if (emit->key.fkey.light_twoside)
3057 return TRUE;
3058
3059 if (emit->key.fkey.white_fragments)
3060 return TRUE;
3061
3062 if (emit->emit_frontface)
3063 return TRUE;
3064
3065 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
3066 emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 ||
3067 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
3068 return TRUE;
3069
3070 if (emit->inverted_texcoords)
3071 return TRUE;
3072
3073 /* look for any PIPE_SWIZZLE_ZERO/ONE terms */
3074 for (i = 0; i < emit->key.fkey.num_textures; i++) {
3075 if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
3076 emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
3077 emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
3078 emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
3079 return TRUE;
3080 }
3081 }
3082
3083 if (emit->unit == PIPE_SHADER_VERTEX) {
3084 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
3085 return TRUE;
3086 }
3087
3088 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
3089 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
3090 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
3091 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
3092 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
3093 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
3094 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
3095 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
3096 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
3097 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
3098 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
3099 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
3100 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
3101 emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1)
3102 return TRUE;
3103
3104 for (i = 0; i < emit->key.fkey.num_textures; i++) {
3105 if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
3106 return TRUE;
3107 }
3108
3109 return FALSE;
3110 }
3111
3112 static INLINE boolean
3113 needs_to_create_loop_const( struct svga_shader_emitter *emit )
3114 {
3115 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
3116 }
3117
3118 static INLINE boolean
3119 needs_to_create_sincos_consts( struct svga_shader_emitter *emit )
3120 {
3121 return !emit->use_sm30 && (emit->info.opcode_count[TGSI_OPCODE_SIN] >= 1 ||
3122 emit->info.opcode_count[TGSI_OPCODE_COS] >= 1 ||
3123 emit->info.opcode_count[TGSI_OPCODE_SCS] >= 1);
3124 }
3125
3126 static INLINE boolean
3127 needs_to_create_arl_consts( struct svga_shader_emitter *emit )
3128 {
3129 return (emit->num_arl_consts > 0);
3130 }
3131
3132 static INLINE boolean
3133 pre_parse_add_indirect( struct svga_shader_emitter *emit,
3134 int num, int current_arl)
3135 {
3136 int i;
3137 assert(num < 0);
3138
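   /* Record, per ARL instance, the most negative constant offset seen with
    * indirect addressing; this is presumably what create_arl_consts() uses
    * later to emit a suitable bias constant.
    */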
3139 for (i = 0; i < emit->num_arl_consts; ++i) {
3140 if (emit->arl_consts[i].arl_num == current_arl)
3141 break;
3142 }
3143 /* new entry */
3144 if (emit->num_arl_consts == i) {
3145 ++emit->num_arl_consts;
3146 }
3147 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
3148 num :
3149 emit->arl_consts[i].number;
3150 emit->arl_consts[i].arl_num = current_arl;
3151 return TRUE;
3152 }
3153
3154 static boolean
3155 pre_parse_instruction( struct svga_shader_emitter *emit,
3156 const struct tgsi_full_instruction *insn,
3157 int current_arl)
3158 {
3159 if (insn->Src[0].Register.Indirect &&
3160 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
3161 const struct tgsi_full_src_register *reg = &insn->Src[0];
3162 if (reg->Register.Index < 0) {
3163 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3164 }
3165 }
3166
3167 if (insn->Src[1].Register.Indirect &&
3168 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
3169 const struct tgsi_full_src_register *reg = &insn->Src[1];
3170 if (reg->Register.Index < 0) {
3171 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3172 }
3173 }
3174
3175 if (insn->Src[2].Register.Indirect &&
3176 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
3177 const struct tgsi_full_src_register *reg = &insn->Src[2];
3178 if (reg->Register.Index < 0) {
3179 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3180 }
3181 }
3182
3183 return TRUE;
3184 }
3185
3186 static boolean
3187 pre_parse_tokens( struct svga_shader_emitter *emit,
3188 const struct tgsi_token *tokens )
3189 {
3190 struct tgsi_parse_context parse;
3191 int current_arl = 0;
3192
3193 tgsi_parse_init( &parse, tokens );
3194
3195 while (!tgsi_parse_end_of_tokens( &parse )) {
3196 tgsi_parse_token( &parse );
3197 switch (parse.FullToken.Token.Type) {
3198 case TGSI_TOKEN_TYPE_IMMEDIATE:
3199 case TGSI_TOKEN_TYPE_DECLARATION:
3200 break;
3201 case TGSI_TOKEN_TYPE_INSTRUCTION:
3202 if (parse.FullToken.FullInstruction.Instruction.Opcode ==
3203 TGSI_OPCODE_ARL) {
3204 ++current_arl;
3205 }
3206 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
3207 current_arl ))
3208 return FALSE;
3209 break;
3210 default:
3211 break;
3212 }
3213
3214 }
3215 return TRUE;
3216 }
3217
3218 static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit )
3220 {
3221 if (needs_to_create_zero( emit )) {
3222 create_zero_immediate( emit );
3223 }
3224 if (needs_to_create_loop_const( emit )) {
3225 create_loop_const( emit );
3226 }
3227 if (needs_to_create_sincos_consts( emit )) {
3228 create_sincos_consts( emit );
3229 }
3230 if (needs_to_create_arl_consts( emit )) {
3231 create_arl_consts( emit );
3232 }
3233
3234 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3235 if (!emit_ps_preamble( emit ))
3236 return FALSE;
3237
3238 if (emit->key.fkey.light_twoside) {
3239 if (!emit_light_twoside( emit ))
3240 return FALSE;
3241 }
3242 if (emit->emit_frontface) {
3243 if (!emit_frontface( emit ))
3244 return FALSE;
3245 }
3246 if (emit->inverted_texcoords) {
3247 if (!emit_inverted_texcoords( emit ))
3248 return FALSE;
3249 }
3250 }
3251
3252 return TRUE;
3253 }
3254
3255 boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
3256 const struct tgsi_token *tokens )
3257 {
3258 struct tgsi_parse_context parse;
3259 boolean ret = TRUE;
3260 boolean helpers_emitted = FALSE;
3261 unsigned line_nr = 0;
3262
3263 tgsi_parse_init( &parse, tokens );
3264 emit->internal_imm_count = 0;
3265
3266 if (emit->unit == PIPE_SHADER_VERTEX) {
3267 ret = emit_vs_preamble( emit );
3268 if (!ret)
3269 goto done;
3270 }
3271
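   /* First pass over the token stream: gather information (such as
    * negative indirect constant offsets per ARL) that must be known before
    * any instructions are emitted.
    */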
3272 pre_parse_tokens(emit, tokens);
3273
3274 while (!tgsi_parse_end_of_tokens( &parse )) {
3275 tgsi_parse_token( &parse );
3276
3277 switch (parse.FullToken.Token.Type) {
3278 case TGSI_TOKEN_TYPE_IMMEDIATE:
3279 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
3280 if (!ret)
3281 goto done;
3282 break;
3283
3284 case TGSI_TOKEN_TYPE_DECLARATION:
3285 if (emit->use_sm30)
3286 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
3287 else
3288 ret = svga_translate_decl_sm20( emit, &parse.FullToken.FullDeclaration );
3289 if (!ret)
3290 goto done;
3291 break;
3292
3293 case TGSI_TOKEN_TYPE_INSTRUCTION:
3294 if (!helpers_emitted) {
3295 if (!svga_shader_emit_helpers( emit ))
3296 goto done;
3297 helpers_emitted = TRUE;
3298 }
3299 ret = svga_emit_instruction( emit,
3300 line_nr++,
3301 &parse.FullToken.FullInstruction );
3302 if (!ret)
3303 goto done;
3304 break;
3305 default:
3306 break;
3307 }
3308
3309 reset_temp_regs( emit );
3310 }
3311
3312    /* If we finished while emitting a subroutine, terminate it here with
3313     * RET; the hardware doesn't tolerate subroutines that aren't
3314     * terminated with RET before the final END.
3315     */
3316 if (!emit->in_main_func) {
3317 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
3318 if (!ret)
3319 goto done;
3320 }
3321
3322 assert(emit->dynamic_branching_level == 0);
3323
3324 /* Need to terminate the whole shader:
3325 */
3326 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
3327 if (!ret)
3328 goto done;
3329
3330 done:
3331 tgsi_parse_free( &parse );
3332 return ret;
3333 }
3334