[mesa.git] / src / gallium / drivers / svga / svga_tgsi_insn.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "util/u_memory.h"
30
31 #include "svga_tgsi_emit.h"
32 #include "svga_context.h"
33
34
35 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
36 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
37
38
39
40
41 static unsigned
42 translate_opcode(
43 uint opcode )
44 {
45 switch (opcode) {
46 case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
47 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
48 case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC;
49 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
50 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
51 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
52 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
53 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
54 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
55 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
56 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
57 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
58 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
59 case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM;
60 case TGSI_OPCODE_SSG: return SVGA3DOP_SGN;
61 default:
62       debug_printf("Unknown opcode %u\n", opcode);
63 assert( 0 );
64 return SVGA3DOP_LAST_INST;
65 }
66 }
67
68
69 static unsigned translate_file( unsigned file )
70 {
71 switch (file) {
72 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
73 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
74 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
75 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
76 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
77 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
78 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
79 default:
80 assert( 0 );
81 return SVGA3DREG_TEMP;
82 }
83 }
84
85
86
87
88
89
90 static SVGA3dShaderDestToken
91 translate_dst_register( struct svga_shader_emitter *emit,
92 const struct tgsi_full_instruction *insn,
93 unsigned idx )
94 {
95 const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
96 SVGA3dShaderDestToken dest;
97
98 switch (reg->Register.File) {
99 case TGSI_FILE_OUTPUT:
100 /* Output registers encode semantic information in their name.
101        * Need to look up a table built at declaration time:
102 */
103 dest = emit->output_map[reg->Register.Index];
104 break;
105
106 default:
107 dest = dst_register( translate_file( reg->Register.File ),
108 reg->Register.Index );
109 break;
110 }
111
112 dest.mask = reg->Register.WriteMask;
113 assert(dest.mask);
114
115 if (insn->Instruction.Saturate)
116 dest.dstMod = SVGA3DDSTMOD_SATURATE;
117
118 return dest;
119 }
120
121
122 static struct src_register
123 swizzle( struct src_register src,
124 int x,
125 int y,
126 int z,
127 int w )
128 {
129 x = (src.base.swizzle >> (x * 2)) & 0x3;
130 y = (src.base.swizzle >> (y * 2)) & 0x3;
131 z = (src.base.swizzle >> (z * 2)) & 0x3;
132 w = (src.base.swizzle >> (w * 2)) & 0x3;
133
134 src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w);
135
136 return src;
137 }
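/* Illustrative note (not part of the original code): swizzle() composes the
 * requested selection with whatever swizzle the source already carries.  For
 * example, if src already selects .wzyx, then swizzle(src, 0, 0, 0, 0) --
 * i.e. scalar(src, TGSI_SWIZZLE_X) -- yields .wwww rather than .xxxx, because
 * each requested component is looked up in the existing 2-bit-per-channel
 * swizzle field.
 */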
138
139 static struct src_register
140 scalar( struct src_register src,
141 int comp )
142 {
143 return swizzle( src, comp, comp, comp, comp );
144 }
145
146 static INLINE boolean
147 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
148 {
149 int i;
150
151 for (i = 0; i < emit->num_arl_consts; ++i) {
152 if (emit->arl_consts[i].arl_num == emit->current_arl)
153 return TRUE;
154 }
155 return FALSE;
156 }
157
158 static INLINE int
159 svga_arl_adjustment( const struct svga_shader_emitter *emit )
160 {
161 int i;
162
163 for (i = 0; i < emit->num_arl_consts; ++i) {
164 if (emit->arl_consts[i].arl_num == emit->current_arl)
165 return emit->arl_consts[i].number;
166 }
167 return 0;
168 }
169
170 static struct src_register
171 translate_src_register( const struct svga_shader_emitter *emit,
172 const struct tgsi_full_src_register *reg )
173 {
174 struct src_register src;
175
176 switch (reg->Register.File) {
177 case TGSI_FILE_INPUT:
178 /* Input registers are referred to by their semantic name rather
179        * than by index.  Use the mapping built up from the decls:
180 */
181 src = emit->input_map[reg->Register.Index];
182 break;
183
184 case TGSI_FILE_IMMEDIATE:
185 /* Immediates are appended after TGSI constants in the D3D
186 * constant buffer.
187 */
188 src = src_register( translate_file( reg->Register.File ),
189 reg->Register.Index +
190 emit->imm_start );
191 break;
192
193 default:
194 src = src_register( translate_file( reg->Register.File ),
195 reg->Register.Index );
196
197 break;
198 }
199
200 /* Indirect addressing.
201 */
202 if (reg->Register.Indirect) {
203 if (emit->unit == PIPE_SHADER_FRAGMENT) {
204 /* Pixel shaders have only loop registers for relative
205 * addressing into inputs. Ignore the redundant address
206 * register, the contents of aL should be in sync with it.
207 */
208 if (reg->Register.File == TGSI_FILE_INPUT) {
209 src.base.relAddr = 1;
210 src.indirect = src_token(SVGA3DREG_LOOP, 0);
211 }
212 }
213 else {
214 /* Constant buffers only.
215 */
216 if (reg->Register.File == TGSI_FILE_CONSTANT) {
217 /* we shift the offset towards the minimum */
218 if (svga_arl_needs_adjustment( emit )) {
219 src.base.num -= svga_arl_adjustment( emit );
220 }
221 src.base.relAddr = 1;
222
223 /* Not really sure what should go in the second token:
224 */
225 src.indirect = src_token( SVGA3DREG_ADDR,
226 reg->Indirect.Index );
227
228 src.indirect.swizzle = SWIZZLE_XXXX;
229 }
230 }
231 }
232
233 src = swizzle( src,
234 reg->Register.SwizzleX,
235 reg->Register.SwizzleY,
236 reg->Register.SwizzleZ,
237 reg->Register.SwizzleW );
238
239 /* src.mod isn't a bitfield, unfortunately:
240 * See tgsi_util_get_full_src_register_sign_mode for implementation details.
241 */
242 if (reg->Register.Absolute) {
243 if (reg->Register.Negate)
244 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
245 else
246 src.base.srcMod = SVGA3DSRCMOD_ABS;
247 }
248 else {
249 if (reg->Register.Negate)
250 src.base.srcMod = SVGA3DSRCMOD_NEG;
251 else
252 src.base.srcMod = SVGA3DSRCMOD_NONE;
253 }
254
255 return src;
256 }
257
258
259 /*
260     * Get a new temporary register.
261 */
262 static INLINE SVGA3dShaderDestToken
263 get_temp( struct svga_shader_emitter *emit )
264 {
265 int i = emit->nr_hw_temp + emit->internal_temp_count++;
266
267 return dst_register( SVGA3DREG_TEMP, i );
268 }
269
270 /* Release a single temp. Currently only effective if it was the last
271 * allocated temp, otherwise release will be delayed until the next
272 * call to reset_temp_regs().
273 */
274 static INLINE void
275 release_temp( struct svga_shader_emitter *emit,
276 SVGA3dShaderDestToken temp )
277 {
278 if (temp.num == emit->internal_temp_count - 1)
279 emit->internal_temp_count--;
280 }
281
282 static void reset_temp_regs( struct svga_shader_emitter *emit )
283 {
284 emit->internal_temp_count = 0;
285 }
286
287
288 static boolean submit_op0( struct svga_shader_emitter *emit,
289 SVGA3dShaderInstToken inst,
290 SVGA3dShaderDestToken dest )
291 {
292 return (emit_instruction( emit, inst ) &&
293 emit_dst( emit, dest ));
294 }
295
296 static boolean submit_op1( struct svga_shader_emitter *emit,
297 SVGA3dShaderInstToken inst,
298 SVGA3dShaderDestToken dest,
299 struct src_register src0 )
300 {
301 return emit_op1( emit, inst, dest, src0 );
302 }
303
304
305 /* SVGA shaders may not refer to >1 constant register in a single
306 * instruction. This function checks for that usage and inserts a
307 * move to temporary if detected.
308 *
309 * The same applies to input registers -- at most a single input
310 * register may be read by any instruction.
311 */
312 static boolean submit_op2( struct svga_shader_emitter *emit,
313 SVGA3dShaderInstToken inst,
314 SVGA3dShaderDestToken dest,
315 struct src_register src0,
316 struct src_register src1 )
317 {
318 SVGA3dShaderDestToken temp;
319 SVGA3dShaderRegType type0, type1;
320 boolean need_temp = FALSE;
321
322 temp.value = 0;
323 type0 = SVGA3dShaderGetRegType( src0.base.value );
324 type1 = SVGA3dShaderGetRegType( src1.base.value );
325
326 if (type0 == SVGA3DREG_CONST &&
327 type1 == SVGA3DREG_CONST &&
328 src0.base.num != src1.base.num)
329 need_temp = TRUE;
330
331 if (type0 == SVGA3DREG_INPUT &&
332 type1 == SVGA3DREG_INPUT &&
333 src0.base.num != src1.base.num)
334 need_temp = TRUE;
335
336 if (need_temp)
337 {
338 temp = get_temp( emit );
339
340 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 ))
341 return FALSE;
342
343 src0 = src( temp );
344 }
345
346 if (!emit_op2( emit, inst, dest, src0, src1 ))
347 return FALSE;
348
349 if (need_temp)
350 release_temp( emit, temp );
351
352 return TRUE;
353 }
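/* Sketch of the rewrite performed above (illustrative, not emitted verbatim):
 * a TGSI "ADD dst, CONST[0], CONST[1]" would map to "add dst, c0, c1", which
 * reads two different constant registers in a single instruction.  submit_op2
 * instead emits:
 *    mov  rTMP, c0
 *    add  dst, rTMP, c1
 * The same rewrite is applied when two different input registers are read.
 */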
354
355
356 /* SVGA shaders may not refer to >1 constant register in a single
357 * instruction. This function checks for that usage and inserts a
358 * move to temporary if detected.
359 */
360 static boolean submit_op3( struct svga_shader_emitter *emit,
361 SVGA3dShaderInstToken inst,
362 SVGA3dShaderDestToken dest,
363 struct src_register src0,
364 struct src_register src1,
365 struct src_register src2 )
366 {
367 SVGA3dShaderDestToken temp0;
368 SVGA3dShaderDestToken temp1;
369 boolean need_temp0 = FALSE;
370 boolean need_temp1 = FALSE;
371 SVGA3dShaderRegType type0, type1, type2;
372
373 temp0.value = 0;
374 temp1.value = 0;
375 type0 = SVGA3dShaderGetRegType( src0.base.value );
376 type1 = SVGA3dShaderGetRegType( src1.base.value );
377 type2 = SVGA3dShaderGetRegType( src2.base.value );
378
379 if (inst.op != SVGA3DOP_SINCOS) {
380 if (type0 == SVGA3DREG_CONST &&
381 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
382 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
383 need_temp0 = TRUE;
384
385 if (type1 == SVGA3DREG_CONST &&
386 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
387 need_temp1 = TRUE;
388 }
389
390 if (type0 == SVGA3DREG_INPUT &&
391 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
392 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
393 need_temp0 = TRUE;
394
395 if (type1 == SVGA3DREG_INPUT &&
396 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
397 need_temp1 = TRUE;
398
399 if (need_temp0)
400 {
401 temp0 = get_temp( emit );
402
403 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
404 return FALSE;
405
406 src0 = src( temp0 );
407 }
408
409 if (need_temp1)
410 {
411 temp1 = get_temp( emit );
412
413 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 ))
414 return FALSE;
415
416 src1 = src( temp1 );
417 }
418
419 if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
420 return FALSE;
421
422 if (need_temp1)
423 release_temp( emit, temp1 );
424 if (need_temp0)
425 release_temp( emit, temp0 );
426 return TRUE;
427 }
428
429
430
431
432 /* SVGA shaders may not refer to >1 constant register in a single
433 * instruction. This function checks for that usage and inserts a
434 * move to temporary if detected.
435 */
436 static boolean submit_op4( struct svga_shader_emitter *emit,
437 SVGA3dShaderInstToken inst,
438 SVGA3dShaderDestToken dest,
439 struct src_register src0,
440 struct src_register src1,
441 struct src_register src2,
442 struct src_register src3)
443 {
444 SVGA3dShaderDestToken temp0;
445 SVGA3dShaderDestToken temp3;
446 boolean need_temp0 = FALSE;
447 boolean need_temp3 = FALSE;
448 SVGA3dShaderRegType type0, type1, type2, type3;
449
450 temp0.value = 0;
451 temp3.value = 0;
452 type0 = SVGA3dShaderGetRegType( src0.base.value );
453 type1 = SVGA3dShaderGetRegType( src1.base.value );
454 type2 = SVGA3dShaderGetRegType( src2.base.value );
455    type3 = SVGA3dShaderGetRegType( src3.base.value );
456
457 /* Make life a little easier - this is only used by the TXD
458     * instruction, which is guaranteed to have the sampler (neither a
459     * constant nor an input reg) in at least one slot:
460 */
461 assert(type1 == SVGA3DREG_SAMPLER);
462
463 if (type0 == SVGA3DREG_CONST &&
464 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
465 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
466 need_temp0 = TRUE;
467
468 if (type3 == SVGA3DREG_CONST &&
469 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
470 need_temp3 = TRUE;
471
472 if (type0 == SVGA3DREG_INPUT &&
473 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
474 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
475 need_temp0 = TRUE;
476
477 if (type3 == SVGA3DREG_INPUT &&
478 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
479 need_temp3 = TRUE;
480
481 if (need_temp0)
482 {
483 temp0 = get_temp( emit );
484
485 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
486 return FALSE;
487
488 src0 = src( temp0 );
489 }
490
491 if (need_temp3)
492 {
493 temp3 = get_temp( emit );
494
495 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 ))
496 return FALSE;
497
498 src3 = src( temp3 );
499 }
500
501 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
502 return FALSE;
503
504 if (need_temp3)
505 release_temp( emit, temp3 );
506 if (need_temp0)
507 release_temp( emit, temp0 );
508 return TRUE;
509 }
510
511
512 static boolean emit_def_const( struct svga_shader_emitter *emit,
513 SVGA3dShaderConstType type,
514 unsigned idx,
515 float a,
516 float b,
517 float c,
518 float d )
519 {
520 SVGA3DOpDefArgs def;
521 SVGA3dShaderInstToken opcode;
522
523 switch (type) {
524 case SVGA3D_CONST_TYPE_FLOAT:
525 opcode = inst_token( SVGA3DOP_DEF );
526 def.dst = dst_register( SVGA3DREG_CONST, idx );
527 def.constValues[0] = a;
528 def.constValues[1] = b;
529 def.constValues[2] = c;
530 def.constValues[3] = d;
531 break;
532 case SVGA3D_CONST_TYPE_INT:
533 opcode = inst_token( SVGA3DOP_DEFI );
534 def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
535 def.constIValues[0] = (int)a;
536 def.constIValues[1] = (int)b;
537 def.constIValues[2] = (int)c;
538 def.constIValues[3] = (int)d;
539 break;
540 default:
541 assert(0);
542 opcode = inst_token( SVGA3DOP_NOP );
543 break;
544 }
545
546 if (!emit_instruction(emit, opcode) ||
547 !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
548 return FALSE;
549
550 return TRUE;
551 }
552
553 static INLINE boolean
554 create_zero_immediate( struct svga_shader_emitter *emit )
555 {
556 unsigned idx = emit->nr_hw_float_const++;
557
558 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
559 idx, 0, 0, 0, 1 ))
560 return FALSE;
561
562 emit->zero_immediate_idx = idx;
563 emit->created_zero_immediate = TRUE;
564
565 return TRUE;
566 }
567
568 static INLINE boolean
569 create_loop_const( struct svga_shader_emitter *emit )
570 {
571 unsigned idx = emit->nr_hw_int_const++;
572
573 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
574 255, /* iteration count */
575 0, /* initial value */
576 1, /* step size */
577 0 /* not used, must be 0 */))
578 return FALSE;
579
580 emit->loop_const_idx = idx;
581 emit->created_loop_const = TRUE;
582
583 return TRUE;
584 }
585
586 static INLINE boolean
587 create_sincos_consts( struct svga_shader_emitter *emit )
588 {
589 unsigned idx = emit->nr_hw_float_const++;
590
591 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
592 -1.5500992e-006f,
593 -2.1701389e-005f,
594 0.0026041667f,
595 0.00026041668f ))
596 return FALSE;
597
598 emit->sincos_consts_idx = idx;
599 idx = emit->nr_hw_float_const++;
600
601 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
602 -0.020833334f,
603 -0.12500000f,
604 1.0f,
605 0.50000000f ))
606 return FALSE;
607
608 emit->created_sincos_consts = TRUE;
609
610 return TRUE;
611 }
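/* Note (assumption, not stated in the original source): these two vectors
 * appear to match the D3DSINCOSCONST1/D3DSINCOSCONST2 polynomial coefficients
 * required by the D3D9 SINCOS macro expansion for the 2.x shader profiles;
 * SM3.0 SINCOS takes no extra constant arguments, which is why
 * do_emit_sincos() only passes them when !use_sm30.
 */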
612
613 static INLINE boolean
614 create_arl_consts( struct svga_shader_emitter *emit )
615 {
616 int i;
617
618 for (i = 0; i < emit->num_arl_consts; i += 4) {
619 int j;
620 unsigned idx = emit->nr_hw_float_const++;
621 float vals[4];
622 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
623 vals[j] = emit->arl_consts[i + j].number;
624 emit->arl_consts[i + j].idx = idx;
625 switch (j) {
626 case 0:
627 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
628 break;
629 case 1:
630             emit->arl_consts[i + j].swizzle = TGSI_SWIZZLE_Y;
631 break;
632 case 2:
633             emit->arl_consts[i + j].swizzle = TGSI_SWIZZLE_Z;
634 break;
635 case 3:
636             emit->arl_consts[i + j].swizzle = TGSI_SWIZZLE_W;
637 break;
638 }
639 }
640 while (j < 4)
641 vals[j++] = 0;
642
643 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
644 vals[0], vals[1],
645 vals[2], vals[3]))
646 return FALSE;
647 }
648
649 return TRUE;
650 }
651
652 static INLINE struct src_register
653 get_vface( struct svga_shader_emitter *emit )
654 {
655 assert(emit->emitted_vface);
656 return src_register(SVGA3DREG_MISCTYPE,
657 SVGA3DMISCREG_FACE);
658 }
659
660 /* returns {0, 0, 0, 1} immediate */
661 static INLINE struct src_register
662 get_zero_immediate( struct svga_shader_emitter *emit )
663 {
664 assert(emit->created_zero_immediate);
665 assert(emit->zero_immediate_idx >= 0);
666 return src_register( SVGA3DREG_CONST,
667 emit->zero_immediate_idx );
668 }
669
670 /* returns the loop const */
671 static INLINE struct src_register
672 get_loop_const( struct svga_shader_emitter *emit )
673 {
674 assert(emit->created_loop_const);
675 assert(emit->loop_const_idx >= 0);
676 return src_register( SVGA3DREG_CONSTINT,
677 emit->loop_const_idx );
678 }
679
680 /* returns a sincos const */
681 static INLINE struct src_register
682 get_sincos_const( struct svga_shader_emitter *emit,
683 unsigned index )
684 {
685 assert(emit->created_sincos_consts);
686 assert(emit->sincos_consts_idx >= 0);
687 assert(index == 0 || index == 1);
688 return src_register( SVGA3DREG_CONST,
689 emit->sincos_consts_idx + index );
690 }
691
692 static INLINE struct src_register
693 get_fake_arl_const( struct svga_shader_emitter *emit )
694 {
695 struct src_register reg;
696 int idx = 0, swizzle = 0, i;
697
698 for (i = 0; i < emit->num_arl_consts; ++ i) {
699 if (emit->arl_consts[i].arl_num == emit->current_arl) {
700 idx = emit->arl_consts[i].idx;
701 swizzle = emit->arl_consts[i].swizzle;
702 }
703 }
704
705 reg = src_register( SVGA3DREG_CONST, idx );
706 return scalar(reg, swizzle);
707 }
708
709 static INLINE struct src_register
710 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
711 {
712 int idx;
713 struct src_register reg;
714
715 /* the width/height indexes start right after constants */
716 idx = emit->key.fkey.tex[sampler_num].width_height_idx +
717 emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
718
719 reg = src_register( SVGA3DREG_CONST, idx );
720 return reg;
721 }
722
723 static boolean emit_fake_arl(struct svga_shader_emitter *emit,
724 const struct tgsi_full_instruction *insn)
725 {
726 const struct src_register src0 = translate_src_register(
727 emit, &insn->Src[0] );
728 struct src_register src1 = get_fake_arl_const( emit );
729 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
730 SVGA3dShaderDestToken tmp = get_temp( emit );
731
732 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
733 return FALSE;
734
735 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
736 src1))
737 return FALSE;
738
739 /* replicate the original swizzle */
740 src1 = src(tmp);
741 src1.base.swizzle = src0.base.swizzle;
742
743 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
744 dst, src1 );
745 }
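/* Descriptive note: translate_src_register() subtracts svga_arl_adjustment()
 * from the base index of relatively-addressed constants ("shift the offset
 * towards the minimum"), and the sequence above adds that same per-ARL
 * constant back into the value loaded into the address register, so the
 * effective address (a0 + base) is unchanged.
 */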
746
747 static boolean emit_if(struct svga_shader_emitter *emit,
748 const struct tgsi_full_instruction *insn)
749 {
750 const struct src_register src = translate_src_register(
751 emit, &insn->Src[0] );
752 struct src_register zero = get_zero_immediate( emit );
753 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
754
755 if_token.control = SVGA3DOPCOMPC_NE;
756 zero = scalar(zero, TGSI_SWIZZLE_X);
757
758 emit->dynamic_branching_level++;
759
760 return (emit_instruction( emit, if_token ) &&
761 emit_src( emit, src ) &&
762 emit_src( emit, zero ) );
763 }
764
765 static boolean emit_endif(struct svga_shader_emitter *emit,
766 const struct tgsi_full_instruction *insn)
767 {
768 emit->dynamic_branching_level--;
769
770 return (emit_instruction( emit,
771 inst_token( SVGA3DOP_ENDIF )));
772 }
773
774 static boolean emit_else(struct svga_shader_emitter *emit,
775 const struct tgsi_full_instruction *insn)
776 {
777 return (emit_instruction( emit,
778 inst_token( SVGA3DOP_ELSE )));
779 }
780
781 /* Translate the following TGSI FLR instruction.
782 * FLR DST, SRC
783 * To the following SVGA3D instruction sequence.
784 * FRC TMP, SRC
785 * SUB DST, SRC, TMP
786 */
787 static boolean emit_floor(struct svga_shader_emitter *emit,
788 const struct tgsi_full_instruction *insn )
789 {
790 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
791 const struct src_register src0 = translate_src_register(
792 emit, &insn->Src[0] );
793 SVGA3dShaderDestToken temp = get_temp( emit );
794
795 /* FRC TMP, SRC */
796 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
797 return FALSE;
798
799 /* SUB DST, SRC, TMP */
800 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
801 negate( src( temp ) ) ))
802 return FALSE;
803
804 return TRUE;
805 }
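/* Worked example: for SRC = -1.3, FRC yields -1.3 - floor(-1.3) = 0.7, and
 * SRC - TMP = -1.3 - 0.7 = -2.0 = floor(-1.3).  The "SUB" in the header
 * comment is emitted as an ADD with a negated source modifier.
 */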
806
807
808 /* Translate the following TGSI CMP instruction.
809 * CMP DST, SRC0, SRC1, SRC2
810 * To the following SVGA3D instruction sequence.
811  *    CMP DST, SRC0, SRC2, SRC1  (operands swapped because TGSI CMP selects SRC1 where SRC0 < 0, while the SVGA3D CMP selects it where SRC0 >= 0)
812 */
813 static boolean emit_cmp(struct svga_shader_emitter *emit,
814 const struct tgsi_full_instruction *insn )
815 {
816 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
817 const struct src_register src0 = translate_src_register(
818 emit, &insn->Src[0] );
819 const struct src_register src1 = translate_src_register(
820 emit, &insn->Src[1] );
821 const struct src_register src2 = translate_src_register(
822 emit, &insn->Src[2] );
823
824 if (emit->unit == PIPE_SHADER_VERTEX) {
825 SVGA3dShaderDestToken temp = get_temp(emit);
826 struct src_register zero = scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X);
827
828 /* Since vertex shaders don't support the CMP instruction,
829 * simulate it with SLT and LRP instructions.
830 * SLT TMP, SRC0, 0.0
831 * LRP DST, TMP, SRC1, SRC2
832 */
833 if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero))
834 return FALSE;
835 return submit_op3(emit, inst_token(SVGA3DOP_LRP), dst, src(temp), src1, src2);
836 }
837
838 /* CMP DST, SRC0, SRC2, SRC1 */
839 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
840 }
841
842
843
844 /* Translate the following TGSI DIV instruction.
845 * DIV DST.xy, SRC0, SRC1
846 * To the following SVGA3D instruction sequence.
847 * RCP TMP.x, SRC1.xxxx
848 * RCP TMP.y, SRC1.yyyy
849 * MUL DST.xy, SRC0, TMP
850 */
851 static boolean emit_div(struct svga_shader_emitter *emit,
852 const struct tgsi_full_instruction *insn )
853 {
854 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
855 const struct src_register src0 = translate_src_register(
856 emit, &insn->Src[0] );
857 const struct src_register src1 = translate_src_register(
858 emit, &insn->Src[1] );
859 SVGA3dShaderDestToken temp = get_temp( emit );
860 int i;
861
862 /* For each enabled element, perform a RCP instruction. Note that
863 * RCP is scalar in SVGA3D:
864 */
865 for (i = 0; i < 4; i++) {
866 unsigned channel = 1 << i;
867 if (dst.mask & channel) {
868 /* RCP TMP.?, SRC1.???? */
869 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
870 writemask(temp, channel),
871 scalar(src1, i) ))
872 return FALSE;
873 }
874 }
875
876 /* Then multiply them out with a single mul:
877 *
878 * MUL DST, SRC0, TMP
879 */
880 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
881 src( temp ) ))
882 return FALSE;
883
884 return TRUE;
885 }
886
887 /* Translate the following TGSI DP2 instruction.
888 * DP2 DST, SRC1, SRC2
889 * To the following SVGA3D instruction sequence.
890 * MUL TMP, SRC1, SRC2
891 * ADD DST, TMP.xxxx, TMP.yyyy
892 */
893 static boolean emit_dp2(struct svga_shader_emitter *emit,
894 const struct tgsi_full_instruction *insn )
895 {
896 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
897 const struct src_register src0 = translate_src_register(
898 emit, &insn->Src[0] );
899 const struct src_register src1 = translate_src_register(
900 emit, &insn->Src[1] );
901 SVGA3dShaderDestToken temp = get_temp( emit );
902 struct src_register temp_src0, temp_src1;
903
904 /* MUL TMP, SRC1, SRC2 */
905 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
906 return FALSE;
907
908 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
909 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
910
911 /* ADD DST, TMP.xxxx, TMP.yyyy */
912 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
913 temp_src0, temp_src1 ))
914 return FALSE;
915
916 return TRUE;
917 }
918
919
920 /* Translate the following TGSI DPH instruction.
921 * DPH DST, SRC1, SRC2
922 * To the following SVGA3D instruction sequence.
923 * DP3 TMP, SRC1, SRC2
924 * ADD DST, TMP, SRC2.wwww
925 */
926 static boolean emit_dph(struct svga_shader_emitter *emit,
927 const struct tgsi_full_instruction *insn )
928 {
929 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
930 const struct src_register src0 = translate_src_register(
931 emit, &insn->Src[0] );
932 struct src_register src1 = translate_src_register(
933 emit, &insn->Src[1] );
934 SVGA3dShaderDestToken temp = get_temp( emit );
935
936 /* DP3 TMP, SRC1, SRC2 */
937 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
938 return FALSE;
939
940 src1 = scalar(src1, TGSI_SWIZZLE_W);
941
942 /* ADD DST, TMP, SRC2.wwww */
943 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
944 src( temp ), src1 ))
945 return FALSE;
946
947 return TRUE;
948 }
949
950 /* Translate the following TGSI NRM instruction.
951 * NRM DST, SRC
952 * To the following SVGA3D instruction sequence.
953 * DP3 TMP, SRC, SRC
954 * RSQ TMP, TMP
955 * MUL DST, SRC, TMP
956 */
957 static boolean emit_nrm(struct svga_shader_emitter *emit,
958 const struct tgsi_full_instruction *insn )
959 {
960 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
961 const struct src_register src0 = translate_src_register(
962 emit, &insn->Src[0] );
963 SVGA3dShaderDestToken temp = get_temp( emit );
964
965 /* DP3 TMP, SRC, SRC */
966 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 ))
967 return FALSE;
968
969 /* RSQ TMP, TMP */
970 if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp )))
971 return FALSE;
972
973 /* MUL DST, SRC, TMP */
974 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst,
975 src0, src( temp )))
976 return FALSE;
977
978 return TRUE;
979
980 }
981
982 static boolean do_emit_sincos(struct svga_shader_emitter *emit,
983 SVGA3dShaderDestToken dst,
984 struct src_register src0)
985 {
986 src0 = scalar(src0, TGSI_SWIZZLE_X);
987
988 if (emit->use_sm30) {
989 return submit_op1( emit, inst_token( SVGA3DOP_SINCOS ),
990 dst, src0 );
991 } else {
992 struct src_register const1 = get_sincos_const( emit, 0 );
993 struct src_register const2 = get_sincos_const( emit, 1 );
994
995 return submit_op3( emit, inst_token( SVGA3DOP_SINCOS ),
996 dst, src0, const1, const2 );
997 }
998 }
999
1000 static boolean emit_sincos(struct svga_shader_emitter *emit,
1001 const struct tgsi_full_instruction *insn)
1002 {
1003 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1004 struct src_register src0 = translate_src_register(
1005 emit, &insn->Src[0] );
1006 SVGA3dShaderDestToken temp = get_temp( emit );
1007
1008 /* SCS TMP SRC */
1009 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
1010 return FALSE;
1011
1012 /* MOV DST TMP */
1013 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
1014 return FALSE;
1015
1016 return TRUE;
1017 }
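/* Note: as with the D3D sincos instruction, the cosine is written to .x and
 * the sine to .y, which matches the TGSI SCS convention; emit_cos() and
 * emit_sin() below pick out the respective component.
 */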
1018
1019 /*
1020 * SCS TMP SRC
1021 * MOV DST TMP.yyyy
1022 */
1023 static boolean emit_sin(struct svga_shader_emitter *emit,
1024 const struct tgsi_full_instruction *insn )
1025 {
1026 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1027 struct src_register src0 = translate_src_register(
1028 emit, &insn->Src[0] );
1029 SVGA3dShaderDestToken temp = get_temp( emit );
1030
1031 /* SCS TMP SRC */
1032 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1033 return FALSE;
1034
1035 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1036
1037 /* MOV DST TMP.yyyy */
1038 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1039 return FALSE;
1040
1041 return TRUE;
1042 }
1043
1044 /*
1045 * SCS TMP SRC
1046 * MOV DST TMP.xxxx
1047 */
1048 static boolean emit_cos(struct svga_shader_emitter *emit,
1049 const struct tgsi_full_instruction *insn )
1050 {
1051 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1052 struct src_register src0 = translate_src_register(
1053 emit, &insn->Src[0] );
1054 SVGA3dShaderDestToken temp = get_temp( emit );
1055
1056 /* SCS TMP SRC */
1057 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1058 return FALSE;
1059
1060 src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1061
1062 /* MOV DST TMP.xxxx */
1063 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1064 return FALSE;
1065
1066 return TRUE;
1067 }
1068
1069
1070 /*
1071  * ADD DST, SRC0, negate(SRC1)
1072 */
1073 static boolean emit_sub(struct svga_shader_emitter *emit,
1074 const struct tgsi_full_instruction *insn)
1075 {
1076 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1077 struct src_register src0 = translate_src_register(
1078 emit, &insn->Src[0] );
1079 struct src_register src1 = translate_src_register(
1080 emit, &insn->Src[1] );
1081
1082 src1 = negate(src1);
1083
1084 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1085 src0, src1 ))
1086 return FALSE;
1087
1088 return TRUE;
1089 }
1090
1091
1092 static boolean emit_kil(struct svga_shader_emitter *emit,
1093 const struct tgsi_full_instruction *insn )
1094 {
1095 SVGA3dShaderInstToken inst;
1096 const struct tgsi_full_src_register *reg = &insn->Src[0];
1097 struct src_register src0;
1098
1099 inst = inst_token( SVGA3DOP_TEXKILL );
1100 src0 = translate_src_register( emit, reg );
1101
1102 if (reg->Register.Absolute ||
1103 reg->Register.Negate ||
1104 reg->Register.Indirect ||
1105 reg->Register.SwizzleX != 0 ||
1106 reg->Register.SwizzleY != 1 ||
1107 reg->Register.SwizzleZ != 2 ||
1108 reg->Register.File != TGSI_FILE_TEMPORARY)
1109 {
1110 SVGA3dShaderDestToken temp = get_temp( emit );
1111
1112 submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 );
1113 src0 = src( temp );
1114 }
1115
1116 return submit_op0( emit, inst, dst(src0) );
1117 }
1118
1119
1120 /* The Mesa state tracker always emits KILP as an unconditional
1121  * KIL. */
1122 static boolean emit_kilp(struct svga_shader_emitter *emit,
1123 const struct tgsi_full_instruction *insn )
1124 {
1125 SVGA3dShaderInstToken inst;
1126 SVGA3dShaderDestToken temp;
1127 struct src_register one = scalar( get_zero_immediate( emit ),
1128 TGSI_SWIZZLE_W );
1129
1130 inst = inst_token( SVGA3DOP_TEXKILL );
1131
1132    /* texkill doesn't allow negation on the operand, so let's move
1133 * negation of {1} to a temp register */
1134 temp = get_temp( emit );
1135 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1136 negate( one ) ))
1137 return FALSE;
1138
1139 return submit_op0( emit, inst, temp );
1140 }
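/* Why the negated constant works (descriptive note): TEXKILL discards the
 * fragment when any of the register's first three components is negative,
 * so a temp holding -1 in every channel kills unconditionally.
 */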
1141
1142 /* Implement conditionals by initializing destination reg to 'fail',
1143  * then setting the predicate reg with SETP, then moving 'pass' to dest
1144  * based on the predicate reg.
1145 *
1146 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1147 * MOV dst, fail
1148 * MOV dst, pass, p0
1149 */
1150 static boolean
1151 emit_conditional(struct svga_shader_emitter *emit,
1152 unsigned compare_func,
1153 SVGA3dShaderDestToken dst,
1154 struct src_register src0,
1155 struct src_register src1,
1156 struct src_register pass,
1157 struct src_register fail)
1158 {
1159 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1160 SVGA3dShaderInstToken setp_token, mov_token;
1161 setp_token = inst_token( SVGA3DOP_SETP );
1162
1163 switch (compare_func) {
1164 case PIPE_FUNC_NEVER:
1165 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1166 dst, fail );
1167 break;
1168 case PIPE_FUNC_LESS:
1169 setp_token.control = SVGA3DOPCOMP_LT;
1170 break;
1171 case PIPE_FUNC_EQUAL:
1172 setp_token.control = SVGA3DOPCOMP_EQ;
1173 break;
1174 case PIPE_FUNC_LEQUAL:
1175 setp_token.control = SVGA3DOPCOMP_LE;
1176 break;
1177 case PIPE_FUNC_GREATER:
1178 setp_token.control = SVGA3DOPCOMP_GT;
1179 break;
1180 case PIPE_FUNC_NOTEQUAL:
1181 setp_token.control = SVGA3DOPCOMPC_NE;
1182 break;
1183 case PIPE_FUNC_GEQUAL:
1184 setp_token.control = SVGA3DOPCOMP_GE;
1185 break;
1186 case PIPE_FUNC_ALWAYS:
1187 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1188 dst, pass );
1189 break;
1190 }
1191
1192 /* SETP src0, COMPOP, src1 */
1193 if (!submit_op2( emit, setp_token, pred_reg,
1194 src0, src1 ))
1195 return FALSE;
1196
1197 mov_token = inst_token( SVGA3DOP_MOV );
1198
1199 /* MOV dst, fail */
1200 if (!submit_op1( emit, mov_token, dst,
1201 fail ))
1202 return FALSE;
1203
1204 /* MOV dst, pass (predicated)
1205 *
1206 * Note that the predicate reg (and possible modifiers) is passed
1207 * as the first source argument.
1208 */
1209 mov_token.predicated = 1;
1210 if (!submit_op2( emit, mov_token, dst,
1211 src( pred_reg ), pass ))
1212 return FALSE;
1213
1214 return TRUE;
1215 }
1216
1217
1218 static boolean
1219 emit_select(struct svga_shader_emitter *emit,
1220 unsigned compare_func,
1221 SVGA3dShaderDestToken dst,
1222 struct src_register src0,
1223 struct src_register src1 )
1224 {
1225 /* There are some SVGA instructions which implement some selects
1226 * directly, but they are only available in the vertex shader.
1227 */
1228 if (emit->unit == PIPE_SHADER_VERTEX) {
1229 switch (compare_func) {
1230 case PIPE_FUNC_GEQUAL:
1231 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1232 case PIPE_FUNC_LEQUAL:
1233 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1234 case PIPE_FUNC_GREATER:
1235 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1236 case PIPE_FUNC_LESS:
1237 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1238 default:
1239 break;
1240 }
1241 }
1242
1243
1244 /* Otherwise, need to use the setp approach:
1245 */
1246 {
1247 struct src_register one, zero;
1248 /* zero immediate is 0,0,0,1 */
1249 zero = get_zero_immediate( emit );
1250 one = scalar( zero, TGSI_SWIZZLE_W );
1251 zero = scalar( zero, TGSI_SWIZZLE_X );
1252
1253 return emit_conditional(
1254 emit,
1255 compare_func,
1256 dst,
1257 src0,
1258 src1,
1259 one, zero);
1260 }
1261 }
1262
1263
1264 static boolean emit_select_op(struct svga_shader_emitter *emit,
1265 unsigned compare,
1266 const struct tgsi_full_instruction *insn)
1267 {
1268 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1269 struct src_register src0 = translate_src_register(
1270 emit, &insn->Src[0] );
1271 struct src_register src1 = translate_src_register(
1272 emit, &insn->Src[1] );
1273
1274 return emit_select( emit, compare, dst, src0, src1 );
1275 }
1276
1277
1278 /* Translate texture instructions to SVGA3D representation.
1279 */
1280 static boolean emit_tex2(struct svga_shader_emitter *emit,
1281 const struct tgsi_full_instruction *insn,
1282 SVGA3dShaderDestToken dst )
1283 {
1284 SVGA3dShaderInstToken inst;
1285 struct src_register texcoord;
1286 struct src_register sampler;
1287 SVGA3dShaderDestToken tmp;
1288
1289 inst.value = 0;
1290
1291 switch (insn->Instruction.Opcode) {
1292 case TGSI_OPCODE_TEX:
1293 inst.op = SVGA3DOP_TEX;
1294 break;
1295 case TGSI_OPCODE_TXP:
1296 inst.op = SVGA3DOP_TEX;
1297 inst.control = SVGA3DOPCONT_PROJECT;
1298 break;
1299 case TGSI_OPCODE_TXB:
1300 inst.op = SVGA3DOP_TEX;
1301 inst.control = SVGA3DOPCONT_BIAS;
1302 break;
1303 case TGSI_OPCODE_TXL:
1304 inst.op = SVGA3DOP_TEXLDL;
1305 break;
1306 default:
1307 assert(0);
1308 return FALSE;
1309 }
1310
1311 texcoord = translate_src_register( emit, &insn->Src[0] );
1312 sampler = translate_src_register( emit, &insn->Src[1] );
1313
1314 if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
1315 emit->dynamic_branching_level > 0)
1316 tmp = get_temp( emit );
1317
1318 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1319 * zero in that case.
1320 */
1321 if (emit->dynamic_branching_level > 0 &&
1322 inst.op == SVGA3DOP_TEX &&
1323 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1324 struct src_register zero = get_zero_immediate( emit );
1325
1326 /* MOV tmp, texcoord */
1327 if (!submit_op1( emit,
1328 inst_token( SVGA3DOP_MOV ),
1329 tmp,
1330 texcoord ))
1331 return FALSE;
1332
1333 /* MOV tmp.w, zero */
1334 if (!submit_op1( emit,
1335 inst_token( SVGA3DOP_MOV ),
1336 writemask( tmp, TGSI_WRITEMASK_W ),
1337 scalar( zero, TGSI_SWIZZLE_X )))
1338 return FALSE;
1339
1340 texcoord = src( tmp );
1341 inst.op = SVGA3DOP_TEXLDL;
1342 }
1343
1344 /* Explicit normalization of texcoords:
1345 */
1346 if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
1347 struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1348
1349 /* MUL tmp, SRC0, WH */
1350 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1351 tmp, texcoord, wh ))
1352 return FALSE;
1353
1354 texcoord = src( tmp );
1355 }
1356
1357 return submit_op2( emit, inst, dst, texcoord, sampler );
1358 }
1359
1360
1361
1362
1363 /* Translate texture instructions to SVGA3D representation.
1364 */
1365 static boolean emit_tex4(struct svga_shader_emitter *emit,
1366 const struct tgsi_full_instruction *insn,
1367 SVGA3dShaderDestToken dst )
1368 {
1369 SVGA3dShaderInstToken inst;
1370 struct src_register texcoord;
1371 struct src_register ddx;
1372 struct src_register ddy;
1373 struct src_register sampler;
1374
1375 texcoord = translate_src_register( emit, &insn->Src[0] );
1376 ddx = translate_src_register( emit, &insn->Src[1] );
1377 ddy = translate_src_register( emit, &insn->Src[2] );
1378 sampler = translate_src_register( emit, &insn->Src[3] );
1379
1380 inst.value = 0;
1381
1382 switch (insn->Instruction.Opcode) {
1383 case TGSI_OPCODE_TXD:
1384 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1385 break;
1386 default:
1387 assert(0);
1388 return FALSE;
1389 }
1390
1391 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1392 }
1393
1394
1395 static boolean emit_tex(struct svga_shader_emitter *emit,
1396 const struct tgsi_full_instruction *insn )
1397 {
1398 SVGA3dShaderDestToken dst =
1399 translate_dst_register( emit, insn, 0 );
1400 struct src_register src0 =
1401 translate_src_register( emit, &insn->Src[0] );
1402 struct src_register src1 =
1403 translate_src_register( emit, &insn->Src[1] );
1404
1405 SVGA3dShaderDestToken tex_result;
1406
1407 /* check for shadow samplers */
1408 boolean compare = (emit->key.fkey.tex[src1.base.num].compare_mode ==
1409 PIPE_TEX_COMPARE_R_TO_TEXTURE);
1410
1411
1412 /* If doing compare processing, need to put this value into a
1413 * temporary so it can be used as a source later on.
1414 */
1415 if (compare ||
1416 (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) ) {
1417 tex_result = get_temp( emit );
1418 }
1419 else {
1420 tex_result = dst;
1421 }
1422
1423 switch(insn->Instruction.Opcode) {
1424 case TGSI_OPCODE_TEX:
1425 case TGSI_OPCODE_TXB:
1426 case TGSI_OPCODE_TXP:
1427 case TGSI_OPCODE_TXL:
1428 if (!emit_tex2( emit, insn, tex_result ))
1429 return FALSE;
1430 break;
1431 case TGSI_OPCODE_TXD:
1432 if (!emit_tex4( emit, insn, tex_result ))
1433 return FALSE;
1434 break;
1435 default:
1436 assert(0);
1437 }
1438
1439
1440 if (compare) {
1441 if (dst.mask & TGSI_WRITEMASK_XYZ) {
1442 SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1443 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1444
1445 /* Divide texcoord R by Q */
1446 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1447 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1448 scalar(src0, TGSI_SWIZZLE_W) ))
1449 return FALSE;
1450
1451 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1452 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1453 scalar(src0, TGSI_SWIZZLE_Z),
1454 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1455 return FALSE;
1456
1457 if (!emit_select(
1458 emit,
1459 emit->key.fkey.tex[src1.base.num].compare_func,
1460 writemask( dst, TGSI_WRITEMASK_XYZ ),
1461 scalar(src(src0_zdivw), TGSI_SWIZZLE_X),
1462 tex_src_x))
1463 return FALSE;
1464 }
1465
1466 if (dst.mask & TGSI_WRITEMASK_W) {
1467 struct src_register one =
1468 scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W );
1469
1470 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1471 writemask( dst, TGSI_WRITEMASK_W ),
1472 one ))
1473 return FALSE;
1474 }
1475
1476 return TRUE;
1477 }
1478 else if (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW)
1479 {
1480 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1481 return FALSE;
1482 }
1483
1484 return TRUE;
1485 }
1486
1487 static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
1488 const struct tgsi_full_instruction *insn )
1489 {
1490 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
1491 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
1492 struct src_register const_int = get_loop_const( emit );
1493
1494 emit->dynamic_branching_level++;
1495
1496 return (emit_instruction( emit, inst ) &&
1497 emit_src( emit, loop_reg ) &&
1498 emit_src( emit, const_int ) );
1499 }
1500
1501 static boolean emit_endloop2( struct svga_shader_emitter *emit,
1502 const struct tgsi_full_instruction *insn )
1503 {
1504 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
1505
1506 emit->dynamic_branching_level--;
1507
1508 return emit_instruction( emit, inst );
1509 }
1510
1511 static boolean emit_brk( struct svga_shader_emitter *emit,
1512 const struct tgsi_full_instruction *insn )
1513 {
1514 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
1515 return emit_instruction( emit, inst );
1516 }
1517
1518 static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
1519 unsigned opcode,
1520 const struct tgsi_full_instruction *insn )
1521 {
1522 SVGA3dShaderInstToken inst;
1523 SVGA3dShaderDestToken dst;
1524 struct src_register src;
1525
1526 inst = inst_token( opcode );
1527 dst = translate_dst_register( emit, insn, 0 );
1528 src = translate_src_register( emit, &insn->Src[0] );
1529 src = scalar( src, TGSI_SWIZZLE_X );
1530
1531 return submit_op1( emit, inst, dst, src );
1532 }
1533
1534
1535 static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
1536 unsigned opcode,
1537 const struct tgsi_full_instruction *insn )
1538 {
1539 const struct tgsi_full_src_register *src = insn->Src;
1540 SVGA3dShaderInstToken inst;
1541 SVGA3dShaderDestToken dst;
1542
1543 inst = inst_token( opcode );
1544 dst = translate_dst_register( emit, insn, 0 );
1545
1546 switch (insn->Instruction.NumSrcRegs) {
1547 case 0:
1548 return submit_op0( emit, inst, dst );
1549 case 1:
1550 return submit_op1( emit, inst, dst,
1551 translate_src_register( emit, &src[0] ));
1552 case 2:
1553 return submit_op2( emit, inst, dst,
1554 translate_src_register( emit, &src[0] ),
1555 translate_src_register( emit, &src[1] ) );
1556 case 3:
1557 return submit_op3( emit, inst, dst,
1558 translate_src_register( emit, &src[0] ),
1559 translate_src_register( emit, &src[1] ),
1560 translate_src_register( emit, &src[2] ) );
1561 default:
1562 assert(0);
1563 return FALSE;
1564 }
1565 }
1566
1567
1568 static boolean emit_deriv(struct svga_shader_emitter *emit,
1569 const struct tgsi_full_instruction *insn )
1570 {
1571 if (emit->dynamic_branching_level > 0 &&
1572 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
1573 {
1574 struct src_register zero = get_zero_immediate( emit );
1575 SVGA3dShaderDestToken dst =
1576 translate_dst_register( emit, insn, 0 );
1577
1578       /* Deriv opcodes are not valid inside dynamic branching; work around this
1579 * by zeroing out the destination.
1580 */
1581 if (!submit_op1(emit,
1582 inst_token( SVGA3DOP_MOV ),
1583 dst,
1584 scalar(zero, TGSI_SWIZZLE_X)))
1585 return FALSE;
1586
1587 return TRUE;
1588 }
1589 else {
1590 unsigned opcode;
1591
1592 switch (insn->Instruction.Opcode) {
1593 case TGSI_OPCODE_DDX:
1594 opcode = SVGA3DOP_DSX;
1595 break;
1596 case TGSI_OPCODE_DDY:
1597 opcode = SVGA3DOP_DSY;
1598 break;
1599 default:
1600 return FALSE;
1601 }
1602
1603 return emit_simple_instruction( emit, opcode, insn );
1604 }
1605 }
1606
1607 static boolean emit_arl(struct svga_shader_emitter *emit,
1608 const struct tgsi_full_instruction *insn)
1609 {
1610 ++emit->current_arl;
1611 if (emit->unit == PIPE_SHADER_FRAGMENT) {
1612 /* MOVA not present in pixel shader instruction set.
1613 * Ignore this instruction altogether since it is
1614 * only used for loop counters -- and for that
1615 * we reference aL directly.
1616 */
1617 return TRUE;
1618 }
1619 if (svga_arl_needs_adjustment( emit )) {
1620 return emit_fake_arl( emit, insn );
1621 } else {
1622 /* no need to adjust, just emit straight arl */
1623 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
1624 }
1625 }
1626
1627 static boolean alias_src_dst( struct src_register src,
1628 SVGA3dShaderDestToken dst )
1629 {
1630 if (src.base.num != dst.num)
1631 return FALSE;
1632
1633 if (SVGA3dShaderGetRegType(dst.value) !=
1634 SVGA3dShaderGetRegType(src.base.value))
1635 return FALSE;
1636
1637 return TRUE;
1638 }
1639
1640 static boolean emit_pow(struct svga_shader_emitter *emit,
1641 const struct tgsi_full_instruction *insn)
1642 {
1643 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1644 struct src_register src0 = translate_src_register(
1645 emit, &insn->Src[0] );
1646 struct src_register src1 = translate_src_register(
1647 emit, &insn->Src[1] );
1648 boolean need_tmp = FALSE;
1649
1650 /* POW can only output to a temporary */
1651 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
1652 need_tmp = TRUE;
1653
1654 /* POW src1 must not be the same register as dst */
1655 if (alias_src_dst( src1, dst ))
1656 need_tmp = TRUE;
1657
1658 /* it's a scalar op */
1659 src0 = scalar( src0, TGSI_SWIZZLE_X );
1660 src1 = scalar( src1, TGSI_SWIZZLE_X );
1661
1662 if (need_tmp) {
1663 SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X );
1664
1665 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
1666 return FALSE;
1667
1668 return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) );
1669 }
1670 else {
1671 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
1672 }
1673 }
1674
1675 static boolean emit_xpd(struct svga_shader_emitter *emit,
1676 const struct tgsi_full_instruction *insn)
1677 {
1678 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1679 const struct src_register src0 = translate_src_register(
1680 emit, &insn->Src[0] );
1681 const struct src_register src1 = translate_src_register(
1682 emit, &insn->Src[1] );
1683 boolean need_dst_tmp = FALSE;
1684
1685 /* XPD can only output to a temporary */
1686 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
1687 need_dst_tmp = TRUE;
1688
1689    /* The dst reg must not be the same as src0 or src1 */
1690 if (alias_src_dst(src0, dst) ||
1691 alias_src_dst(src1, dst))
1692 need_dst_tmp = TRUE;
1693
1694 if (need_dst_tmp) {
1695 SVGA3dShaderDestToken tmp = get_temp( emit );
1696
1697 /* Obey DX9 restrictions on mask:
1698 */
1699 tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
1700
1701 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
1702 return FALSE;
1703
1704 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
1705 return FALSE;
1706 }
1707 else {
1708 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
1709 return FALSE;
1710 }
1711
1712 /* Need to emit 1.0 to dst.w?
1713 */
1714 if (dst.mask & TGSI_WRITEMASK_W) {
1715 struct src_register zero = get_zero_immediate( emit );
1716
1717 if (!submit_op1(emit,
1718 inst_token( SVGA3DOP_MOV ),
1719 writemask(dst, TGSI_WRITEMASK_W),
1720 zero))
1721 return FALSE;
1722 }
1723
1724 return TRUE;
1725 }
1726
1727
1728 static boolean emit_lrp(struct svga_shader_emitter *emit,
1729 const struct tgsi_full_instruction *insn)
1730 {
1731 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1732 SVGA3dShaderDestToken tmp;
1733 const struct src_register src0 = translate_src_register(
1734 emit, &insn->Src[0] );
1735 const struct src_register src1 = translate_src_register(
1736 emit, &insn->Src[1] );
1737 const struct src_register src2 = translate_src_register(
1738 emit, &insn->Src[2] );
1739 boolean need_dst_tmp = FALSE;
1740
1741 /* The dst reg must not be the same as src0 or src2 */
1742 if (alias_src_dst(src0, dst) ||
1743 alias_src_dst(src2, dst))
1744 need_dst_tmp = TRUE;
1745
1746 if (need_dst_tmp) {
1747 tmp = get_temp( emit );
1748 tmp.mask = dst.mask;
1749 }
1750 else {
1751 tmp = dst;
1752 }
1753
1754 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
1755 return FALSE;
1756
1757 if (need_dst_tmp) {
1758 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
1759 return FALSE;
1760 }
1761
1762 return TRUE;
1763 }
1764
1765
1766 static boolean emit_dst_insn(struct svga_shader_emitter *emit,
1767 const struct tgsi_full_instruction *insn )
1768 {
1769 if (emit->unit == PIPE_SHADER_VERTEX) {
1770 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
1771 */
1772 return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
1773 }
1774 else {
1775
1776 /* result[0] = 1 * 1;
1777 * result[1] = a[1] * b[1];
1778 * result[2] = a[2] * 1;
1779 * result[3] = 1 * b[3];
1780 */
1781
1782 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1783 SVGA3dShaderDestToken tmp;
1784 const struct src_register src0 = translate_src_register(
1785 emit, &insn->Src[0] );
1786 const struct src_register src1 = translate_src_register(
1787 emit, &insn->Src[1] );
1788 struct src_register zero = get_zero_immediate( emit );
1789 boolean need_tmp = FALSE;
1790
1791 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
1792 alias_src_dst(src0, dst) ||
1793 alias_src_dst(src1, dst))
1794 need_tmp = TRUE;
1795
1796 if (need_tmp) {
1797 tmp = get_temp( emit );
1798 }
1799 else {
1800 tmp = dst;
1801 }
1802
1803 /* tmp.xw = 1.0
1804 */
1805 if (tmp.mask & TGSI_WRITEMASK_XW) {
1806 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1807 writemask(tmp, TGSI_WRITEMASK_XW ),
1808 scalar( zero, 3 )))
1809 return FALSE;
1810 }
1811
1812 /* tmp.yz = src0
1813 */
1814 if (tmp.mask & TGSI_WRITEMASK_YZ) {
1815 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1816 writemask(tmp, TGSI_WRITEMASK_YZ ),
1817 src0))
1818 return FALSE;
1819 }
1820
1821 /* tmp.yw = tmp * src1
1822 */
1823 if (tmp.mask & TGSI_WRITEMASK_YW) {
1824 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1825 writemask(tmp, TGSI_WRITEMASK_YW ),
1826 src(tmp),
1827 src1))
1828 return FALSE;
1829 }
1830
1831 /* dst = tmp
1832 */
1833 if (need_tmp) {
1834 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1835 dst,
1836 src(tmp)))
1837 return FALSE;
1838 }
1839 }
1840
1841 return TRUE;
1842 }
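/* Background note (typical usage, not taken from this file): DST is commonly
 * used for distance attenuation, with src0 = (-, d*d, d*d, -) and
 * src1 = (-, 1/d, -, 1/d), giving the vector (1, d, d*d, 1/d) that is then
 * dotted with the attenuation coefficients.
 */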
1843
1844
1845 static boolean emit_exp(struct svga_shader_emitter *emit,
1846 const struct tgsi_full_instruction *insn)
1847 {
1848 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1849 struct src_register src0 =
1850 translate_src_register( emit, &insn->Src[0] );
1851 struct src_register zero = get_zero_immediate( emit );
1852 SVGA3dShaderDestToken fraction;
1853
1854 if (dst.mask & TGSI_WRITEMASK_Y)
1855 fraction = dst;
1856 else if (dst.mask & TGSI_WRITEMASK_X)
1857 fraction = get_temp( emit );
1858 else
1859 fraction.value = 0;
1860
1861 /* If y is being written, fill it with src0 - floor(src0).
1862 */
1863 if (dst.mask & TGSI_WRITEMASK_XY) {
1864 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
1865 writemask( fraction, TGSI_WRITEMASK_Y ),
1866 src0 ))
1867 return FALSE;
1868 }
1869
1870 /* If x is being written, fill it with 2 ^ floor(src0).
1871 */
1872 if (dst.mask & TGSI_WRITEMASK_X) {
1873 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
1874 writemask( dst, TGSI_WRITEMASK_X ),
1875 src0,
1876 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
1877 return FALSE;
1878
1879 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
1880 writemask( dst, TGSI_WRITEMASK_X ),
1881 scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
1882 return FALSE;
1883
1884 if (!(dst.mask & TGSI_WRITEMASK_Y))
1885 release_temp( emit, fraction );
1886 }
1887
1888 /* If z is being written, fill it with 2 ^ src0 (partial precision).
1889 */
1890 if (dst.mask & TGSI_WRITEMASK_Z) {
1891 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
1892 writemask( dst, TGSI_WRITEMASK_Z ),
1893 src0 ) )
1894 return FALSE;
1895 }
1896
1897 /* If w is being written, fill it with one.
1898 */
1899 if (dst.mask & TGSI_WRITEMASK_W) {
1900 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1901 writemask(dst, TGSI_WRITEMASK_W),
1902 scalar( zero, TGSI_SWIZZLE_W ) ))
1903 return FALSE;
1904 }
1905
1906 return TRUE;
1907 }
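/* The decomposition above relies on 2^x = 2^floor(x) * 2^frac(x): per the
 * TGSI EXP definition, dst.x = 2^floor(src), dst.y = frac(src),
 * dst.z ~= 2^src (partial precision via EXPP), dst.w = 1.0.
 * Worked example: src = 2.5 gives dst = (4.0, 0.5, ~5.657, 1.0).
 */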
1908
1909 static boolean emit_lit(struct svga_shader_emitter *emit,
1910 const struct tgsi_full_instruction *insn )
1911 {
1912 if (emit->unit == PIPE_SHADER_VERTEX) {
1913 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
1914 */
1915 return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
1916 }
1917 else {
1918
1919       /* D3D vs. GL semantics can be fairly easily accommodated by
1920 * variations on this sequence.
1921 *
1922 * GL:
1923 * tmp.y = src.x
1924 * tmp.z = pow(src.y,src.w)
1925 * p0 = src0.xxxx > 0
1926 * result = zero.wxxw
1927 * (p0) result.yz = tmp
1928 *
1929 * D3D:
1930 * tmp.y = src.x
1931 * tmp.z = pow(src.y,src.w)
1932 * p0 = src0.xxyy > 0
1933 * result = zero.wxxw
1934 * (p0) result.yz = tmp
1935 *
1936 * Will implement the GL version for now.
1937 */
1938
1939 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1940 SVGA3dShaderDestToken tmp = get_temp( emit );
1941 const struct src_register src0 = translate_src_register(
1942 emit, &insn->Src[0] );
1943 struct src_register zero = get_zero_immediate( emit );
1944
1945 /* tmp = pow(src.y, src.w)
1946 */
1947 if (dst.mask & TGSI_WRITEMASK_Z) {
1948 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
1949 tmp,
1950 scalar(src0, 1),
1951 scalar(src0, 3)))
1952 return FALSE;
1953 }
1954
1955 /* tmp.y = src.x
1956 */
1957 if (dst.mask & TGSI_WRITEMASK_Y) {
1958 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1959 writemask(tmp, TGSI_WRITEMASK_Y ),
1960 scalar(src0, 0)))
1961 return FALSE;
1962 }
1963
1964       /* Can't quite do this with emit_conditional() due to the extra
1965 * writemask on the predicated mov:
1966 */
1967 {
1968 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1969 SVGA3dShaderInstToken setp_token, mov_token;
1970 struct src_register predsrc;
1971
1972 setp_token = inst_token( SVGA3DOP_SETP );
1973 mov_token = inst_token( SVGA3DOP_MOV );
1974
1975 setp_token.control = SVGA3DOPCOMP_GT;
1976
1977 /* D3D vs GL semantics:
1978 */
1979 if (0)
1980 predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
1981 else
1982 predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
1983
1984 /* SETP src0.xxyy, GT, {0}.x */
1985 if (!submit_op2( emit, setp_token, pred_reg,
1986 predsrc,
1987 swizzle(zero, 0, 0, 0, 0) ))
1988 return FALSE;
1989
1990 /* MOV dst, fail */
1991 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
1992 swizzle(zero, 3, 0, 0, 3 )))
1993 return FALSE;
1994
1995 /* MOV dst.yz, tmp (predicated)
1996 *
1997 * Note that the predicate reg (and possible modifiers) is passed
1998 * as the first source argument.
1999 */
2000 if (dst.mask & TGSI_WRITEMASK_YZ) {
2001 mov_token.predicated = 1;
2002 if (!submit_op2( emit, mov_token,
2003 writemask(dst, TGSI_WRITEMASK_YZ),
2004 src( pred_reg ), src( tmp ) ))
2005 return FALSE;
2006 }
2007 }
2008 }
2009
2010 return TRUE;
2011 }
2012
2013
2014
2015
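/* TGSI EX2 is emitted as the scalar EXP instruction.  With a partial
 * destination writemask the result is computed into a temporary first
 * and then MOVed (as tmp.x) into the requested components.
 */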
2016 static boolean emit_ex2( struct svga_shader_emitter *emit,
2017 const struct tgsi_full_instruction *insn )
2018 {
2019 SVGA3dShaderInstToken inst;
2020 SVGA3dShaderDestToken dst;
2021 struct src_register src0;
2022
2023 inst = inst_token( SVGA3DOP_EXP );
2024 dst = translate_dst_register( emit, insn, 0 );
2025 src0 = translate_src_register( emit, &insn->Src[0] );
2026 src0 = scalar( src0, TGSI_SWIZZLE_X );
2027
2028 if (dst.mask != TGSI_WRITEMASK_XYZW) {
2029 SVGA3dShaderDestToken tmp = get_temp( emit );
2030
2031 if (!submit_op1( emit, inst, tmp, src0 ))
2032 return FALSE;
2033
2034 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2035 dst,
2036 scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2037 }
2038
2039 return submit_op1( emit, inst, dst, src0 );
2040 }
2041
2042
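/* Expand the TGSI LOG macro, roughly:
 *
 *   dst.x = floor( log2( abs( src0.x ) ) )
 *   dst.y = abs( src0.x ) / ( 2 ^ floor( log2( abs( src0.x ) ) ) )
 *   dst.z = log2( abs( src0.x ) )
 *   dst.w = 1
 */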
2043 static boolean emit_log(struct svga_shader_emitter *emit,
2044 const struct tgsi_full_instruction *insn)
2045 {
2046 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2047 struct src_register src0 =
2048 translate_src_register( emit, &insn->Src[0] );
2049 struct src_register zero = get_zero_immediate( emit );
2050 SVGA3dShaderDestToken abs_tmp;
2051 struct src_register abs_src0;
2052 SVGA3dShaderDestToken log2_abs;
2053
2054 abs_tmp.value = 0;
2055
2056 if (dst.mask & TGSI_WRITEMASK_Z)
2057 log2_abs = dst;
2058 else if (dst.mask & TGSI_WRITEMASK_XY)
2059 log2_abs = get_temp( emit );
2060 else
2061 log2_abs.value = 0;
2062
2063 /* If any of x, y or z is being written, compute log2( abs( src0 ) ) into log2_abs.z.
2064 */
2065 if (dst.mask & TGSI_WRITEMASK_XYZ) {
2066 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2067 abs_src0 = src0;
2068 else {
2069 abs_tmp = get_temp( emit );
2070
2071 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2072 abs_tmp,
2073 src0 ) )
2074 return FALSE;
2075
2076 abs_src0 = src( abs_tmp );
2077 }
2078
2079 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2080
2081 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2082 writemask( log2_abs, TGSI_WRITEMASK_Z ),
2083 abs_src0 ) )
2084 return FALSE;
2085 }
2086
2087 if (dst.mask & TGSI_WRITEMASK_XY) {
2088 SVGA3dShaderDestToken floor_log2;
2089
2090 if (dst.mask & TGSI_WRITEMASK_X)
2091 floor_log2 = dst;
2092 else
2093 floor_log2 = get_temp( emit );
2094
2095 /* Fill floor_log2.x with floor( log2( abs( src0 ) ) ), computed as log2_abs.z - frac( log2_abs.z ).
2096 */
2097 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2098 writemask( floor_log2, TGSI_WRITEMASK_X ),
2099 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2100 return FALSE;
2101
2102 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2103 writemask( floor_log2, TGSI_WRITEMASK_X ),
2104 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2105 negate( src( floor_log2 ) ) ) )
2106 return FALSE;
2107
2108 /* If y is being written, fill it with
2109 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2110 */
2111 if (dst.mask & TGSI_WRITEMASK_Y) {
2112 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2113 writemask( dst, TGSI_WRITEMASK_Y ),
2114 negate( scalar( src( floor_log2 ),
2115 TGSI_SWIZZLE_X ) ) ) )
2116 return FALSE;
2117
2118 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2119 writemask( dst, TGSI_WRITEMASK_Y ),
2120 src( dst ),
2121 abs_src0 ) )
2122 return FALSE;
2123 }
2124
2125 if (!(dst.mask & TGSI_WRITEMASK_X))
2126 release_temp( emit, floor_log2 );
2127
2128 if (!(dst.mask & TGSI_WRITEMASK_Z))
2129 release_temp( emit, log2_abs );
2130 }
2131
2132 if ((dst.mask & TGSI_WRITEMASK_XYZ) && src0.base.srcMod &&
2133 src0.base.srcMod != SVGA3DSRCMOD_ABS)
2134 release_temp( emit, abs_tmp );
2135
2136 /* If w is being written, fill it with one.
2137 */
2138 if (dst.mask & TGSI_WRITEMASK_W) {
2139 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2140 writemask(dst, TGSI_WRITEMASK_W),
2141 scalar( zero, TGSI_SWIZZLE_W ) ))
2142 return FALSE;
2143 }
2144
2145 return TRUE;
2146 }
2147
2148
2149 static boolean emit_bgnsub( struct svga_shader_emitter *emit,
2150 unsigned position,
2151 const struct tgsi_full_instruction *insn )
2152 {
2153 unsigned i;
2154
2155 /* Note that we've finished the main function and are now emitting
2156 * subroutines. This affects how we terminate the generated
2157 * shader.
2158 */
2159 emit->in_main_func = FALSE;
2160
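/* Look up the label index that emit_call() assigned to this position,
 * then emit RET (closing the previous function) followed by the
 * matching LABEL.
 */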
2161 for (i = 0; i < emit->nr_labels; i++) {
2162 if (emit->label[i] == position) {
2163 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2164 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2165 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2166 }
2167 }
2168
2169 assert(0);
2170 return TRUE;
2171 }
2172
2173 static boolean emit_call( struct svga_shader_emitter *emit,
2174 const struct tgsi_full_instruction *insn )
2175 {
2176 unsigned position = insn->Label.Label;
2177 unsigned i;
2178
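/* Reuse the label index previously assigned to this subroutine, or
 * allocate a new one; emit_bgnsub() emits the matching LABEL when the
 * subroutine body is reached.
 */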
2179 for (i = 0; i < emit->nr_labels; i++) {
2180 if (emit->label[i] == position)
2181 break;
2182 }
2183
2184 if (emit->nr_labels == Elements(emit->label))
2185 return FALSE;
2186
2187 if (i == emit->nr_labels) {
2188 emit->label[i] = position;
2189 emit->nr_labels++;
2190 }
2191
2192 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2193 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2194 }
2195
2196
2197 static boolean emit_end( struct svga_shader_emitter *emit )
2198 {
2199 if (emit->unit == PIPE_SHADER_VERTEX) {
2200 return emit_vs_postamble( emit );
2201 }
2202 else {
2203 return emit_ps_postamble( emit );
2204 }
2205 }
2206
2207
2208
2209 static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
2210 unsigned position,
2211 const struct tgsi_full_instruction *insn )
2212 {
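/* 'position' is the index of this instruction within the token stream;
 * it is what CAL's label operand refers to (see emit_call() and
 * emit_bgnsub() above).
 */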
2213 switch (insn->Instruction.Opcode) {
2214
2215 case TGSI_OPCODE_ARL:
2216 return emit_arl( emit, insn );
2217
2218 case TGSI_OPCODE_TEX:
2219 case TGSI_OPCODE_TXB:
2220 case TGSI_OPCODE_TXP:
2221 case TGSI_OPCODE_TXL:
2222 case TGSI_OPCODE_TXD:
2223 return emit_tex( emit, insn );
2224
2225 case TGSI_OPCODE_DDX:
2226 case TGSI_OPCODE_DDY:
2227 return emit_deriv( emit, insn );
2228
2229 case TGSI_OPCODE_BGNSUB:
2230 return emit_bgnsub( emit, position, insn );
2231
2232 case TGSI_OPCODE_ENDSUB:
2233 return TRUE;
2234
2235 case TGSI_OPCODE_CAL:
2236 return emit_call( emit, insn );
2237
2238 case TGSI_OPCODE_FLR:
2239 case TGSI_OPCODE_TRUNC: /* should truncate toward zero, not floor -- wrong for negative inputs */
2240 return emit_floor( emit, insn );
2241
2242 case TGSI_OPCODE_CMP:
2243 return emit_cmp( emit, insn );
2244
2245 case TGSI_OPCODE_DIV:
2246 return emit_div( emit, insn );
2247
2248 case TGSI_OPCODE_DP2:
2249 return emit_dp2( emit, insn );
2250
2251 case TGSI_OPCODE_DPH:
2252 return emit_dph( emit, insn );
2253
2254 case TGSI_OPCODE_NRM:
2255 return emit_nrm( emit, insn );
2256
2257 case TGSI_OPCODE_COS:
2258 return emit_cos( emit, insn );
2259
2260 case TGSI_OPCODE_SIN:
2261 return emit_sin( emit, insn );
2262
2263 case TGSI_OPCODE_SCS:
2264 return emit_sincos( emit, insn );
2265
2266 case TGSI_OPCODE_END:
2267 /* TGSI always finishes the main func with an END */
2268 return emit_end( emit );
2269
2270 case TGSI_OPCODE_KIL:
2271 return emit_kil( emit, insn );
2272
2273 /* Selection opcodes. The underlying language is fairly
2274 * non-orthogonal about these.
2275 */
2276 case TGSI_OPCODE_SEQ:
2277 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2278
2279 case TGSI_OPCODE_SNE:
2280 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2281
2282 case TGSI_OPCODE_SGT:
2283 return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2284
2285 case TGSI_OPCODE_SGE:
2286 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2287
2288 case TGSI_OPCODE_SLT:
2289 return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2290
2291 case TGSI_OPCODE_SLE:
2292 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2293
2294 case TGSI_OPCODE_SUB:
2295 return emit_sub( emit, insn );
2296
2297 case TGSI_OPCODE_POW:
2298 return emit_pow( emit, insn );
2299
2300 case TGSI_OPCODE_EX2:
2301 return emit_ex2( emit, insn );
2302
2303 case TGSI_OPCODE_EXP:
2304 return emit_exp( emit, insn );
2305
2306 case TGSI_OPCODE_LOG:
2307 return emit_log( emit, insn );
2308
2309 case TGSI_OPCODE_LG2:
2310 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
2311
2312 case TGSI_OPCODE_RSQ:
2313 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
2314
2315 case TGSI_OPCODE_RCP:
2316 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
2317
2318 case TGSI_OPCODE_CONT:
2319 case TGSI_OPCODE_RET:
2320 /* This is a no-op -- we tell Mesa that we can't support RET
2321 * within a function (early return), so a RET will always be
2322 * immediately followed by an ENDSUB.
2323 */
2324 return TRUE;
2325
2326 /* These aren't actually used by any of the frontends we care
2327 * about:
2328 */
2329 case TGSI_OPCODE_CLAMP:
2330 case TGSI_OPCODE_ROUND:
2331 case TGSI_OPCODE_AND:
2332 case TGSI_OPCODE_OR:
2333 case TGSI_OPCODE_I2F:
2334 case TGSI_OPCODE_NOT:
2335 case TGSI_OPCODE_SHL:
2336 case TGSI_OPCODE_ISHR:
2337 case TGSI_OPCODE_XOR:
2338 return FALSE;
2339
2340 case TGSI_OPCODE_IF:
2341 return emit_if( emit, insn );
2342 case TGSI_OPCODE_ELSE:
2343 return emit_else( emit, insn );
2344 case TGSI_OPCODE_ENDIF:
2345 return emit_endif( emit, insn );
2346
2347 case TGSI_OPCODE_BGNLOOP:
2348 return emit_bgnloop2( emit, insn );
2349 case TGSI_OPCODE_ENDLOOP:
2350 return emit_endloop2( emit, insn );
2351 case TGSI_OPCODE_BRK:
2352 return emit_brk( emit, insn );
2353
2354 case TGSI_OPCODE_XPD:
2355 return emit_xpd( emit, insn );
2356
2357 case TGSI_OPCODE_KILP:
2358 return emit_kilp( emit, insn );
2359
2360 case TGSI_OPCODE_DST:
2361 return emit_dst_insn( emit, insn );
2362
2363 case TGSI_OPCODE_LIT:
2364 return emit_lit( emit, insn );
2365
2366 case TGSI_OPCODE_LRP:
2367 return emit_lrp( emit, insn );
2368
2369 default: {
2370 unsigned opcode = translate_opcode(insn->Instruction.Opcode);
2371
2372 if (opcode == SVGA3DOP_LAST_INST)
2373 return FALSE;
2374
2375 if (!emit_simple_instruction( emit, opcode, insn ))
2376 return FALSE;
2377 }
2378 }
2379
2380 return TRUE;
2381 }
2382
2383
2384 static boolean svga_emit_immediate( struct svga_shader_emitter *emit,
2385 struct tgsi_full_immediate *imm)
2386 {
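/* TGSI immediates may provide fewer than four components; pad the
 * missing ones with {0,0,0,1} so that w defaults to 1.
 */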
2387 static const float id[4] = {0,0,0,1};
2388 float value[4];
2389 unsigned i;
2390
2391 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
2392 for (i = 0; i < imm->Immediate.NrTokens - 1; i++)
2393 value[i] = imm->u[i].Float;
2394
2395 for ( ; i < 4; i++ )
2396 value[i] = id[i];
2397
2398 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2399 emit->imm_start + emit->internal_imm_count++,
2400 value[0], value[1], value[2], value[3]);
2401 }
2402
2403 static boolean make_immediate( struct svga_shader_emitter *emit,
2404 float a,
2405 float b,
2406 float c,
2407 float d,
2408 struct src_register *out )
2409 {
2410 unsigned idx = emit->nr_hw_float_const++;
2411
2412 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2413 idx, a, b, c, d ))
2414 return FALSE;
2415
2416 *out = src_register( SVGA3DREG_CONST, idx );
2417
2418 return TRUE;
2419 }
2420
2421 static boolean emit_vs_preamble( struct svga_shader_emitter *emit )
2422 {
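/* Without a prescale transform, the postamble needs the {0, 0, 0.5, 0.5}
 * constant to remap GL clip-space z to the D3D-style [0,1] range
 * (see emit_vs_postamble()).
 */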
2423 if (!emit->key.vkey.need_prescale) {
2424 if (!make_immediate( emit, 0, 0, .5, .5,
2425 &emit->imm_0055))
2426 return FALSE;
2427 }
2428
2429 return TRUE;
2430 }
2431
2432 static boolean emit_ps_preamble( struct svga_shader_emitter *emit )
2433 {
2434 unsigned i;
2435
2436 /* For SM20, the temporaries used to hold color outputs must be
2437 * initialized to a defined value (zero here); shaders which don't
2438 * write all of them are likely to be rejected by the DX9 runtime.
2439 */
2440 if (!emit->use_sm30) {
2441 struct src_register zero = get_zero_immediate( emit );
2442 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2443 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2444
2445 if (!submit_op1( emit,
2446 inst_token(SVGA3DOP_MOV),
2447 emit->temp_col[i],
2448 zero ))
2449 return FALSE;
2450 }
2451 }
2452 }
2453
2454 return TRUE;
2455 }
2456
2457 static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
2458 {
2459 unsigned i;
2460
2461 /* PS oDepth is incredibly fragile and it's very hard to catch the
2462 * types of usage that break it during shader emit. Easier just to
2463 * redirect the main program to a temporary and then only touch
2464 * oDepth with a hand-crafted MOV below.
2465 */
2466 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
2467
2468 if (!submit_op1( emit,
2469 inst_token(SVGA3DOP_MOV),
2470 emit->true_pos,
2471 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
2472 return FALSE;
2473 }
2474
2475 /* Similarly for SM20 color outputs... Luckily SM30 isn't so
2476 * fragile.
2477 */
2478 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2479 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2480
2481 /* Potentially override the output colors with white, as part of
2482 * the XOR logicop workaround.
2483 */
2484 if (emit->unit == PIPE_SHADER_FRAGMENT &&
2485 emit->key.fkey.white_fragments) {
2486
2487 struct src_register one = scalar( get_zero_immediate( emit ),
2488 TGSI_SWIZZLE_W );
2489
2490 if (!submit_op1( emit,
2491 inst_token(SVGA3DOP_MOV),
2492 emit->true_col[i],
2493 one ))
2494 return FALSE;
2495 }
2496 else {
2497 if (!submit_op1( emit,
2498 inst_token(SVGA3DOP_MOV),
2499 emit->true_col[i],
2500 src(emit->temp_col[i]) ))
2501 return FALSE;
2502 }
2503 }
2504 }
2505
2506 return TRUE;
2507 }
2508
2509 static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
2510 {
2511 /* PSIZ output is incredibly fragile and it's very hard to catch
2512 * the types of usage that break it during shader emit. Easier
2513 * just to redirect the main program to a temporary and then only
2514 * touch PSIZ with a hand-crafted MOV below.
2515 */
2516 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
2517
2518 if (!submit_op1( emit,
2519 inst_token(SVGA3DOP_MOV),
2520 emit->true_psiz,
2521 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
2522 return FALSE;
2523 }
2524
2525 /* Need to perform various manipulations on vertex position to cope
2526 * with the different GL and D3D clip spaces.
2527 */
2528 if (emit->key.vkey.need_prescale) {
2529 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2530 SVGA3dShaderDestToken pos = emit->true_pos;
2531 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
2532 struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
2533 offset + 0 );
2534 struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
2535 offset + 1 );
2536
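/* prescale_scale and prescale_trans live in the constant registers
 * immediately after the shader's own constant file, hence the
 * file_max[TGSI_FILE_CONSTANT] + 1 offset.
 */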
2537 /* MUL temp_pos.xyz, temp_pos, prescale.scale
2538 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
2539 * --> Note that prescale.trans.w == 0
2540 */
2541 if (!submit_op2( emit,
2542 inst_token(SVGA3DOP_MUL),
2543 writemask(temp_pos, TGSI_WRITEMASK_XYZ),
2544 src(temp_pos),
2545 prescale_scale ))
2546 return FALSE;
2547
2548 if (!submit_op3( emit,
2549 inst_token(SVGA3DOP_MAD),
2550 pos,
2551 swizzle(src(temp_pos), 3, 3, 3, 3),
2552 prescale_trans,
2553 src(temp_pos)))
2554 return FALSE;
2555 }
2556 else {
2557 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2558 SVGA3dShaderDestToken pos = emit->true_pos;
2559 struct src_register imm_0055 = emit->imm_0055;
2560
2561 /* Adjust GL clipping coordinate space to hardware (D3D-style):
2562 *
2563 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
2564 * MOV result.position, temp_pos
2565 */
2566 if (!submit_op2( emit,
2567 inst_token(SVGA3DOP_DP4),
2568 writemask(temp_pos, TGSI_WRITEMASK_Z),
2569 imm_0055,
2570 src(temp_pos) ))
2571 return FALSE;
2572
2573 if (!submit_op1( emit,
2574 inst_token(SVGA3DOP_MOV),
2575 pos,
2576 src(temp_pos) ))
2577 return FALSE;
2578 }
2579
2580 return TRUE;
2581 }
2582
2583 /*
2584 0: IF VFACE :4
2585 1: COLOR = FrontColor;
2586 2: ELSE
2587 3: COLOR = BackColor;
2588 4: ENDIF
2589 */
2590 static boolean emit_light_twoside( struct svga_shader_emitter *emit )
2591 {
2592 struct src_register vface, zero;
2593 struct src_register front[2];
2594 struct src_register back[2];
2595 SVGA3dShaderDestToken color[2];
2596 int count = emit->internal_color_count;
2597 int i;
2598 SVGA3dShaderInstToken if_token;
2599
2600 if (count == 0)
2601 return TRUE;
2602
2603 vface = get_vface( emit );
2604 zero = get_zero_immediate( emit );
2605
2606 /* Can't use get_temp() to allocate the color reg as such
2607 * temporaries will be reclaimed after each instruction by the call
2608 * to reset_temp_regs().
2609 */
2610 for (i = 0; i < count; i++) {
2611 color[i] = dst_register( SVGA3DREG_TEMP,
2612 emit->nr_hw_temp++ );
2613
2614 front[i] = emit->input_map[emit->internal_color_idx[i]];
2615
2616 /* Back is always the next input:
2617 */
2618 back[i] = front[i];
2619 back[i].base.num = front[i].base.num + 1;
2620
2621 /* Reassign the input_map to the actual front-face color:
2622 */
2623 emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
2624 }
2625
2626 if_token = inst_token( SVGA3DOP_IFC );
2627
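/* The sign of the VFACE comparison depends on the winding convention,
 * so flip it according to key.fkey.front_ccw.
 */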
2628 if (emit->key.fkey.front_ccw)
2629 if_token.control = SVGA3DOPCOMP_LT;
2630 else
2631 if_token.control = SVGA3DOPCOMP_GT;
2632
2633 zero = scalar(zero, TGSI_SWIZZLE_X);
2634
2635 if (!(emit_instruction( emit, if_token ) &&
2636 emit_src( emit, vface ) &&
2637 emit_src( emit, zero ) ))
2638 return FALSE;
2639
2640 for (i = 0; i < count; i++) {
2641 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
2642 return FALSE;
2643 }
2644
2645 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
2646 return FALSE;
2647
2648 for (i = 0; i < count; i++) {
2649 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
2650 return FALSE;
2651 }
2652
2653 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
2654 return FALSE;
2655
2656 return TRUE;
2657 }
2658
2659 /*
2660 0: SETP_GT TEMP, VFACE, 0
2661 where TEMP is a fake frontface register
2662 */
2663 static boolean emit_frontface( struct svga_shader_emitter *emit )
2664 {
2665 struct src_register vface, zero;
2666 SVGA3dShaderDestToken temp;
2667 struct src_register pass, fail;
2668
2669 vface = get_vface( emit );
2670 zero = get_zero_immediate( emit );
2671
2672 /* Can't use get_temp() to allocate the fake frontface reg as such
2673 * temporaries will be reclaimed after each instruction by the call
2674 * to reset_temp_regs().
2675 */
2676 temp = dst_register( SVGA3DREG_TEMP,
2677 emit->nr_hw_temp++ );
2678
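/* Select 1.0 ({0}.w) or 0.0 ({0}.x) for the fake frontface value, with
 * the pass/fail choice flipped by key.fkey.front_ccw.
 */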
2679 if (emit->key.fkey.front_ccw) {
2680 pass = scalar( zero, TGSI_SWIZZLE_X );
2681 fail = scalar( zero, TGSI_SWIZZLE_W );
2682 } else {
2683 pass = scalar( zero, TGSI_SWIZZLE_W );
2684 fail = scalar( zero, TGSI_SWIZZLE_X );
2685 }
2686
2687 if (!emit_conditional(emit, PIPE_FUNC_GREATER,
2688 temp, vface, scalar( zero, TGSI_SWIZZLE_X ),
2689 pass, fail))
2690 return FALSE;
2691
2692 /* Reassign the input_map to the actual front-face color:
2693 */
2694 emit->input_map[emit->internal_frontface_idx] = src(temp);
2695
2696 return TRUE;
2697 }
2698
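/* Decide whether the {0, 0, 0, 1} helper immediate is needed by any of
 * the instruction expansions above, so svga_shader_emit_helpers() can
 * define it up front.
 */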
2699 static INLINE boolean
2700 needs_to_create_zero( struct svga_shader_emitter *emit )
2701 {
2702 int i;
2703
2704 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2705 if (!emit->use_sm30)
2706 return TRUE;
2707
2708 if (emit->key.fkey.light_twoside)
2709 return TRUE;
2710
2711 if (emit->key.fkey.white_fragments)
2712 return TRUE;
2713
2714 if (emit->emit_frontface)
2715 return TRUE;
2716
2717 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
2718 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
2719 return TRUE;
2720 }
2721
2722 if (emit->unit == PIPE_SHADER_VERTEX) {
2723 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
2724 return TRUE;
2725 }
2726
2727 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
2728 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
2729 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
2730 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
2731 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
2732 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
2733 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
2734 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
2735 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
2736 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
2737 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
2738 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
2739 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
2740 emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1)
2741 return TRUE;
2742
2743 for (i = 0; i < emit->key.fkey.num_textures; i++) {
2744 if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
2745 return TRUE;
2746 }
2747
2748 return FALSE;
2749 }
2750
2751 static INLINE boolean
2752 needs_to_create_loop_const( struct svga_shader_emitter *emit )
2753 {
2754 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
2755 }
2756
2757 static INLINE boolean
2758 needs_to_create_sincos_consts( struct svga_shader_emitter *emit )
2759 {
2760 return !emit->use_sm30 && (emit->info.opcode_count[TGSI_OPCODE_SIN] >= 1 ||
2761 emit->info.opcode_count[TGSI_OPCODE_COS] >= 1 ||
2762 emit->info.opcode_count[TGSI_OPCODE_SCS] >= 1);
2763 }
2764
2765 static INLINE boolean
2766 needs_to_create_arl_consts( struct svga_shader_emitter *emit )
2767 {
2768 return (emit->num_arl_consts > 0);
2769 }
2770
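/* The pre-parse pass below scans the token stream for negative constant
 * indices used with ARL-based indirect addressing, recording the most
 * negative offset seen for each ARL; needs_to_create_arl_consts() and
 * create_arl_consts() consume this information later.
 */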
2771 static INLINE boolean
2772 pre_parse_add_indirect( struct svga_shader_emitter *emit,
2773 int num, int current_arl)
2774 {
2775 int i;
2776 assert(num < 0);
2777
2778 for (i = 0; i < emit->num_arl_consts; ++i) {
2779 if (emit->arl_consts[i].arl_num == current_arl)
2780 break;
2781 }
2782 /* new entry */
2783 if (emit->num_arl_consts == i) {
2784 ++emit->num_arl_consts;
2785 }
2786 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
2787 num :
2788 emit->arl_consts[i].number;
2789 emit->arl_consts[i].arl_num = current_arl;
2790 return TRUE;
2791 }
2792
2793 static boolean
2794 pre_parse_instruction( struct svga_shader_emitter *emit,
2795 const struct tgsi_full_instruction *insn,
2796 int current_arl)
2797 {
2798 if (insn->Src[0].Register.Indirect &&
2799 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
2800 const struct tgsi_full_src_register *reg = &insn->Src[0];
2801 if (reg->Register.Index < 0) {
2802 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
2803 }
2804 }
2805
2806 if (insn->Src[1].Register.Indirect &&
2807 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
2808 const struct tgsi_full_src_register *reg = &insn->Src[1];
2809 if (reg->Register.Index < 0) {
2810 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
2811 }
2812 }
2813
2814 if (insn->Src[2].Register.Indirect &&
2815 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
2816 const struct tgsi_full_src_register *reg = &insn->Src[2];
2817 if (reg->Register.Index < 0) {
2818 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
2819 }
2820 }
2821
2822 return TRUE;
2823 }
2824
2825 static boolean
2826 pre_parse_tokens( struct svga_shader_emitter *emit,
2827 const struct tgsi_token *tokens )
2828 {
2829 struct tgsi_parse_context parse;
2830 int current_arl = 0;
2831
2832 tgsi_parse_init( &parse, tokens );
2833
2834 while (!tgsi_parse_end_of_tokens( &parse )) {
2835 tgsi_parse_token( &parse );
2836 switch (parse.FullToken.Token.Type) {
2837 case TGSI_TOKEN_TYPE_IMMEDIATE:
2838 case TGSI_TOKEN_TYPE_DECLARATION:
2839 break;
2840 case TGSI_TOKEN_TYPE_INSTRUCTION:
2841 if (parse.FullToken.FullInstruction.Instruction.Opcode ==
2842 TGSI_OPCODE_ARL) {
2843 ++current_arl;
2844 }
2845 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
2846 current_arl ))
2847 return FALSE;
2848 break;
2849 default:
2850 break;
2851 }
2852
2853 }
2854 return TRUE;
2855 }
2856
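/* Emit the helper constants and input fixups (zero immediate, loop and
 * sincos constants, two-sided lighting, frontface) that the instruction
 * translations rely on.  Called lazily just before the first instruction
 * is translated.
 */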
2857 static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit )
2858
2859 {
2860 if (needs_to_create_zero( emit )) {
2861 create_zero_immediate( emit );
2862 }
2863 if (needs_to_create_loop_const( emit )) {
2864 create_loop_const( emit );
2865 }
2866 if (needs_to_create_sincos_consts( emit )) {
2867 create_sincos_consts( emit );
2868 }
2869 if (needs_to_create_arl_consts( emit )) {
2870 create_arl_consts( emit );
2871 }
2872
2873 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2874 if (!emit_ps_preamble( emit ))
2875 return FALSE;
2876
2877 if (emit->key.fkey.light_twoside) {
2878 if (!emit_light_twoside( emit ))
2879 return FALSE;
2880 }
2881 if (emit->emit_frontface) {
2882 if (!emit_frontface( emit ))
2883 return FALSE;
2884 }
2885 }
2886
2887 return TRUE;
2888 }
2889
2890 boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
2891 const struct tgsi_token *tokens )
2892 {
2893 struct tgsi_parse_context parse;
2894 boolean ret = TRUE;
2895 boolean helpers_emitted = FALSE;
2896 unsigned line_nr = 0;
2897
2898 tgsi_parse_init( &parse, tokens );
2899 emit->internal_imm_count = 0;
2900
2901 if (emit->unit == PIPE_SHADER_VERTEX) {
2902 ret = emit_vs_preamble( emit );
2903 if (!ret)
2904 goto done;
2905 }
2906
2907 pre_parse_tokens(emit, tokens);
2908
2909 while (!tgsi_parse_end_of_tokens( &parse )) {
2910 tgsi_parse_token( &parse );
2911
2912 switch (parse.FullToken.Token.Type) {
2913 case TGSI_TOKEN_TYPE_IMMEDIATE:
2914 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
2915 if (!ret)
2916 goto done;
2917 break;
2918
2919 case TGSI_TOKEN_TYPE_DECLARATION:
2920 if (emit->use_sm30)
2921 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
2922 else
2923 ret = svga_translate_decl_sm20( emit, &parse.FullToken.FullDeclaration );
2924 if (!ret)
2925 goto done;
2926 break;
2927
2928 case TGSI_TOKEN_TYPE_INSTRUCTION:
2929 if (!helpers_emitted) {
2930 if (!svga_shader_emit_helpers( emit ))
2931 goto done;
2932 helpers_emitted = TRUE;
2933 }
2934 ret = svga_emit_instruction( emit,
2935 line_nr++,
2936 &parse.FullToken.FullInstruction );
2937 if (!ret)
2938 goto done;
2939 break;
2940 default:
2941 break;
2942 }
2943
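/* Temporaries handed out by get_temp() are only valid for the duration
 * of a single TGSI instruction; reclaim them here.
 */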
2944 reset_temp_regs( emit );
2945 }
2946
2947 /* If we finished inside a subroutine rather than the main function,
2948 * terminate it here: the hardware rejects shaders whose subroutines
2949 * don't end with RET followed by END.
2950 */
2951 if (!emit->in_main_func) {
2952 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
2953 if (!ret)
2954 goto done;
2955 }
2956
2957 assert(emit->dynamic_branching_level == 0);
2958
2959 /* Need to terminate the whole shader:
2960 */
2961 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
2962 if (!ret)
2963 goto done;
2964
2965 done:
2966 assert(ret);
2967 tgsi_parse_free( &parse );
2968 return ret;
2969 }
2970