6debd98b7a1605ccbc3f208837dfaea77082bf76
[mesa.git] / src / gallium / drivers / svga / svga_tgsi_insn.c
1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "util/u_memory.h"
30
31 #include "svga_tgsi_emit.h"
32 #include "svga_context.h"
33
34
35 static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
36 static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
37
38
39
40
41 static unsigned
42 translate_opcode(
43 uint opcode )
44 {
45 switch (opcode) {
46 case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
47 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
48 case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC;
49 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
50 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
51 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
52 case TGSI_OPCODE_ENDFOR: return SVGA3DOP_ENDLOOP;
53 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
54 case TGSI_OPCODE_BGNFOR: return SVGA3DOP_LOOP;
55 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
56 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
57 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
58 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
59 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
60 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
61 case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM;
62 case TGSI_OPCODE_SSG: return SVGA3DOP_SGN;
63 default:
64 debug_printf("Unknown opcode %u\n", opcode);
65 assert( 0 );
66 return SVGA3DOP_LAST_INST;
67 }
68 }
69
70
71 static unsigned translate_file( unsigned file )
72 {
73 switch (file) {
74 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
75 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
76 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
77 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
78 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
79 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
80 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
81 default:
82 assert( 0 );
83 return SVGA3DREG_TEMP;
84 }
85 }
86
87
88
89
90
91
92 static SVGA3dShaderDestToken
93 translate_dst_register( struct svga_shader_emitter *emit,
94 const struct tgsi_full_instruction *insn,
95 unsigned idx )
96 {
97 const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
98 SVGA3dShaderDestToken dest;
99
100 switch (reg->Register.File) {
101 case TGSI_FILE_OUTPUT:
102 /* Output registers encode semantic information in their name.
103 * Need to look up the table built at declaration time:
104 */
105 dest = emit->output_map[reg->Register.Index];
106 break;
107
108 default:
109 dest = dst_register( translate_file( reg->Register.File ),
110 reg->Register.Index );
111 break;
112 }
113
114 dest.mask = reg->Register.WriteMask;
115
116 if (insn->Instruction.Saturate)
117 dest.dstMod = SVGA3DDSTMOD_SATURATE;
118
119 return dest;
120 }
121
122
123 static struct src_register
124 swizzle( struct src_register src,
125 int x,
126 int y,
127 int z,
128 int w )
129 {
130 x = (src.base.swizzle >> (x * 2)) & 0x3;
131 y = (src.base.swizzle >> (y * 2)) & 0x3;
132 z = (src.base.swizzle >> (z * 2)) & 0x3;
133 w = (src.base.swizzle >> (w * 2)) & 0x3;
134
135 src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w);
136
137 return src;
138 }
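/* Worked example (illustration only, not part of the original code): swizzles
 * are packed two bits per channel, so swizzle() composes on top of whatever
 * selection src already encodes.  If src.base.swizzle currently selects .yzwx,
 * then swizzle(src, 0, 0, 0, 0) reads the two bits for channel 0 (which name y)
 * four times, producing .yyyy -- i.e. scalar(src, TGSI_SWIZZLE_X) refers to
 * channel y of the underlying register.
 */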
139
140 static struct src_register
141 scalar( struct src_register src,
142 int comp )
143 {
144 return swizzle( src, comp, comp, comp, comp );
145 }
146
147 static INLINE boolean
148 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
149 {
150 int i;
151
152 for (i = 0; i < emit->num_arl_consts; ++i) {
153 if (emit->arl_consts[i].arl_num == emit->current_arl)
154 return TRUE;
155 }
156 return FALSE;
157 }
158
159 static INLINE int
160 svga_arl_adjustment( const struct svga_shader_emitter *emit )
161 {
162 int i;
163
164 for (i = 0; i < emit->num_arl_consts; ++i) {
165 if (emit->arl_consts[i].arl_num == emit->current_arl)
166 return emit->arl_consts[i].number;
167 }
168 return 0;
169 }
170
171 static struct src_register
172 translate_src_register( const struct svga_shader_emitter *emit,
173 const struct tgsi_full_src_register *reg )
174 {
175 struct src_register src;
176
177 switch (reg->Register.File) {
178 case TGSI_FILE_INPUT:
179 /* Input registers are referred to by their semantic name rather
180 * than by index. Use the mapping built up from the decls:
181 */
182 src = emit->input_map[reg->Register.Index];
183 break;
184
185 case TGSI_FILE_IMMEDIATE:
186 /* Immediates are appended after TGSI constants in the D3D
187 * constant buffer.
188 */
189 src = src_register( translate_file( reg->Register.File ),
190 reg->Register.Index +
191 emit->imm_start );
192 break;
193
194 default:
195 src = src_register( translate_file( reg->Register.File ),
196 reg->Register.Index );
197
198 break;
199 }
200
201 /* Indirect addressing (for constant buffer lookups only)
202 */
203 if (reg->Register.Indirect)
204 {
205 /* we shift the offset towards the minimum */
206 if (svga_arl_needs_adjustment( emit )) {
207 src.base.num -= svga_arl_adjustment( emit );
208 }
209 src.base.relAddr = 1;
210
211 /* Not really sure what should go in the second token:
212 */
213 src.indirect = src_token( SVGA3DREG_ADDR,
214 reg->Indirect.Index );
215
216 src.indirect.swizzle = SWIZZLE_XXXX;
217 }
218
219 src = swizzle( src,
220 reg->Register.SwizzleX,
221 reg->Register.SwizzleY,
222 reg->Register.SwizzleZ,
223 reg->Register.SwizzleW );
224
225 /* src.mod isn't a bitfield, unfortunately:
226 * See tgsi_util_get_full_src_register_sign_mode for implementation details.
227 */
228 if (reg->Register.Absolute) {
229 if (reg->Register.Negate)
230 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
231 else
232 src.base.srcMod = SVGA3DSRCMOD_ABS;
233 }
234 else {
235 if (reg->Register.Negate)
236 src.base.srcMod = SVGA3DSRCMOD_NEG;
237 else
238 src.base.srcMod = SVGA3DSRCMOD_NONE;
239 }
240
241 return src;
242 }
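/* Example of the mapping above (illustration only): a TGSI operand such as
 * -CONST[2].yyyy becomes an SVGA3DREG_CONST register with num = 2, swizzle
 * .yyyy and srcMod = SVGA3DSRCMOD_NEG, while IMM[0] lands at constant
 * register emit->imm_start + 0, since immediates are packed after the TGSI
 * constants in the D3D constant buffer.
 */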
243
244
245 /*
246 * Get a temporary register, return -1 if none available
247 */
248 static INLINE SVGA3dShaderDestToken
249 get_temp( struct svga_shader_emitter *emit )
250 {
251 int i = emit->nr_hw_temp + emit->internal_temp_count++;
252
253 return dst_register( SVGA3DREG_TEMP, i );
254 }
255
256 /* Release a single temp. Currently only effective if it was the last
257 * allocated temp, otherwise release will be delayed until the next
258 * call to reset_temp_regs().
259 */
260 static INLINE void
261 release_temp( struct svga_shader_emitter *emit,
262 SVGA3dShaderDestToken temp )
263 {
264 if (temp.num == emit->internal_temp_count - 1)
265 emit->internal_temp_count--;
266 }
267
268 static void reset_temp_regs( struct svga_shader_emitter *emit )
269 {
270 emit->internal_temp_count = 0;
271 }
272
273
274 static boolean submit_op0( struct svga_shader_emitter *emit,
275 SVGA3dShaderInstToken inst,
276 SVGA3dShaderDestToken dest )
277 {
278 return (emit_instruction( emit, inst ) &&
279 emit_dst( emit, dest ));
280 }
281
282 static boolean submit_op1( struct svga_shader_emitter *emit,
283 SVGA3dShaderInstToken inst,
284 SVGA3dShaderDestToken dest,
285 struct src_register src0 )
286 {
287 return emit_op1( emit, inst, dest, src0 );
288 }
289
290
291 /* SVGA shaders may not refer to more than one distinct constant
292 * register in a single instruction. This function checks for that
293 * usage and inserts a move to a temporary if detected.
294 *
295 * The same applies to input registers -- at most a single input
296 * register may be read by any instruction.
297 */
298 static boolean submit_op2( struct svga_shader_emitter *emit,
299 SVGA3dShaderInstToken inst,
300 SVGA3dShaderDestToken dest,
301 struct src_register src0,
302 struct src_register src1 )
303 {
304 SVGA3dShaderDestToken temp;
305 SVGA3dShaderRegType type0, type1;
306 boolean need_temp = FALSE;
307
308 temp.value = 0;
309 type0 = SVGA3dShaderGetRegType( src0.base.value );
310 type1 = SVGA3dShaderGetRegType( src1.base.value );
311
312 if (type0 == SVGA3DREG_CONST &&
313 type1 == SVGA3DREG_CONST &&
314 src0.base.num != src1.base.num)
315 need_temp = TRUE;
316
317 if (type0 == SVGA3DREG_INPUT &&
318 type1 == SVGA3DREG_INPUT &&
319 src0.base.num != src1.base.num)
320 need_temp = TRUE;
321
322 if (need_temp)
323 {
324 temp = get_temp( emit );
325
326 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 ))
327 return FALSE;
328
329 src0 = src( temp );
330 }
331
332 if (!emit_op2( emit, inst, dest, src0, src1 ))
333 return FALSE;
334
335 if (need_temp)
336 release_temp( emit, temp );
337
338 return TRUE;
339 }
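/* Illustration of the rule enforced above (not part of the original code):
 * an instruction such as
 *    ADD r0, c0, c1
 * reads two different constant registers, which SVGA3D forbids, so it is
 * rewritten as
 *    MOV t0, c0
 *    ADD r0, t0, c1
 * The same rewrite is applied when two different input registers appear.
 */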
340
341
342 /* SVGA shaders may not refer to more than one distinct constant
343 * register in a single instruction. This function checks for that
344 * usage and inserts a move to a temporary if detected.
345 */
346 static boolean submit_op3( struct svga_shader_emitter *emit,
347 SVGA3dShaderInstToken inst,
348 SVGA3dShaderDestToken dest,
349 struct src_register src0,
350 struct src_register src1,
351 struct src_register src2 )
352 {
353 SVGA3dShaderDestToken temp0;
354 SVGA3dShaderDestToken temp1;
355 boolean need_temp0 = FALSE;
356 boolean need_temp1 = FALSE;
357 SVGA3dShaderRegType type0, type1, type2;
358
359 temp0.value = 0;
360 temp1.value = 0;
361 type0 = SVGA3dShaderGetRegType( src0.base.value );
362 type1 = SVGA3dShaderGetRegType( src1.base.value );
363 type2 = SVGA3dShaderGetRegType( src2.base.value );
364
365 if (inst.op != SVGA3DOP_SINCOS) {
366 if (type0 == SVGA3DREG_CONST &&
367 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
368 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
369 need_temp0 = TRUE;
370
371 if (type1 == SVGA3DREG_CONST &&
372 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
373 need_temp1 = TRUE;
374 }
375
376 if (type0 == SVGA3DREG_INPUT &&
377 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
378 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
379 need_temp0 = TRUE;
380
381 if (type1 == SVGA3DREG_INPUT &&
382 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
383 need_temp1 = TRUE;
384
385 if (need_temp0)
386 {
387 temp0 = get_temp( emit );
388
389 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
390 return FALSE;
391
392 src0 = src( temp0 );
393 }
394
395 if (need_temp1)
396 {
397 temp1 = get_temp( emit );
398
399 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 ))
400 return FALSE;
401
402 src1 = src( temp1 );
403 }
404
405 if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
406 return FALSE;
407
408 if (need_temp1)
409 release_temp( emit, temp1 );
410 if (need_temp0)
411 release_temp( emit, temp0 );
412 return TRUE;
413 }
414
415
416
417
418 /* SVGA shaders may not refer to more than one distinct constant
419 * register in a single instruction. This function checks for that
420 * usage and inserts a move to a temporary if detected.
421 */
422 static boolean submit_op4( struct svga_shader_emitter *emit,
423 SVGA3dShaderInstToken inst,
424 SVGA3dShaderDestToken dest,
425 struct src_register src0,
426 struct src_register src1,
427 struct src_register src2,
428 struct src_register src3)
429 {
430 SVGA3dShaderDestToken temp0;
431 SVGA3dShaderDestToken temp3;
432 boolean need_temp0 = FALSE;
433 boolean need_temp3 = FALSE;
434 SVGA3dShaderRegType type0, type1, type2, type3;
435
436 temp0.value = 0;
437 temp3.value = 0;
438 type0 = SVGA3dShaderGetRegType( src0.base.value );
439 type1 = SVGA3dShaderGetRegType( src1.base.value );
440 type2 = SVGA3dShaderGetRegType( src2.base.value );
441 type3 = SVGA3dShaderGetRegType( src3.base.value );
442
443 /* Make life a little easier - this is only used by the TXD
444 * instruction, which is guaranteed to have a sampler (rather than a
445 * constant/input reg) in at least one slot:
446 */
447 assert(type1 == SVGA3DREG_SAMPLER);
448
449 if (type0 == SVGA3DREG_CONST &&
450 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
451 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
452 need_temp0 = TRUE;
453
454 if (type3 == SVGA3DREG_CONST &&
455 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
456 need_temp3 = TRUE;
457
458 if (type0 == SVGA3DREG_INPUT &&
459 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
460 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
461 need_temp0 = TRUE;
462
463 if (type3 == SVGA3DREG_INPUT &&
464 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
465 need_temp3 = TRUE;
466
467 if (need_temp0)
468 {
469 temp0 = get_temp( emit );
470
471 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
472 return FALSE;
473
474 src0 = src( temp0 );
475 }
476
477 if (need_temp3)
478 {
479 temp3 = get_temp( emit );
480
481 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp3, src3 ))
482 return FALSE;
483
484 src3 = src( temp3 );
485 }
486
487 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
488 return FALSE;
489
490 if (need_temp3)
491 release_temp( emit, temp3 );
492 if (need_temp0)
493 release_temp( emit, temp0 );
494 return TRUE;
495 }
496
497
498 static boolean emit_def_const( struct svga_shader_emitter *emit,
499 SVGA3dShaderConstType type,
500 unsigned idx,
501 float a,
502 float b,
503 float c,
504 float d )
505 {
506 SVGA3DOpDefArgs def;
507 SVGA3dShaderInstToken opcode;
508
509 switch (type) {
510 case SVGA3D_CONST_TYPE_FLOAT:
511 opcode = inst_token( SVGA3DOP_DEF );
512 def.dst = dst_register( SVGA3DREG_CONST, idx );
513 def.constValues[0] = a;
514 def.constValues[1] = b;
515 def.constValues[2] = c;
516 def.constValues[3] = d;
517 break;
518 case SVGA3D_CONST_TYPE_INT:
519 opcode = inst_token( SVGA3DOP_DEFI );
520 def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
521 def.constIValues[0] = (int)a;
522 def.constIValues[1] = (int)b;
523 def.constIValues[2] = (int)c;
524 def.constIValues[3] = (int)d;
525 break;
526 default:
527 assert(0);
528 opcode = inst_token( SVGA3DOP_NOP );
529 break;
530 }
531
532 if (!emit_instruction(emit, opcode) ||
533 !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
534 return FALSE;
535
536 return TRUE;
537 }
538
539 static INLINE boolean
540 create_zero_immediate( struct svga_shader_emitter *emit )
541 {
542 unsigned idx = emit->nr_hw_const++;
543
544 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
545 idx, 0, 0, 0, 1 ))
546 return FALSE;
547
548 emit->zero_immediate_idx = idx;
549 emit->created_zero_immediate = TRUE;
550
551 return TRUE;
552 }
553
554 static INLINE boolean
555 create_loop_const( struct svga_shader_emitter *emit )
556 {
557 unsigned idx = emit->nr_hw_const++;
558
559 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
560 255, /* iteration count */
561 0, /* initial value */
562 1, /* step size */
563 0 /* not used, must be 0 */))
564 return FALSE;
565
566 emit->loop_const_idx = idx;
567 emit->created_loop_const = TRUE;
568
569 return TRUE;
570 }
571
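/* The two constant vectors defined below appear to be the standard D3D9
 * coefficients required by the two-constant form of the SINCOS macro in
 * SM2.0 (a Taylor-series approximation); they are passed as the extra
 * sources of SVGA3DOP_SINCOS in do_emit_sincos() when use_sm30 is false.
 */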
572 static INLINE boolean
573 create_sincos_consts( struct svga_shader_emitter *emit )
574 {
575 unsigned idx = emit->nr_hw_const++;
576
577 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
578 -1.5500992e-006f,
579 -2.1701389e-005f,
580 0.0026041667f,
581 0.00026041668f ))
582 return FALSE;
583
584 emit->sincos_consts_idx = idx;
585 idx = emit->nr_hw_const++;
586
587 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
588 -0.020833334f,
589 -0.12500000f,
590 1.0f,
591 0.50000000f ))
592 return FALSE;
593
594 emit->created_sincos_consts = TRUE;
595
596 return TRUE;
597 }
598
599 static INLINE boolean
600 create_arl_consts( struct svga_shader_emitter *emit )
601 {
602 int i;
603
604 for (i = 0; i < emit->num_arl_consts; i += 4) {
605 int j;
606 unsigned idx = emit->nr_hw_const++;
607 float vals[4];
608 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
609 vals[j] = emit->arl_consts[i + j].number;
610 emit->arl_consts[i + j].idx = idx;
611 switch (j) {
612 case 0:
613 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
614 break;
615 case 1:
616 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y;
617 break;
618 case 2:
619 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z;
620 break;
621 case 3:
622 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W;
623 break;
624 }
625 }
626 while (j < 4)
627 vals[j++] = 0;
628
629 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
630 vals[0], vals[1],
631 vals[2], vals[3]))
632 return FALSE;
633 }
634
635 return TRUE;
636 }
637
638 static INLINE struct src_register
639 get_vface( struct svga_shader_emitter *emit )
640 {
641 assert(emit->emitted_vface);
642 return src_register(SVGA3DREG_MISCTYPE,
643 SVGA3DMISCREG_FACE);
644 }
645
646 /* returns {0, 0, 0, 1} immediate */
647 static INLINE struct src_register
648 get_zero_immediate( struct svga_shader_emitter *emit )
649 {
650 assert(emit->created_zero_immediate);
651 assert(emit->zero_immediate_idx >= 0);
652 return src_register( SVGA3DREG_CONST,
653 emit->zero_immediate_idx );
654 }
655
656 /* returns the loop const */
657 static INLINE struct src_register
658 get_loop_const( struct svga_shader_emitter *emit )
659 {
660 assert(emit->created_loop_const);
661 assert(emit->loop_const_idx >= 0);
662 return src_register( SVGA3DREG_CONSTINT,
663 emit->loop_const_idx );
664 }
665
666 /* returns a sincos const */
667 static INLINE struct src_register
668 get_sincos_const( struct svga_shader_emitter *emit,
669 unsigned index )
670 {
671 assert(emit->created_sincos_consts);
672 assert(emit->sincos_consts_idx >= 0);
673 assert(index == 0 || index == 1);
674 return src_register( SVGA3DREG_CONST,
675 emit->sincos_consts_idx + index );
676 }
677
678 static INLINE struct src_register
679 get_fake_arl_const( struct svga_shader_emitter *emit )
680 {
681 struct src_register reg;
682 int idx = 0, swizzle = 0, i;
683
684 for (i = 0; i < emit->num_arl_consts; ++ i) {
685 if (emit->arl_consts[i].arl_num == emit->current_arl) {
686 idx = emit->arl_consts[i].idx;
687 swizzle = emit->arl_consts[i].swizzle;
688 }
689 }
690
691 reg = src_register( SVGA3DREG_CONST, idx );
692 return scalar(reg, swizzle);
693 }
694
695 static INLINE struct src_register
696 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
697 {
698 int idx;
699 struct src_register reg;
700
701 /* the width/height indexes start right after constants */
702 idx = emit->key.fkey.tex[sampler_num].width_height_idx +
703 emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
704
705 reg = src_register( SVGA3DREG_CONST, idx );
706 return reg;
707 }
708
709 static boolean emit_fake_arl(struct svga_shader_emitter *emit,
710 const struct tgsi_full_instruction *insn)
711 {
712 const struct src_register src0 = translate_src_register(
713 emit, &insn->Src[0] );
714 struct src_register src1 = get_fake_arl_const( emit );
715 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
716 SVGA3dShaderDestToken tmp = get_temp( emit );
717
718 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
719 return FALSE;
720
721 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
722 src1))
723 return FALSE;
724
725 /* replicate the original swizzle */
726 src1 = src(tmp);
727 src1.base.swizzle = src0.base.swizzle;
728
729 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
730 dst, src1 );
731 }
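/* Sketch of the rewrite performed above (illustration only; assumes
 * arl_consts[].number is the bias that translate_src_register() already
 * subtracted from relatively-addressed constant indices):
 *    MOV  t, src0
 *    ADD  t, t, bias          (bias read from get_fake_arl_const())
 *    MOVA a0, t
 * so a0 + (index - bias) resolves to the same constant register as the
 * original a0 + index.
 */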
732
733 static boolean emit_if(struct svga_shader_emitter *emit,
734 const struct tgsi_full_instruction *insn)
735 {
736 const struct src_register src = translate_src_register(
737 emit, &insn->Src[0] );
738 struct src_register zero = get_zero_immediate( emit );
739 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
740
741 if_token.control = SVGA3DOPCOMPC_NE;
742 zero = scalar(zero, TGSI_SWIZZLE_X);
743
744 emit->dynamic_branching_level++;
745
746 return (emit_instruction( emit, if_token ) &&
747 emit_src( emit, src ) &&
748 emit_src( emit, zero ) );
749 }
750
751 static boolean emit_endif(struct svga_shader_emitter *emit,
752 const struct tgsi_full_instruction *insn)
753 {
754 emit->dynamic_branching_level--;
755
756 return (emit_instruction( emit,
757 inst_token( SVGA3DOP_ENDIF )));
758 }
759
760 static boolean emit_else(struct svga_shader_emitter *emit,
761 const struct tgsi_full_instruction *insn)
762 {
763 return (emit_instruction( emit,
764 inst_token( SVGA3DOP_ELSE )));
765 }
766
767 /* Translate the following TGSI FLR instruction (floor(x) = x - frac(x)).
768 * FLR DST, SRC
769 * To the following SVGA3D instruction sequence.
770 * FRC TMP, SRC
771 * SUB DST, SRC, TMP (emitted as ADD with a negated TMP)
772 */
773 static boolean emit_floor(struct svga_shader_emitter *emit,
774 const struct tgsi_full_instruction *insn )
775 {
776 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
777 const struct src_register src0 = translate_src_register(
778 emit, &insn->Src[0] );
779 SVGA3dShaderDestToken temp = get_temp( emit );
780
781 /* FRC TMP, SRC */
782 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
783 return FALSE;
784
785 /* SUB DST, SRC, TMP */
786 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
787 negate( src( temp ) ) ))
788 return FALSE;
789
790 return TRUE;
791 }
792
793
794 /* Translate the following TGSI CMP instruction.
795 * CMP DST, SRC0, SRC1, SRC2 (DST = SRC0 < 0 ? SRC1 : SRC2)
796 * To the following SVGA3D CMP, which selects its second source when SRC0 >= 0:
797 * CMP DST, SRC0, SRC2, SRC1
798 */
799 static boolean emit_cmp(struct svga_shader_emitter *emit,
800 const struct tgsi_full_instruction *insn )
801 {
802 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
803 const struct src_register src0 = translate_src_register(
804 emit, &insn->Src[0] );
805 const struct src_register src1 = translate_src_register(
806 emit, &insn->Src[1] );
807 const struct src_register src2 = translate_src_register(
808 emit, &insn->Src[2] );
809
810 /* CMP DST, SRC0, SRC2, SRC1 */
811 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
812 }
813
814
815
816 /* Translate the following TGSI DIV instruction.
817 * DIV DST.xy, SRC0, SRC1
818 * To the following SVGA3D instruction sequence.
819 * RCP TMP.x, SRC1.xxxx
820 * RCP TMP.y, SRC1.yyyy
821 * MUL DST.xy, SRC0, TMP
822 */
823 static boolean emit_div(struct svga_shader_emitter *emit,
824 const struct tgsi_full_instruction *insn )
825 {
826 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
827 const struct src_register src0 = translate_src_register(
828 emit, &insn->Src[0] );
829 const struct src_register src1 = translate_src_register(
830 emit, &insn->Src[1] );
831 SVGA3dShaderDestToken temp = get_temp( emit );
832 int i;
833
834 /* For each enabled element, perform an RCP instruction. Note that
835 * RCP is scalar in SVGA3D:
836 */
837 for (i = 0; i < 4; i++) {
838 unsigned channel = 1 << i;
839 if (dst.mask & channel) {
840 /* RCP TMP.?, SRC1.???? */
841 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
842 writemask(temp, channel),
843 scalar(src1, i) ))
844 return FALSE;
845 }
846 }
847
848 /* Then multiply them out with a single mul:
849 *
850 * MUL DST, SRC0, TMP
851 */
852 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
853 src( temp ) ))
854 return FALSE;
855
856 return TRUE;
857 }
858
859 /* Translate the following TGSI DP2 instruction.
860 * DP2 DST, SRC1, SRC2
861 * To the following SVGA3D instruction sequence.
862 * MUL TMP, SRC1, SRC2
863 * ADD DST, TMP.xxxx, TMP.yyyy
864 */
865 static boolean emit_dp2(struct svga_shader_emitter *emit,
866 const struct tgsi_full_instruction *insn )
867 {
868 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
869 const struct src_register src0 = translate_src_register(
870 emit, &insn->Src[0] );
871 const struct src_register src1 = translate_src_register(
872 emit, &insn->Src[1] );
873 SVGA3dShaderDestToken temp = get_temp( emit );
874 struct src_register temp_src0, temp_src1;
875
876 /* MUL TMP, SRC1, SRC2 */
877 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
878 return FALSE;
879
880 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
881 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
882
883 /* ADD DST, TMP.xxxx, TMP.yyyy */
884 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
885 temp_src0, temp_src1 ))
886 return FALSE;
887
888 return TRUE;
889 }
890
891
892 /* Translate the following TGSI DPH instruction.
893 * DPH DST, SRC1, SRC2
894 * To the following SVGA3D instruction sequence.
895 * DP3 TMP, SRC1, SRC2
896 * ADD DST, TMP, SRC2.wwww
897 */
898 static boolean emit_dph(struct svga_shader_emitter *emit,
899 const struct tgsi_full_instruction *insn )
900 {
901 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
902 const struct src_register src0 = translate_src_register(
903 emit, &insn->Src[0] );
904 struct src_register src1 = translate_src_register(
905 emit, &insn->Src[1] );
906 SVGA3dShaderDestToken temp = get_temp( emit );
907
908 /* DP3 TMP, SRC1, SRC2 */
909 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
910 return FALSE;
911
912 src1 = scalar(src1, TGSI_SWIZZLE_W);
913
914 /* ADD DST, TMP, SRC2.wwww */
915 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
916 src( temp ), src1 ))
917 return FALSE;
918
919 return TRUE;
920 }
921
922 /* Translate the following TGSI NRM instruction.
923 * NRM DST, SRC
924 * To the following SVGA3D instruction sequence.
925 * DP3 TMP, SRC, SRC
926 * RSQ TMP, TMP
927 * MUL DST, SRC, TMP
928 */
929 static boolean emit_nrm(struct svga_shader_emitter *emit,
930 const struct tgsi_full_instruction *insn )
931 {
932 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
933 const struct src_register src0 = translate_src_register(
934 emit, &insn->Src[0] );
935 SVGA3dShaderDestToken temp = get_temp( emit );
936
937 /* DP3 TMP, SRC, SRC */
938 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 ))
939 return FALSE;
940
941 /* RSQ TMP, TMP */
942 if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp )))
943 return FALSE;
944
945 /* MUL DST, SRC, TMP */
946 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst,
947 src0, src( temp )))
948 return FALSE;
949
950 return TRUE;
951
952 }
953
954 static boolean do_emit_sincos(struct svga_shader_emitter *emit,
955 SVGA3dShaderDestToken dst,
956 struct src_register src0)
957 {
958 src0 = scalar(src0, TGSI_SWIZZLE_X);
959
960 if (emit->use_sm30) {
961 return submit_op1( emit, inst_token( SVGA3DOP_SINCOS ),
962 dst, src0 );
963 } else {
964 struct src_register const1 = get_sincos_const( emit, 0 );
965 struct src_register const2 = get_sincos_const( emit, 1 );
966
967 return submit_op3( emit, inst_token( SVGA3DOP_SINCOS ),
968 dst, src0, const1, const2 );
969 }
970 }
971
972 static boolean emit_sincos(struct svga_shader_emitter *emit,
973 const struct tgsi_full_instruction *insn)
974 {
975 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
976 struct src_register src0 = translate_src_register(
977 emit, &insn->Src[0] );
978 SVGA3dShaderDestToken temp = get_temp( emit );
979
980 /* SCS TMP SRC */
981 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
982 return FALSE;
983
984 /* MOV DST TMP */
985 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
986 return FALSE;
987
988 return TRUE;
989 }
990
991 /*
992 * SCS TMP SRC
993 * MOV DST TMP.yyyy
994 */
995 static boolean emit_sin(struct svga_shader_emitter *emit,
996 const struct tgsi_full_instruction *insn )
997 {
998 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
999 struct src_register src0 = translate_src_register(
1000 emit, &insn->Src[0] );
1001 SVGA3dShaderDestToken temp = get_temp( emit );
1002
1003 /* SCS TMP SRC */
1004 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1005 return FALSE;
1006
1007 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1008
1009 /* MOV DST TMP.yyyy */
1010 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1011 return FALSE;
1012
1013 return TRUE;
1014 }
1015
1016 /*
1017 * SCS TMP SRC
1018 * MOV DST TMP.xxxx
1019 */
1020 static boolean emit_cos(struct svga_shader_emitter *emit,
1021 const struct tgsi_full_instruction *insn )
1022 {
1023 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1024 struct src_register src0 = translate_src_register(
1025 emit, &insn->Src[0] );
1026 SVGA3dShaderDestToken temp = get_temp( emit );
1027
1028 /* SCS TMP SRC */
1029 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1030 return FALSE;
1031
1032 src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1033
1034 /* MOV DST TMP.xxxx */
1035 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1036 return FALSE;
1037
1038 return TRUE;
1039 }
1040
1041
1042 /*
1043 * ADD DST, SRC0, negate(SRC1)
1044 */
1045 static boolean emit_sub(struct svga_shader_emitter *emit,
1046 const struct tgsi_full_instruction *insn)
1047 {
1048 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1049 struct src_register src0 = translate_src_register(
1050 emit, &insn->Src[0] );
1051 struct src_register src1 = translate_src_register(
1052 emit, &insn->Src[1] );
1053
1054 src1 = negate(src1);
1055
1056 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1057 src0, src1 ))
1058 return FALSE;
1059
1060 return TRUE;
1061 }
1062
1063
1064 static boolean emit_kil(struct svga_shader_emitter *emit,
1065 const struct tgsi_full_instruction *insn )
1066 {
1067 SVGA3dShaderInstToken inst;
1068 const struct tgsi_full_src_register *reg = &insn->Src[0];
1069 struct src_register src0;
1070
1071 inst = inst_token( SVGA3DOP_TEXKILL );
1072 src0 = translate_src_register( emit, reg );
1073
1074 if (reg->Register.Absolute ||
1075 reg->Register.Negate ||
1076 reg->Register.Indirect ||
1077 reg->Register.SwizzleX != 0 ||
1078 reg->Register.SwizzleY != 1 ||
1079 reg->Register.SwizzleZ != 2 ||
1080 reg->Register.File != TGSI_FILE_TEMPORARY)
1081 {
1082 SVGA3dShaderDestToken temp = get_temp( emit );
1083
1084 submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 );
1085 src0 = src( temp );
1086 }
1087
1088 return submit_op0( emit, inst, dst(src0) );
1089 }
1090
1091
1092 /* The Mesa state tracker always emits KILP as an unconditional
1093 * KIL. */
1094 static boolean emit_kilp(struct svga_shader_emitter *emit,
1095 const struct tgsi_full_instruction *insn )
1096 {
1097 SVGA3dShaderInstToken inst;
1098 SVGA3dShaderDestToken temp;
1099 struct src_register one = scalar( get_zero_immediate( emit ),
1100 TGSI_SWIZZLE_W );
1101
1102 inst = inst_token( SVGA3DOP_TEXKILL );
1103
1104 /* texkill doesn't allow negation on the operand, so let's move the
1105 * negation of {1} to a temp register */
1106 temp = get_temp( emit );
1107 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1108 negate( one ) ))
1109 return FALSE;
1110
1111 return submit_op0( emit, inst, temp );
1112 }
1113
1114 /* Implement conditionals by initializing destination reg to 'fail',
1115 * then set predicate reg with UFOP_SETP, then move 'pass' to dest
1116 * based on predicate reg.
1117 *
1118 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1119 * MOV dst, fail
1120 * MOV dst, pass, p0
1121 */
1122 static boolean
1123 emit_conditional(struct svga_shader_emitter *emit,
1124 unsigned compare_func,
1125 SVGA3dShaderDestToken dst,
1126 struct src_register src0,
1127 struct src_register src1,
1128 struct src_register pass,
1129 struct src_register fail)
1130 {
1131 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1132 SVGA3dShaderInstToken setp_token, mov_token;
1133 setp_token = inst_token( SVGA3DOP_SETP );
1134
1135 switch (compare_func) {
1136 case PIPE_FUNC_NEVER:
1137 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1138 dst, fail );
1139 break;
1140 case PIPE_FUNC_LESS:
1141 setp_token.control = SVGA3DOPCOMP_LT;
1142 break;
1143 case PIPE_FUNC_EQUAL:
1144 setp_token.control = SVGA3DOPCOMP_EQ;
1145 break;
1146 case PIPE_FUNC_LEQUAL:
1147 setp_token.control = SVGA3DOPCOMP_LE;
1148 break;
1149 case PIPE_FUNC_GREATER:
1150 setp_token.control = SVGA3DOPCOMP_GT;
1151 break;
1152 case PIPE_FUNC_NOTEQUAL:
1153 setp_token.control = SVGA3DOPCOMPC_NE;
1154 break;
1155 case PIPE_FUNC_GEQUAL:
1156 setp_token.control = SVGA3DOPCOMP_GE;
1157 break;
1158 case PIPE_FUNC_ALWAYS:
1159 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1160 dst, pass );
1161 break;
1162 }
1163
1164 /* SETP src0, COMPOP, src1 */
1165 if (!submit_op2( emit, setp_token, pred_reg,
1166 src0, src1 ))
1167 return FALSE;
1168
1169 mov_token = inst_token( SVGA3DOP_MOV );
1170
1171 /* MOV dst, fail */
1172 if (!submit_op1( emit, mov_token, dst,
1173 fail ))
1174 return FALSE;
1175
1176 /* MOV dst, pass (predicated)
1177 *
1178 * Note that the predicate reg (and possible modifiers) is passed
1179 * as the first source argument.
1180 */
1181 mov_token.predicated = 1;
1182 if (!submit_op2( emit, mov_token, dst,
1183 src( pred_reg ), pass ))
1184 return FALSE;
1185
1186 return TRUE;
1187 }
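/* Example of the sequence emitted above (illustration only): for
 * PIPE_FUNC_LESS with pass = 1 and fail = 0 this produces
 *    SETP_LT p0, src0, src1
 *    MOV     dst, fail
 *    (p0) MOV dst, pass
 * leaving 1 in dst where src0 < src1 and 0 elsewhere.
 */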
1188
1189
1190 static boolean
1191 emit_select(struct svga_shader_emitter *emit,
1192 unsigned compare_func,
1193 SVGA3dShaderDestToken dst,
1194 struct src_register src0,
1195 struct src_register src1 )
1196 {
1197 /* There are some SVGA instructions which implement some selects
1198 * directly, but they are only available in the vertex shader.
1199 */
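   /* SGE computes (a >= b) ? 1 : 0 and SLT computes (a < b) ? 1 : 0, so the
    * remaining comparisons are obtained by swapping the operands: LEQUAL is
    * SGE(src1, src0) and GREATER is SLT(src1, src0).
    */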
1200 if (emit->unit == PIPE_SHADER_VERTEX) {
1201 switch (compare_func) {
1202 case PIPE_FUNC_GEQUAL:
1203 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1204 case PIPE_FUNC_LEQUAL:
1205 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1206 case PIPE_FUNC_GREATER:
1207 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1208 case PIPE_FUNC_LESS:
1209 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1210 default:
1211 break;
1212 }
1213 }
1214
1215
1216 /* Otherwise, need to use the setp approach:
1217 */
1218 {
1219 struct src_register one, zero;
1220 /* zero immediate is 0,0,0,1 */
1221 zero = get_zero_immediate( emit );
1222 one = scalar( zero, TGSI_SWIZZLE_W );
1223 zero = scalar( zero, TGSI_SWIZZLE_X );
1224
1225 return emit_conditional(
1226 emit,
1227 compare_func,
1228 dst,
1229 src0,
1230 src1,
1231 one, zero);
1232 }
1233 }
1234
1235
1236 static boolean emit_select_op(struct svga_shader_emitter *emit,
1237 unsigned compare,
1238 const struct tgsi_full_instruction *insn)
1239 {
1240 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1241 struct src_register src0 = translate_src_register(
1242 emit, &insn->Src[0] );
1243 struct src_register src1 = translate_src_register(
1244 emit, &insn->Src[1] );
1245
1246 return emit_select( emit, compare, dst, src0, src1 );
1247 }
1248
1249
1250 /* Translate texture instructions to SVGA3D representation.
1251 */
1252 static boolean emit_tex2(struct svga_shader_emitter *emit,
1253 const struct tgsi_full_instruction *insn,
1254 SVGA3dShaderDestToken dst )
1255 {
1256 SVGA3dShaderInstToken inst;
1257 struct src_register texcoord;
1258 struct src_register sampler;
1259 SVGA3dShaderDestToken tmp;
1260
1261 inst.value = 0;
1262
1263 switch (insn->Instruction.Opcode) {
1264 case TGSI_OPCODE_TEX:
1265 inst.op = SVGA3DOP_TEX;
1266 break;
1267 case TGSI_OPCODE_TXP:
1268 inst.op = SVGA3DOP_TEX;
1269 inst.control = SVGA3DOPCONT_PROJECT;
1270 break;
1271 case TGSI_OPCODE_TXB:
1272 inst.op = SVGA3DOP_TEX;
1273 inst.control = SVGA3DOPCONT_BIAS;
1274 break;
1275 case TGSI_OPCODE_TXL:
1276 inst.op = SVGA3DOP_TEXLDL;
1277 break;
1278 default:
1279 assert(0);
1280 return FALSE;
1281 }
1282
1283 texcoord = translate_src_register( emit, &insn->Src[0] );
1284 sampler = translate_src_register( emit, &insn->Src[1] );
1285
1286 if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
1287 emit->dynamic_branching_level > 0)
1288 tmp = get_temp( emit );
1289
1290 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1291 * zero in that case.
1292 */
1293 if (emit->dynamic_branching_level > 0 &&
1294 inst.op == SVGA3DOP_TEX &&
1295 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1296 struct src_register zero = get_zero_immediate( emit );
1297
1298 /* MOV tmp, texcoord */
1299 if (!submit_op1( emit,
1300 inst_token( SVGA3DOP_MOV ),
1301 tmp,
1302 texcoord ))
1303 return FALSE;
1304
1305 /* MOV tmp.w, zero */
1306 if (!submit_op1( emit,
1307 inst_token( SVGA3DOP_MOV ),
1308 writemask( tmp, TGSI_WRITEMASK_W ),
1309 scalar( zero, TGSI_SWIZZLE_X )))
1310 return FALSE;
1311
1312 texcoord = src( tmp );
1313 inst.op = SVGA3DOP_TEXLDL;
1314 }
1315
1316 /* Explicit normalization of texcoords:
1317 */
1318 if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
1319 struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1320
1321 /* MUL tmp, SRC0, WH */
1322 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1323 tmp, texcoord, wh ))
1324 return FALSE;
1325
1326 texcoord = src( tmp );
1327 }
1328
1329 return submit_op2( emit, inst, dst, texcoord, sampler );
1330 }
1331
1332
1333
1334
1335 /* Translate texture instructions to SVGA3D representation.
1336 */
1337 static boolean emit_tex4(struct svga_shader_emitter *emit,
1338 const struct tgsi_full_instruction *insn,
1339 SVGA3dShaderDestToken dst )
1340 {
1341 SVGA3dShaderInstToken inst;
1342 struct src_register texcoord;
1343 struct src_register ddx;
1344 struct src_register ddy;
1345 struct src_register sampler;
1346
1347 texcoord = translate_src_register( emit, &insn->Src[0] );
1348 ddx = translate_src_register( emit, &insn->Src[1] );
1349 ddy = translate_src_register( emit, &insn->Src[2] );
1350 sampler = translate_src_register( emit, &insn->Src[3] );
1351
1352 inst.value = 0;
1353
1354 switch (insn->Instruction.Opcode) {
1355 case TGSI_OPCODE_TXD:
1356 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1357 break;
1358 default:
1359 assert(0);
1360 return FALSE;
1361 }
1362
1363 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1364 }
1365
1366
1367 static boolean emit_tex(struct svga_shader_emitter *emit,
1368 const struct tgsi_full_instruction *insn )
1369 {
1370 SVGA3dShaderDestToken dst =
1371 translate_dst_register( emit, insn, 0 );
1372 struct src_register src0 =
1373 translate_src_register( emit, &insn->Src[0] );
1374 struct src_register src1 =
1375 translate_src_register( emit, &insn->Src[1] );
1376
1377 SVGA3dShaderDestToken tex_result;
1378
1379 /* check for shadow samplers */
1380 boolean compare = (emit->key.fkey.tex[src1.base.num].compare_mode ==
1381 PIPE_TEX_COMPARE_R_TO_TEXTURE);
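   /* For shadow samplers the code below samples the texture, divides the R
    * texcoord by Q, compares the two with the sampler's compare_func via
    * emit_select() (yielding 0 or 1), and finally forces dst.w to 1.
    */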
1382
1383
1384 /* If doing compare processing, need to put this value into a
1385 * temporary so it can be used as a source later on.
1386 */
1387 if (compare ||
1388 (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) ) {
1389 tex_result = get_temp( emit );
1390 }
1391 else {
1392 tex_result = dst;
1393 }
1394
1395 switch(insn->Instruction.Opcode) {
1396 case TGSI_OPCODE_TEX:
1397 case TGSI_OPCODE_TXB:
1398 case TGSI_OPCODE_TXP:
1399 case TGSI_OPCODE_TXL:
1400 if (!emit_tex2( emit, insn, tex_result ))
1401 return FALSE;
1402 break;
1403 case TGSI_OPCODE_TXD:
1404 if (!emit_tex4( emit, insn, tex_result ))
1405 return FALSE;
1406 break;
1407 default:
1408 assert(0);
1409 }
1410
1411
1412 if (compare) {
1413 SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1414 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1415 struct src_register one =
1416 scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W );
1417
1418 /* Divide texcoord R by Q */
1419 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1420 src0_zdivw,
1421 scalar(src0, TGSI_SWIZZLE_W) ))
1422 return FALSE;
1423
1424 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1425 src0_zdivw,
1426 scalar(src0, TGSI_SWIZZLE_Z),
1427 src(src0_zdivw) ))
1428 return FALSE;
1429
1430 if (!emit_select(
1431 emit,
1432 emit->key.fkey.tex[src1.base.num].compare_func,
1433 dst,
1434 src(src0_zdivw),
1435 tex_src_x))
1436 return FALSE;
1437
1438 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1439 writemask( dst, TGSI_WRITEMASK_W),
1440 one );
1441 }
1442 else if (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW)
1443 {
1444 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1445 return FALSE;
1446 }
1447
1448 return TRUE;
1449 }
1450
1451 static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
1452 const struct tgsi_full_instruction *insn )
1453 {
1454 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
1455 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
1456 struct src_register const_int = get_loop_const( emit );
1457
1458 emit->dynamic_branching_level++;
1459
1460 return (emit_instruction( emit, inst ) &&
1461 emit_src( emit, loop_reg ) &&
1462 emit_src( emit, const_int ) );
1463 }
1464
1465 static boolean emit_endloop2( struct svga_shader_emitter *emit,
1466 const struct tgsi_full_instruction *insn )
1467 {
1468 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
1469
1470 emit->dynamic_branching_level--;
1471
1472 return emit_instruction( emit, inst );
1473 }
1474
1475 static boolean emit_brk( struct svga_shader_emitter *emit,
1476 const struct tgsi_full_instruction *insn )
1477 {
1478 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
1479 return emit_instruction( emit, inst );
1480 }
1481
1482 static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
1483 unsigned opcode,
1484 const struct tgsi_full_instruction *insn )
1485 {
1486 SVGA3dShaderInstToken inst;
1487 SVGA3dShaderDestToken dst;
1488 struct src_register src;
1489
1490 inst = inst_token( opcode );
1491 dst = translate_dst_register( emit, insn, 0 );
1492 src = translate_src_register( emit, &insn->Src[0] );
1493 src = scalar( src, TGSI_SWIZZLE_X );
1494
1495 return submit_op1( emit, inst, dst, src );
1496 }
1497
1498
1499 static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
1500 unsigned opcode,
1501 const struct tgsi_full_instruction *insn )
1502 {
1503 const struct tgsi_full_src_register *src = insn->Src;
1504 SVGA3dShaderInstToken inst;
1505 SVGA3dShaderDestToken dst;
1506
1507 inst = inst_token( opcode );
1508 dst = translate_dst_register( emit, insn, 0 );
1509
1510 switch (insn->Instruction.NumSrcRegs) {
1511 case 0:
1512 return submit_op0( emit, inst, dst );
1513 case 1:
1514 return submit_op1( emit, inst, dst,
1515 translate_src_register( emit, &src[0] ));
1516 case 2:
1517 return submit_op2( emit, inst, dst,
1518 translate_src_register( emit, &src[0] ),
1519 translate_src_register( emit, &src[1] ) );
1520 case 3:
1521 return submit_op3( emit, inst, dst,
1522 translate_src_register( emit, &src[0] ),
1523 translate_src_register( emit, &src[1] ),
1524 translate_src_register( emit, &src[2] ) );
1525 default:
1526 assert(0);
1527 return FALSE;
1528 }
1529 }
1530
1531
1532 static boolean emit_deriv(struct svga_shader_emitter *emit,
1533 const struct tgsi_full_instruction *insn )
1534 {
1535 if (emit->dynamic_branching_level > 0 &&
1536 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
1537 {
1538 struct src_register zero = get_zero_immediate( emit );
1539 SVGA3dShaderDestToken dst =
1540 translate_dst_register( emit, insn, 0 );
1541
1542 /* Deriv opcodes not valid inside dynamic branching, workaround
1543 * by zeroing out the destination.
1544 */
1545 if (!submit_op1(emit,
1546 inst_token( SVGA3DOP_MOV ),
1547 dst,
1548 scalar(zero, TGSI_SWIZZLE_X)))
1549 return FALSE;
1550
1551 return TRUE;
1552 }
1553 else {
1554 unsigned opcode;
1555
1556 switch (insn->Instruction.Opcode) {
1557 case TGSI_OPCODE_DDX:
1558 opcode = SVGA3DOP_DSX;
1559 break;
1560 case TGSI_OPCODE_DDY:
1561 opcode = SVGA3DOP_DSY;
1562 break;
1563 default:
1564 return FALSE;
1565 }
1566
1567 return emit_simple_instruction( emit, opcode, insn );
1568 }
1569 }
1570
1571 static boolean emit_arl(struct svga_shader_emitter *emit,
1572 const struct tgsi_full_instruction *insn)
1573 {
1574 ++emit->current_arl;
1575 if (svga_arl_needs_adjustment( emit )) {
1576 return emit_fake_arl( emit, insn );
1577 } else {
1578 /* no need to adjust, just emit straight arl */
1579 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
1580 }
1581 }
1582
1583 static boolean alias_src_dst( struct src_register src,
1584 SVGA3dShaderDestToken dst )
1585 {
1586 if (src.base.num != dst.num)
1587 return FALSE;
1588
1589 if (SVGA3dShaderGetRegType(dst.value) !=
1590 SVGA3dShaderGetRegType(src.base.value))
1591 return FALSE;
1592
1593 return TRUE;
1594 }
1595
1596 static boolean emit_pow(struct svga_shader_emitter *emit,
1597 const struct tgsi_full_instruction *insn)
1598 {
1599 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1600 struct src_register src0 = translate_src_register(
1601 emit, &insn->Src[0] );
1602 struct src_register src1 = translate_src_register(
1603 emit, &insn->Src[1] );
1604 boolean need_tmp = FALSE;
1605
1606 /* POW can only output to a temporary */
1607 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
1608 need_tmp = TRUE;
1609
1610 /* POW src1 must not be the same register as dst */
1611 if (alias_src_dst( src1, dst ))
1612 need_tmp = TRUE;
1613
1614 /* it's a scalar op */
1615 src0 = scalar( src0, TGSI_SWIZZLE_X );
1616 src1 = scalar( src1, TGSI_SWIZZLE_X );
1617
1618 if (need_tmp) {
1619 SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X );
1620
1621 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
1622 return FALSE;
1623
1624 return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) );
1625 }
1626 else {
1627 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
1628 }
1629 }
1630
1631 static boolean emit_xpd(struct svga_shader_emitter *emit,
1632 const struct tgsi_full_instruction *insn)
1633 {
1634 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1635 const struct src_register src0 = translate_src_register(
1636 emit, &insn->Src[0] );
1637 const struct src_register src1 = translate_src_register(
1638 emit, &insn->Src[1] );
1639 boolean need_dst_tmp = FALSE;
1640
1641 /* XPD can only output to a temporary */
1642 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
1643 need_dst_tmp = TRUE;
1644
1645 /* The dst reg must not be the same as src0 or src1 */
1646 if (alias_src_dst(src0, dst) ||
1647 alias_src_dst(src1, dst))
1648 need_dst_tmp = TRUE;
1649
1650 if (need_dst_tmp) {
1651 SVGA3dShaderDestToken tmp = get_temp( emit );
1652
1653 /* Obey DX9 restrictions on mask:
1654 */
1655 tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
1656
1657 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
1658 return FALSE;
1659
1660 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
1661 return FALSE;
1662 }
1663 else {
1664 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
1665 return FALSE;
1666 }
1667
1668 /* Need to emit 1.0 to dst.w?
1669 */
1670 if (dst.mask & TGSI_WRITEMASK_W) {
1671 struct src_register zero = get_zero_immediate( emit );
1672
1673 if (!submit_op1(emit,
1674 inst_token( SVGA3DOP_MOV ),
1675 writemask(dst, TGSI_WRITEMASK_W),
1676 zero))
1677 return FALSE;
1678 }
1679
1680 return TRUE;
1681 }
1682
1683
1684 static boolean emit_lrp(struct svga_shader_emitter *emit,
1685 const struct tgsi_full_instruction *insn)
1686 {
1687 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1688 SVGA3dShaderDestToken tmp;
1689 const struct src_register src0 = translate_src_register(
1690 emit, &insn->Src[0] );
1691 const struct src_register src1 = translate_src_register(
1692 emit, &insn->Src[1] );
1693 const struct src_register src2 = translate_src_register(
1694 emit, &insn->Src[2] );
1695 boolean need_dst_tmp = FALSE;
1696
1697 /* The dst reg must not be the same as src0 or src2 */
1698 if (alias_src_dst(src0, dst) ||
1699 alias_src_dst(src2, dst))
1700 need_dst_tmp = TRUE;
1701
1702 if (need_dst_tmp) {
1703 tmp = get_temp( emit );
1704 tmp.mask = dst.mask;
1705 }
1706 else {
1707 tmp = dst;
1708 }
1709
1710 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
1711 return FALSE;
1712
1713 if (need_dst_tmp) {
1714 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
1715 return FALSE;
1716 }
1717
1718 return TRUE;
1719 }
1720
1721
1722 static boolean emit_dst_insn(struct svga_shader_emitter *emit,
1723 const struct tgsi_full_instruction *insn )
1724 {
1725 if (emit->unit == PIPE_SHADER_VERTEX) {
1726 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
1727 */
1728 return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
1729 }
1730 else {
1731
1732 /* result[0] = 1 * 1;
1733 * result[1] = a[1] * b[1];
1734 * result[2] = a[2] * 1;
1735 * result[3] = 1 * b[3];
1736 */
1737
1738 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1739 SVGA3dShaderDestToken tmp;
1740 const struct src_register src0 = translate_src_register(
1741 emit, &insn->Src[0] );
1742 const struct src_register src1 = translate_src_register(
1743 emit, &insn->Src[1] );
1744 struct src_register zero = get_zero_immediate( emit );
1745 boolean need_tmp = FALSE;
1746
1747 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
1748 alias_src_dst(src0, dst) ||
1749 alias_src_dst(src1, dst))
1750 need_tmp = TRUE;
1751
1752 if (need_tmp) {
1753 tmp = get_temp( emit );
1754 }
1755 else {
1756 tmp = dst;
1757 }
1758
1759 /* tmp.xw = 1.0
1760 */
1761 if (tmp.mask & TGSI_WRITEMASK_XW) {
1762 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1763 writemask(tmp, TGSI_WRITEMASK_XW ),
1764 scalar( zero, 3 )))
1765 return FALSE;
1766 }
1767
1768 /* tmp.yz = src0
1769 */
1770 if (tmp.mask & TGSI_WRITEMASK_YZ) {
1771 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1772 writemask(tmp, TGSI_WRITEMASK_YZ ),
1773 src0))
1774 return FALSE;
1775 }
1776
1777 /* tmp.yw = tmp * src1
1778 */
1779 if (tmp.mask & TGSI_WRITEMASK_YW) {
1780 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1781 writemask(tmp, TGSI_WRITEMASK_YW ),
1782 src(tmp),
1783 src1))
1784 return FALSE;
1785 }
1786
1787 /* dst = tmp
1788 */
1789 if (need_tmp) {
1790 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1791 dst,
1792 src(tmp)))
1793 return FALSE;
1794 }
1795 }
1796
1797 return TRUE;
1798 }
1799
1800
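/* TGSI EXP writes, per enabled channel:
 *   dst.x = 2 ^ floor(src.x)
 *   dst.y = src.x - floor(src.x)
 *   dst.z = 2 ^ src.x        (partial precision)
 *   dst.w = 1.0
 * The emitter below builds each requested channel from FRC/ADD/EXP/EXPP/MOV.
 */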
1801 static boolean emit_exp(struct svga_shader_emitter *emit,
1802 const struct tgsi_full_instruction *insn)
1803 {
1804 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1805 struct src_register src0 =
1806 translate_src_register( emit, &insn->Src[0] );
1807 struct src_register zero = get_zero_immediate( emit );
1808 SVGA3dShaderDestToken fraction;
1809
1810 if (dst.mask & TGSI_WRITEMASK_Y)
1811 fraction = dst;
1812 else if (dst.mask & TGSI_WRITEMASK_X)
1813 fraction = get_temp( emit );
1814
1815 /* If x or y is being written, we need the fraction src0 - floor(src0).
1816 */
1817 if (dst.mask & TGSI_WRITEMASK_XY) {
1818 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
1819 writemask( fraction, TGSI_WRITEMASK_Y ),
1820 src0 ))
1821 return FALSE;
1822 }
1823
1824 /* If x is being written, fill it with 2 ^ floor(src0).
1825 */
1826 if (dst.mask & TGSI_WRITEMASK_X) {
1827 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
1828 writemask( dst, dst.mask & TGSI_WRITEMASK_X ),
1829 src0,
1830 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
1831 return FALSE;
1832
1833 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
1834 writemask( dst, dst.mask & TGSI_WRITEMASK_X ),
1835 scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
1836 return FALSE;
1837
1838 if (!(dst.mask & TGSI_WRITEMASK_Y))
1839 release_temp( emit, fraction );
1840 }
1841
1842 /* If z is being written, fill it with 2 ^ src0 (partial precision).
1843 */
1844 if (dst.mask & TGSI_WRITEMASK_Z) {
1845 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
1846 writemask( dst, dst.mask & TGSI_WRITEMASK_Z ),
1847 src0 ) )
1848 return FALSE;
1849 }
1850
1851 /* If w is being written, fill it with one.
1852 */
1853 if (dst.mask & TGSI_WRITEMASK_W) {
1854 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1855 writemask(dst, TGSI_WRITEMASK_W),
1856 scalar( zero, TGSI_SWIZZLE_W ) ))
1857 return FALSE;
1858 }
1859
1860 return TRUE;
1861 }
1862
1863 static boolean emit_lit(struct svga_shader_emitter *emit,
1864 const struct tgsi_full_instruction *insn )
1865 {
1866 if (emit->unit == PIPE_SHADER_VERTEX) {
1867 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
1868 */
1869 return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
1870 }
1871 else {
1872
1873 /* D3D vs. GL semantics can be fairly easily accommodated by
1874 * variations on this sequence.
1875 *
1876 * GL:
1877 * tmp.y = src.x
1878 * tmp.z = pow(src.y,src.w)
1879 * p0 = src0.xxxx > 0
1880 * result = zero.wxxw
1881 * (p0) result.yz = tmp
1882 *
1883 * D3D:
1884 * tmp.y = src.x
1885 * tmp.z = pow(src.y,src.w)
1886 * p0 = src0.xxyy > 0
1887 * result = zero.wxxw
1888 * (p0) result.yz = tmp
1889 *
1890 * Will implement the GL version for now.
1891 */
1892
1893 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1894 SVGA3dShaderDestToken tmp = get_temp( emit );
1895 const struct src_register src0 = translate_src_register(
1896 emit, &insn->Src[0] );
1897 struct src_register zero = get_zero_immediate( emit );
1898
1899 /* tmp = pow(src.y, src.w)
1900 */
1901 if (dst.mask & TGSI_WRITEMASK_Z) {
1902 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
1903 tmp,
1904 scalar(src0, 1),
1905 scalar(src0, 3)))
1906 return FALSE;
1907 }
1908
1909 /* tmp.y = src.x
1910 */
1911 if (dst.mask & TGSI_WRITEMASK_Y) {
1912 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1913 writemask(tmp, TGSI_WRITEMASK_Y ),
1914 scalar(src0, 0)))
1915 return FALSE;
1916 }
1917
1918 /* Can't quite do this with emit_conditional() due to the extra
1919 * writemask on the predicated mov:
1920 */
1921 {
1922 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1923 SVGA3dShaderInstToken setp_token, mov_token;
1924 struct src_register predsrc;
1925
1926 setp_token = inst_token( SVGA3DOP_SETP );
1927 mov_token = inst_token( SVGA3DOP_MOV );
1928
1929 setp_token.control = SVGA3DOPCOMP_GT;
1930
1931 /* D3D vs GL semantics:
1932 */
1933 if (0)
1934 predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
1935 else
1936 predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
1937
1938 /* SETP src0.xxyy, GT, {0}.x */
1939 if (!submit_op2( emit, setp_token, pred_reg,
1940 predsrc,
1941 swizzle(zero, 0, 0, 0, 0) ))
1942 return FALSE;
1943
1944 /* MOV dst, fail */
1945 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
1946 swizzle(zero, 3, 0, 0, 3 )))
1947 return FALSE;
1948
1949 /* MOV dst.yz, tmp (predicated)
1950 *
1951 * Note that the predicate reg (and possible modifiers) is passed
1952 * as the first source argument.
1953 */
1954 if (dst.mask & TGSI_WRITEMASK_YZ) {
1955 mov_token.predicated = 1;
1956 if (!submit_op2( emit, mov_token,
1957 writemask(dst, TGSI_WRITEMASK_YZ),
1958 src( pred_reg ), src( tmp ) ))
1959 return FALSE;
1960 }
1961 }
1962 }
1963
1964 return TRUE;
1965 }
1966
1967
1968
1969
1970 static boolean emit_ex2( struct svga_shader_emitter *emit,
1971 const struct tgsi_full_instruction *insn )
1972 {
1973 SVGA3dShaderInstToken inst;
1974 SVGA3dShaderDestToken dst;
1975 struct src_register src0;
1976
1977 inst = inst_token( SVGA3DOP_EXP );
1978 dst = translate_dst_register( emit, insn, 0 );
1979 src0 = translate_src_register( emit, &insn->Src[0] );
1980 src0 = scalar( src0, TGSI_SWIZZLE_X );
1981
1982 if (dst.mask != TGSI_WRITEMASK_XYZW) {
1983 SVGA3dShaderDestToken tmp = get_temp( emit );
1984
1985 if (!submit_op1( emit, inst, tmp, src0 ))
1986 return FALSE;
1987
1988 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1989 dst,
1990 scalar( src( tmp ), TGSI_SWIZZLE_X ) );
1991 }
1992
1993 return submit_op1( emit, inst, dst, src0 );
1994 }
1995
1996
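/* TGSI LOG writes, per enabled channel:
 *   dst.x = floor(log2(|src.x|))
 *   dst.y = |src.x| / 2 ^ floor(log2(|src.x|))
 *   dst.z = log2(|src.x|)
 *   dst.w = 1.0
 * which is what the LOG/FRC/ADD/EXP/MUL sequence below computes.
 */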
1997 static boolean emit_log(struct svga_shader_emitter *emit,
1998 const struct tgsi_full_instruction *insn)
1999 {
2000 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2001 struct src_register src0 =
2002 translate_src_register( emit, &insn->Src[0] );
2003 struct src_register zero = get_zero_immediate( emit );
2004 SVGA3dShaderDestToken abs_tmp;
2005 struct src_register abs_src0;
2006 SVGA3dShaderDestToken log2_abs;
2007
2008 if (dst.mask & TGSI_WRITEMASK_Z)
2009 log2_abs = dst;
2010 else if (dst.mask & TGSI_WRITEMASK_XY)
2011 log2_abs = get_temp( emit );
2012
2013 /* Fill log2_abs.z with log2( abs( src0 ) ) whenever x, y or z is written.
2014 */
2015 if (dst.mask & TGSI_WRITEMASK_XYZ) {
2016 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2017 abs_src0 = src0;
2018 else {
2019 abs_tmp = get_temp( emit );
2020
2021 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2022 abs_tmp,
2023 src0 ) )
2024 return FALSE;
2025
2026 abs_src0 = src( abs_tmp );
2027 }
2028
2029 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2030
2031 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2032 writemask( log2_abs, TGSI_WRITEMASK_Z ),
2033 abs_src0 ) )
2034 return FALSE;
2035 }
2036
2037 if (dst.mask & TGSI_WRITEMASK_XY) {
2038 SVGA3dShaderDestToken floor_log2;
2039
2040 if (dst.mask & TGSI_WRITEMASK_X)
2041 floor_log2 = dst;
2042 else
2043 floor_log2 = get_temp( emit );
2044
2045 /* Fill floor_log2.x with floor(log2(abs(src0))) = log2_abs.z - frac(log2_abs.z).
2046 */
2047 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2048 writemask( floor_log2, TGSI_WRITEMASK_X ),
2049 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2050 return FALSE;
2051
2052 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2053 writemask( floor_log2, TGSI_WRITEMASK_X ),
2054 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2055 negate( src( floor_log2 ) ) ) )
2056 return FALSE;
2057
2058 /* If y is being written, fill it with
2059 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2060 */
2061 if (dst.mask & TGSI_WRITEMASK_Y) {
2062 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2063 writemask( dst, TGSI_WRITEMASK_Y ),
2064 negate( scalar( src( floor_log2 ),
2065 TGSI_SWIZZLE_X ) ) ) )
2066 return FALSE;
2067
2068 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2069 writemask( dst, TGSI_WRITEMASK_Y ),
2070 src( dst ),
2071 abs_src0 ) )
2072 return FALSE;
2073 }
2074
2075 if (!(dst.mask & TGSI_WRITEMASK_X))
2076 release_temp( emit, floor_log2 );
2077
2078 if (!(dst.mask & TGSI_WRITEMASK_Z))
2079 release_temp( emit, log2_abs );
2080 }
2081
2082 if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod &&
2083 src0.base.srcMod != SVGA3DSRCMOD_ABS)
2084 release_temp( emit, abs_tmp );
2085
2086 /* If w is being written, fill it with one.
2087 */
2088 if (dst.mask & TGSI_WRITEMASK_W) {
2089 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2090 writemask(dst, TGSI_WRITEMASK_W),
2091 scalar( zero, TGSI_SWIZZLE_W ) ))
2092 return FALSE;
2093 }
2094
2095 return TRUE;
2096 }
2097
2098
2099 static boolean emit_bgnsub( struct svga_shader_emitter *emit,
2100 unsigned position,
2101 const struct tgsi_full_instruction *insn )
2102 {
2103 unsigned i;
2104
2105 /* Note that we've finished the main function and are now emitting
2106 * subroutines. This affects how we terminate the generated
2107 * shader.
2108 */
2109 emit->in_main_func = FALSE;
2110
2111 for (i = 0; i < emit->nr_labels; i++) {
2112 if (emit->label[i] == position) {
2113 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2114 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2115 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2116 }
2117 }
2118
2119 assert(0);
2120 return TRUE;
2121 }
2122
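/* CAL: look up (or allocate) a label index for the callee's instruction
 * position.  The matching BGNSUB (emit_bgnsub above) emits the LABEL
 * with the same index when it reaches that position.
 */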
2123 static boolean emit_call( struct svga_shader_emitter *emit,
2124 const struct tgsi_full_instruction *insn )
2125 {
2126 unsigned position = insn->Label.Label;
2127 unsigned i;
2128
2129 for (i = 0; i < emit->nr_labels; i++) {
2130 if (emit->label[i] == position)
2131 break;
2132 }
2133
2134 if (emit->nr_labels == Elements(emit->label))
2135 return FALSE;
2136
2137 if (i == emit->nr_labels) {
2138 emit->label[i] = position;
2139 emit->nr_labels++;
2140 }
2141
2142 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2143 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2144 }
2145
2146
2147 static boolean emit_end( struct svga_shader_emitter *emit )
2148 {
2149 if (emit->unit == PIPE_SHADER_VERTEX) {
2150 return emit_vs_postamble( emit );
2151 }
2152 else {
2153 return emit_ps_postamble( emit );
2154 }
2155 }
2156
2157
2158
2159 static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
2160 unsigned position,
2161 const struct tgsi_full_instruction *insn )
2162 {
2163 switch (insn->Instruction.Opcode) {
2164
2165 case TGSI_OPCODE_ARL:
2166 return emit_arl( emit, insn );
2167
2168 case TGSI_OPCODE_TEX:
2169 case TGSI_OPCODE_TXB:
2170 case TGSI_OPCODE_TXP:
2171 case TGSI_OPCODE_TXL:
2172 case TGSI_OPCODE_TXD:
2173 return emit_tex( emit, insn );
2174
2175 case TGSI_OPCODE_DDX:
2176 case TGSI_OPCODE_DDY:
2177 return emit_deriv( emit, insn );
2178
2179 case TGSI_OPCODE_BGNSUB:
2180 return emit_bgnsub( emit, position, insn );
2181
2182 case TGSI_OPCODE_ENDSUB:
2183 return TRUE;
2184
2185 case TGSI_OPCODE_CAL:
2186 return emit_call( emit, insn );
2187
2188 case TGSI_OPCODE_FLR:
2189 case TGSI_OPCODE_TRUNC: /* should truncate toward zero, but is approximated with FLR */
2190 return emit_floor( emit, insn );
2191
2192 case TGSI_OPCODE_CMP:
2193 return emit_cmp( emit, insn );
2194
2195 case TGSI_OPCODE_DIV:
2196 return emit_div( emit, insn );
2197
2198 case TGSI_OPCODE_DP2:
2199 return emit_dp2( emit, insn );
2200
2201 case TGSI_OPCODE_DPH:
2202 return emit_dph( emit, insn );
2203
2204 case TGSI_OPCODE_NRM:
2205 return emit_nrm( emit, insn );
2206
2207 case TGSI_OPCODE_COS:
2208 return emit_cos( emit, insn );
2209
2210 case TGSI_OPCODE_SIN:
2211 return emit_sin( emit, insn );
2212
2213 case TGSI_OPCODE_SCS:
2214 return emit_sincos( emit, insn );
2215
2216 case TGSI_OPCODE_END:
2217 /* TGSI always finishes the main func with an END */
2218 return emit_end( emit );
2219
2220 case TGSI_OPCODE_KIL:
2221 return emit_kil( emit, insn );
2222
2223 /* Selection opcodes. The underlying language is fairly
2224 * non-orthogonal about these.
2225 */
2226 case TGSI_OPCODE_SEQ:
2227 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2228
2229 case TGSI_OPCODE_SNE:
2230 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2231
2232 case TGSI_OPCODE_SGT:
2233 return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2234
2235 case TGSI_OPCODE_SGE:
2236 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2237
2238 case TGSI_OPCODE_SLT:
2239 return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2240
2241 case TGSI_OPCODE_SLE:
2242 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2243
2244 case TGSI_OPCODE_SUB:
2245 return emit_sub( emit, insn );
2246
2247 case TGSI_OPCODE_POW:
2248 return emit_pow( emit, insn );
2249
2250 case TGSI_OPCODE_EX2:
2251 return emit_ex2( emit, insn );
2252
2253 case TGSI_OPCODE_EXP:
2254 return emit_exp( emit, insn );
2255
2256 case TGSI_OPCODE_LOG:
2257 return emit_log( emit, insn );
2258
2259 case TGSI_OPCODE_LG2:
2260 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
2261
2262 case TGSI_OPCODE_RSQ:
2263 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
2264
2265 case TGSI_OPCODE_RCP:
2266 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
2267
2268 case TGSI_OPCODE_CONT:
2269 case TGSI_OPCODE_RET:
2270 /* This is a noop -- we tell mesa that we can't support RET
2271 * within a function (early return), so this will always be
2272 * followed by an ENDSUB.
2273 */
2274 return TRUE;
2275
2276 /* These aren't actually used by any of the frontends we care
2277 * about:
2278 */
2279 case TGSI_OPCODE_CLAMP:
2280 case TGSI_OPCODE_ROUND:
2281 case TGSI_OPCODE_AND:
2282 case TGSI_OPCODE_OR:
2283 case TGSI_OPCODE_I2F:
2284 case TGSI_OPCODE_NOT:
2285 case TGSI_OPCODE_SHL:
2286 case TGSI_OPCODE_ISHR:
2287 case TGSI_OPCODE_XOR:
2288 return FALSE;
2289
2290 case TGSI_OPCODE_IF:
2291 return emit_if( emit, insn );
2292 case TGSI_OPCODE_ELSE:
2293 return emit_else( emit, insn );
2294 case TGSI_OPCODE_ENDIF:
2295 return emit_endif( emit, insn );
2296
2297 case TGSI_OPCODE_BGNLOOP:
2298 return emit_bgnloop2( emit, insn );
2299 case TGSI_OPCODE_ENDLOOP:
2300 return emit_endloop2( emit, insn );
2301 case TGSI_OPCODE_BRK:
2302 return emit_brk( emit, insn );
2303
2304 case TGSI_OPCODE_XPD:
2305 return emit_xpd( emit, insn );
2306
2307 case TGSI_OPCODE_KILP:
2308 return emit_kilp( emit, insn );
2309
2310 case TGSI_OPCODE_DST:
2311 return emit_dst_insn( emit, insn );
2312
2313 case TGSI_OPCODE_LIT:
2314 return emit_lit( emit, insn );
2315
2316 case TGSI_OPCODE_LRP:
2317 return emit_lrp( emit, insn );
2318
2319 default: {
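/* Everything not special-cased above maps 1:1 onto an SVGA3D opcode. */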
2320 unsigned opcode = translate_opcode(insn->Instruction.Opcode);
2321
2322 if (opcode == SVGA3DOP_LAST_INST)
2323 return FALSE;
2324
2325 if (!emit_simple_instruction( emit, opcode, insn ))
2326 return FALSE;
2327 }
2328 }
2329
2330 return TRUE;
2331 }
2332
2333
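/* Emit a TGSI immediate as an SVGA3D constant definition.  Immediates
 * with fewer than four components are padded out with {0,0,0,1}.
 */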
2334 static boolean svga_emit_immediate( struct svga_shader_emitter *emit,
2335 struct tgsi_full_immediate *imm)
2336 {
2337 static const float id[4] = {0,0,0,1};
2338 float value[4];
2339 unsigned i;
2340
2341 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
2342 for (i = 0; i < imm->Immediate.NrTokens - 1; i++)
2343 value[i] = imm->u[i].Float;
2344
2345 for ( ; i < 4; i++ )
2346 value[i] = id[i];
2347
2348 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2349 emit->imm_start + emit->internal_imm_count++,
2350 value[0], value[1], value[2], value[3]);
2351 }
2352
2353 static boolean make_immediate( struct svga_shader_emitter *emit,
2354 float a,
2355 float b,
2356 float c,
2357 float d,
2358 struct src_register *out )
2359 {
2360 unsigned idx = emit->nr_hw_const++;
2361
2362 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
2363 idx, a, b, c, d ))
2364 return FALSE;
2365
2366 *out = src_register( SVGA3DREG_CONST, idx );
2367
2368 return TRUE;
2369 }
2370
2371 static boolean emit_vs_preamble( struct svga_shader_emitter *emit )
2372 {
2373 if (!emit->key.vkey.need_prescale) {
2374 if (!make_immediate( emit, 0, 0, .5, .5,
2375 &emit->imm_0055))
2376 return FALSE;
2377 }
2378
2379 return TRUE;
2380 }
2381
2382 static boolean emit_ps_preamble( struct svga_shader_emitter *emit )
2383 {
2384 unsigned i;
2385
2386 /* For SM20, need to initialize the temporaries we're using to hold
2387 * color outputs to some value. Shaders which don't set all of
2388 * these values are likely to be rejected by the DX9 runtime.
2389 */
2390 if (!emit->use_sm30) {
2391 struct src_register zero = get_zero_immediate( emit );
2392 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2393 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2394
2395 if (!submit_op1( emit,
2396 inst_token(SVGA3DOP_MOV),
2397 emit->temp_col[i],
2398 zero ))
2399 return FALSE;
2400 }
2401 }
2402 }
2403
2404 return TRUE;
2405 }
2406
2407 static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
2408 {
2409 unsigned i;
2410
2411 /* PS oDepth is incredibly fragile and it's very hard to catch the
2412 * types of usage that break it during shader emit. Easier just to
2413 * redirect the main program to a temporary and then only touch
2414 * oDepth with a hand-crafted MOV below.
2415 */
2416 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
2417
2418 if (!submit_op1( emit,
2419 inst_token(SVGA3DOP_MOV),
2420 emit->true_pos,
2421 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
2422 return FALSE;
2423 }
2424
2425 /* Similarly for SM20 color outputs... Luckily SM30 isn't so
2426 * fragile.
2427 */
2428 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
2429 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
2430
2431 /* Potentially override output colors with white for XOR
2432 * logicop workaround.
2433 */
2434 if (emit->unit == PIPE_SHADER_FRAGMENT &&
2435 emit->key.fkey.white_fragments) {
2436
2437 struct src_register one = scalar( get_zero_immediate( emit ),
2438 TGSI_SWIZZLE_W );
2439
2440 if (!submit_op1( emit,
2441 inst_token(SVGA3DOP_MOV),
2442 emit->true_col[i],
2443 one ))
2444 return FALSE;
2445 }
2446 else {
2447 if (!submit_op1( emit,
2448 inst_token(SVGA3DOP_MOV),
2449 emit->true_col[i],
2450 src(emit->temp_col[i]) ))
2451 return FALSE;
2452 }
2453 }
2454 }
2455
2456 return TRUE;
2457 }
2458
2459 static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
2460 {
2461 /* PSIZ output is incredibly fragile and it's very hard to catch
2462 * the types of usage that break it during shader emit. Easier
2463 * just to redirect the main program to a temporary and then only
2464 * touch PSIZ with a hand-crafted MOV below.
2465 */
2466 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
2467
2468 if (!submit_op1( emit,
2469 inst_token(SVGA3DOP_MOV),
2470 emit->true_psiz,
2471 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
2472 return FALSE;
2473 }
2474
2475 /* Need to perform various manipulations on vertex position to cope
2476 * with the different GL and D3D clip spaces.
2477 */
2478 if (emit->key.vkey.need_prescale) {
2479 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2480 SVGA3dShaderDestToken pos = emit->true_pos;
2481 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
2482 struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
2483 offset + 0 );
2484 struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
2485 offset + 1 );
2486
2487 /* MUL temp_pos.xyz, temp_pos, prescale.scale
2488 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
2489 * --> Note that prescale.trans.w == 0
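 *
 * i.e.  pos.xyz = temp_pos.xyz * prescale_scale.xyz
 *                 + temp_pos.w * prescale_trans.xyz
 *       pos.w   = temp_pos.w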
2490 */
2491 if (!submit_op2( emit,
2492 inst_token(SVGA3DOP_MUL),
2493 writemask(temp_pos, TGSI_WRITEMASK_XYZ),
2494 src(temp_pos),
2495 prescale_scale ))
2496 return FALSE;
2497
2498 if (!submit_op3( emit,
2499 inst_token(SVGA3DOP_MAD),
2500 pos,
2501 swizzle(src(temp_pos), 3, 3, 3, 3),
2502 prescale_trans,
2503 src(temp_pos)))
2504 return FALSE;
2505 }
2506 else {
2507 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
2508 SVGA3dShaderDestToken pos = emit->true_pos;
2509 struct src_register imm_0055 = emit->imm_0055;
2510
2511 /* Adjust GL clipping coordinate space to hardware (D3D-style):
2512 *
2513 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
2514 * MOV result.position, temp_pos
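 *
 * The DP4 computes z_d3d = 0.5 * z_gl + 0.5 * w, remapping clip-space
 * z from [-w, w] to [0, w].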
2515 */
2516 if (!submit_op2( emit,
2517 inst_token(SVGA3DOP_DP4),
2518 writemask(temp_pos, TGSI_WRITEMASK_Z),
2519 imm_0055,
2520 src(temp_pos) ))
2521 return FALSE;
2522
2523 if (!submit_op1( emit,
2524 inst_token(SVGA3DOP_MOV),
2525 pos,
2526 src(temp_pos) ))
2527 return FALSE;
2528 }
2529
2530 return TRUE;
2531 }
2532
2533 /*
2534 0: IF VFACE :4
2535 1: COLOR = FrontColor;
2536 2: ELSE
2537 3: COLOR = BackColor;
2538 4: ENDIF
2539 */
2540 static boolean emit_light_twoside( struct svga_shader_emitter *emit )
2541 {
2542 struct src_register vface, zero;
2543 struct src_register front[2];
2544 struct src_register back[2];
2545 SVGA3dShaderDestToken color[2];
2546 int count = emit->internal_color_count;
2547 int i;
2548 SVGA3dShaderInstToken if_token;
2549
2550 if (count == 0)
2551 return TRUE;
2552
2553 vface = get_vface( emit );
2554 zero = get_zero_immediate( emit );
2555
2556 /* Can't use get_temp() to allocate the color reg as such
2557 * temporaries will be reclaimed after each instruction by the call
2558 * to reset_temp_regs().
2559 */
2560 for (i = 0; i < count; i++) {
2561 color[i] = dst_register( SVGA3DREG_TEMP,
2562 emit->nr_hw_temp++ );
2563
2564 front[i] = emit->input_map[emit->internal_color_idx[i]];
2565
2566 /* Back is always the next input:
2567 */
2568 back[i] = front[i];
2569 back[i].base.num = front[i].base.num + 1;
2570
2571 /* Reassign the input_map to the temp holding the resolved two-sided color:
2572 */
2573 emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
2574 }
2575
2576 if_token = inst_token( SVGA3DOP_IFC );
2577
2578 if (emit->key.fkey.front_cw)
2579 if_token.control = SVGA3DOPCOMP_GT;
2580 else
2581 if_token.control = SVGA3DOPCOMP_LT;
2582
2583 zero = scalar(zero, TGSI_SWIZZLE_X);
2584
2585 if (!(emit_instruction( emit, if_token ) &&
2586 emit_src( emit, vface ) &&
2587 emit_src( emit, zero ) ))
2588 return FALSE;
2589
2590 for (i = 0; i < count; i++) {
2591 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
2592 return FALSE;
2593 }
2594
2595 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
2596 return FALSE;
2597
2598 for (i = 0; i < count; i++) {
2599 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
2600 return FALSE;
2601 }
2602
2603 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
2604 return FALSE;
2605
2606 return TRUE;
2607 }
2608
2609 /*
2610 0: SETP_GT TEMP, VFACE, 0
2611 where TEMP is a fake frontface register
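TEMP ends up 1.0 for front-facing fragments and 0.0 for back-facing
ones; pass/fail come from the .w and .x of the {0,0,0,1} immediate,
swapped according to key.fkey.front_cw.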
2612 */
2613 static boolean emit_frontface( struct svga_shader_emitter *emit )
2614 {
2615 struct src_register vface, zero;
2616 SVGA3dShaderDestToken temp;
2617 struct src_register pass, fail;
2618
2619 vface = get_vface( emit );
2620 zero = get_zero_immediate( emit );
2621
2622 /* Can't use get_temp() to allocate the fake frontface reg as such
2623 * temporaries will be reclaimed after each instruction by the call
2624 * to reset_temp_regs().
2625 */
2626 temp = dst_register( SVGA3DREG_TEMP,
2627 emit->nr_hw_temp++ );
2628
2629 if (emit->key.fkey.front_cw) {
2630 pass = scalar( zero, TGSI_SWIZZLE_W );
2631 fail = scalar( zero, TGSI_SWIZZLE_X );
2632 } else {
2633 pass = scalar( zero, TGSI_SWIZZLE_X );
2634 fail = scalar( zero, TGSI_SWIZZLE_W );
2635 }
2636
2637 if (!emit_conditional(emit, PIPE_FUNC_GREATER,
2638 temp, vface, scalar( zero, TGSI_SWIZZLE_X ),
2639 pass, fail))
2640 return FALSE;
2641
2642 /* Reassign the input_map to the fake front-face register:
2643 */
2644 emit->input_map[emit->internal_frontface_idx] = src(temp);
2645
2646 return TRUE;
2647 }
2648
2649 static INLINE boolean
2650 needs_to_create_zero( struct svga_shader_emitter *emit )
2651 {
2652 int i;
2653
2654 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2655 if (!emit->use_sm30)
2656 return TRUE;
2657
2658 if (emit->key.fkey.light_twoside)
2659 return TRUE;
2660
2661 if (emit->key.fkey.white_fragments)
2662 return TRUE;
2663
2664 if (emit->emit_frontface)
2665 return TRUE;
2666
2667 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
2668 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
2669 return TRUE;
2670 }
2671
2672 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
2673 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
2674 emit->info.opcode_count[TGSI_OPCODE_BGNFOR] >= 1 ||
2675 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
2676 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
2677 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
2678 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
2679 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
2680 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
2681 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
2682 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
2683 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
2684 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
2685 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
2686 emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1)
2687 return TRUE;
2688
2689 for (i = 0; i < emit->key.fkey.num_textures; i++) {
2690 if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
2691 return TRUE;
2692 }
2693
2694 return FALSE;
2695 }
2696
2697 static INLINE boolean
2698 needs_to_create_loop_const( struct svga_shader_emitter *emit )
2699 {
2700 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
2701 }
2702
2703 static INLINE boolean
2704 needs_to_create_sincos_consts( struct svga_shader_emitter *emit )
2705 {
2706 return !emit->use_sm30 && (emit->info.opcode_count[TGSI_OPCODE_SIN] >= 1 ||
2707 emit->info.opcode_count[TGSI_OPCODE_COS] >= 1 ||
2708 emit->info.opcode_count[TGSI_OPCODE_SCS] >= 1);
2709 }
2710
2711 static INLINE boolean
2712 needs_to_create_arl_consts( struct svga_shader_emitter *emit )
2713 {
2714 return (emit->num_arl_consts > 0);
2715 }
2716
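/* Record the most negative relative constant index seen after a given
 * ARL, one entry per ARL in emit->arl_consts.  Presumably this lets the
 * emitter bias the address register so indirect constant accesses never
 * go below index zero (see create_arl_consts()).
 */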
2717 static INLINE boolean
2718 pre_parse_add_indirect( struct svga_shader_emitter *emit,
2719 int num, int current_arl)
2720 {
2721 int i;
2722 assert(num < 0);
2723
2724 for (i = 0; i < emit->num_arl_consts; ++i) {
2725 if (emit->arl_consts[i].arl_num == current_arl)
2726 break;
2727 }
2728 /* new entry */
2729 if (emit->num_arl_consts == i) {
2730 ++emit->num_arl_consts;
2731 }
2732 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
2733 num :
2734 emit->arl_consts[i].number;
2735 emit->arl_consts[i].arl_num = current_arl;
2736 return TRUE;
2737 }
2738
2739 static boolean
2740 pre_parse_instruction( struct svga_shader_emitter *emit,
2741 const struct tgsi_full_instruction *insn,
2742 int current_arl)
2743 {
2744 if (insn->Src[0].Register.Indirect &&
2745 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
2746 const struct tgsi_full_src_register *reg = &insn->Src[0];
2747 if (reg->Register.Index < 0) {
2748 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
2749 }
2750 }
2751
2752 if (insn->Src[1].Register.Indirect &&
2753 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
2754 const struct tgsi_full_src_register *reg = &insn->Src[1];
2755 if (reg->Register.Index < 0) {
2756 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
2757 }
2758 }
2759
2760 if (insn->Src[2].Register.Indirect &&
2761 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
2762 const struct tgsi_full_src_register *reg = &insn->Src[2];
2763 if (reg->Register.Index < 0) {
2764 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
2765 }
2766 }
2767
2768 return TRUE;
2769 }
2770
2771 static boolean
2772 pre_parse_tokens( struct svga_shader_emitter *emit,
2773 const struct tgsi_token *tokens )
2774 {
2775 struct tgsi_parse_context parse;
2776 int current_arl = 0;
2777
2778 tgsi_parse_init( &parse, tokens );
2779
2780 while (!tgsi_parse_end_of_tokens( &parse )) {
2781 tgsi_parse_token( &parse );
2782 switch (parse.FullToken.Token.Type) {
2783 case TGSI_TOKEN_TYPE_IMMEDIATE:
2784 case TGSI_TOKEN_TYPE_DECLARATION:
2785 break;
2786 case TGSI_TOKEN_TYPE_INSTRUCTION:
2787 if (parse.FullToken.FullInstruction.Instruction.Opcode ==
2788 TGSI_OPCODE_ARL) {
2789 ++current_arl;
2790 }
2791 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
2792 current_arl ))
2793 return FALSE;
2794 break;
2795 default:
2796 break;
2797 }
2798
2799 }
2800 return TRUE;
2801 }
2802
2803 static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit )
2804
2805 {
2806 if (needs_to_create_zero( emit )) {
2807 create_zero_immediate( emit );
2808 }
2809 if (needs_to_create_loop_const( emit )) {
2810 create_loop_const( emit );
2811 }
2812 if (needs_to_create_sincos_consts( emit )) {
2813 create_sincos_consts( emit );
2814 }
2815 if (needs_to_create_arl_consts( emit )) {
2816 create_arl_consts( emit );
2817 }
2818
2819 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2820 if (!emit_ps_preamble( emit ))
2821 return FALSE;
2822
2823 if (emit->key.fkey.light_twoside) {
2824 if (!emit_light_twoside( emit ))
2825 return FALSE;
2826 }
2827 if (emit->emit_frontface) {
2828 if (!emit_frontface( emit ))
2829 return FALSE;
2830 }
2831 }
2832
2833 return TRUE;
2834 }
2835
2836 boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
2837 const struct tgsi_token *tokens )
2838 {
2839 struct tgsi_parse_context parse;
2840 boolean ret = TRUE;
2841 boolean helpers_emitted = FALSE;
2842 unsigned line_nr = 0;
2843
2844 tgsi_parse_init( &parse, tokens );
2845 emit->internal_imm_count = 0;
2846
2847 if (emit->unit == PIPE_SHADER_VERTEX) {
2848 ret = emit_vs_preamble( emit );
2849 if (!ret)
2850 goto done;
2851 }
2852
2853 pre_parse_tokens(emit, tokens);
2854
2855 while (!tgsi_parse_end_of_tokens( &parse )) {
2856 tgsi_parse_token( &parse );
2857
2858 switch (parse.FullToken.Token.Type) {
2859 case TGSI_TOKEN_TYPE_IMMEDIATE:
2860 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
2861 if (!ret)
2862 goto done;
2863 break;
2864
2865 case TGSI_TOKEN_TYPE_DECLARATION:
2866 if (emit->use_sm30)
2867 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
2868 else
2869 ret = svga_translate_decl_sm20( emit, &parse.FullToken.FullDeclaration );
2870 if (!ret)
2871 goto done;
2872 break;
2873
2874 case TGSI_TOKEN_TYPE_INSTRUCTION:
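/* Helpers (internal constants, preambles, two-sided lighting and
 * frontface lowering) are emitted lazily, just before the first real
 * instruction, so that they follow all declarations and immediates.
 */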
2875 if (!helpers_emitted) {
2876 if (!svga_shader_emit_helpers( emit ))
2877 goto done;
2878 helpers_emitted = TRUE;
2879 }
2880 ret = svga_emit_instruction( emit,
2881 line_nr++,
2882 &parse.FullToken.FullInstruction );
2883 if (!ret)
2884 goto done;
2885 break;
2886 default:
2887 break;
2888 }
2889
2890 reset_temp_regs( emit );
2891 }
2892
2893 /* If we ended inside a subroutine, terminate it.  Note that the
2894 * hardware doesn't tolerate shaders whose sub-routines don't
2895 * terminate with RET followed by END.
2896 */
2897 if (!emit->in_main_func) {
2898 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
2899 if (!ret)
2900 goto done;
2901 }
2902
2903 assert(emit->dynamic_branching_level == 0);
2904
2905 /* Need to terminate the whole shader:
2906 */
2907 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
2908 if (!ret)
2909 goto done;
2910
2911 done:
2912 assert(ret);
2913 tgsi_parse_free( &parse );
2914 return ret;
2915 }
2916