mesa.git: src/gallium/drivers/svga/svga_tgsi_vgpu10.c
1 /**********************************************************
2 * Copyright 1998-2013 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26 /**
27 * @file svga_tgsi_vgpu10.c
28 *
29 * TGSI -> VGPU10 shader translation.
30 *
31 * \author Mingcheng Chen
32 * \author Brian Paul
33 */
34
35 #include "pipe/p_compiler.h"
36 #include "pipe/p_shader_tokens.h"
37 #include "pipe/p_defines.h"
38 #include "tgsi/tgsi_build.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_parse.h"
42 #include "tgsi/tgsi_scan.h"
43 #include "tgsi/tgsi_two_side.h"
44 #include "tgsi/tgsi_aa_point.h"
45 #include "tgsi/tgsi_util.h"
46 #include "util/u_math.h"
47 #include "util/u_memory.h"
48 #include "util/u_bitmask.h"
49 #include "util/u_debug.h"
50 #include "util/u_pstipple.h"
51
52 #include "svga_context.h"
53 #include "svga_debug.h"
54 #include "svga_link.h"
55 #include "svga_shader.h"
56 #include "svga_tgsi.h"
57
58 #include "VGPU10ShaderTokens.h"
59
60
61 #define INVALID_INDEX 99999
62 #define MAX_INTERNAL_TEMPS 3
63 #define MAX_SYSTEM_VALUES 4
64 #define MAX_IMMEDIATE_COUNT \
65 (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
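/* Note: the VGPU10 limit above presumably counts scalar (dword) elements,
 * while the emitter's immediates[] array below stores vec4 entries, hence
 * the division by 4 to get the number of vec4 immediates we can hold.
 */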
66 #define MAX_TEMP_ARRAYS 64 /* Enough? */
67
68
69 /**
 70  * Clipping is complicated.  There are four different cases which we
71 * handle during VS/GS shader translation:
72 */
73 enum clipping_mode
74 {
75 CLIP_NONE, /**< No clipping enabled */
76 CLIP_LEGACY, /**< The shader has no clipping declarations or code but
77 * one or more user-defined clip planes are enabled. We
78 * generate extra code to emit clip distances.
79 */
80 CLIP_DISTANCE, /**< The shader already declares clip distance output
81 * registers and has code to write to them.
82 */
83 CLIP_VERTEX /**< The shader declares a clip vertex output register and
84 * has code that writes to the register. We convert the
85 * clipvertex position into one or more clip distances.
86 */
87 };
88
89
90 struct svga_shader_emitter_v10
91 {
92 /* The token output buffer */
93 unsigned size;
94 char *buf;
95 char *ptr;
96
97 /* Information about the shader and state (does not change) */
98 struct svga_compile_key key;
99 struct tgsi_shader_info info;
100 unsigned unit;
101
102 unsigned inst_start_token;
103 boolean discard_instruction; /**< throw away current instruction? */
104
105 union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
106 unsigned num_immediates; /**< Number of immediates emitted */
107 unsigned common_immediate_pos[8]; /**< literals for common immediates */
108 unsigned num_common_immediates;
109 boolean immediates_emitted;
110
 111    unsigned num_outputs;      /**< includes any extra outputs */
 112                   /** The first extra output is reserved for
 113                    *  the non-adjusted vertex position, for
 114                    *  stream output purposes
115 */
116
117 /* Temporary Registers */
118 unsigned num_shader_temps; /**< num of temps used by original shader */
119 unsigned internal_temp_count; /**< currently allocated internal temps */
120 struct {
121 unsigned start, size;
122 } temp_arrays[MAX_TEMP_ARRAYS];
123 unsigned num_temp_arrays;
124
125 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
126 struct {
127 unsigned arrayId, index;
128 } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
129
130 /** Number of constants used by original shader for each constant buffer.
 131     *  The size should probably always match that of svga_state.constbufs.
132 */
133 unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
134
135 /* Samplers */
136 unsigned num_samplers;
137
138 /* Address regs (really implemented with temps) */
139 unsigned num_address_regs;
140 unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
141
142 /* Output register usage masks */
143 ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
144
145 /* To map TGSI system value index to VGPU shader input indexes */
146 ubyte system_value_indexes[MAX_SYSTEM_VALUES];
147
148 struct {
149 /* vertex position scale/translation */
150 unsigned out_index; /**< the real position output reg */
151 unsigned tmp_index; /**< the fake/temp position output reg */
152 unsigned so_index; /**< the non-adjusted position output reg */
153 unsigned prescale_scale_index, prescale_trans_index;
154 boolean need_prescale;
155 } vposition;
156
157 /* For vertex shaders only */
158 struct {
159 /* viewport constant */
160 unsigned viewport_index;
161
162 /* temp index of adjusted vertex attributes */
163 unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
164 } vs;
165
166 /* For fragment shaders only */
167 struct {
 168       /* alpha test */
169 unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */
170 unsigned color_tmp_index; /**< fake/temp color output reg */
171 unsigned alpha_ref_index; /**< immediate constant for alpha ref */
172
173 /* front-face */
174 unsigned face_input_index; /**< real fragment shader face reg (bool) */
175 unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */
176
177 unsigned pstipple_sampler_unit;
178
179 unsigned fragcoord_input_index; /**< real fragment position input reg */
180 unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */
181 } fs;
182
183 /* For geometry shaders only */
184 struct {
 185       VGPU10_PRIMITIVE prim_type; /**< VGPU10 primitive type */
186 VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
187 unsigned input_size; /**< size of input arrays */
188 unsigned prim_id_index; /**< primitive id register index */
189 unsigned max_out_vertices; /**< maximum number of output vertices */
190 } gs;
191
192 /* For vertex or geometry shaders */
193 enum clipping_mode clip_mode;
194 unsigned clip_dist_out_index; /**< clip distance output register index */
195 unsigned clip_dist_tmp_index; /**< clip distance temporary register */
196 unsigned clip_dist_so_index; /**< clip distance shadow copy */
197
198 /** Index of temporary holding the clipvertex coordinate */
199 unsigned clip_vertex_out_index; /**< clip vertex output register index */
200 unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
201
202 /* user clip plane constant slot indexes */
203 unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
204
205 unsigned num_output_writes;
206 boolean constant_color_output;
207
208 boolean uses_flat_interp;
209
210 /* For all shaders: const reg index for RECT coord scaling */
211 unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
212
213 /* For all shaders: const reg index for texture buffer size */
214 unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
215
216 /* VS/GS/FS Linkage info */
217 struct shader_linkage linkage;
218
219 bool register_overflow; /**< Set if we exceed a VGPU10 register limit */
220 };
221
222
223 static boolean
224 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
225
226 static boolean
227 emit_vertex(struct svga_shader_emitter_v10 *emit,
228 const struct tgsi_full_instruction *inst);
229
230 static char err_buf[128];
231
232 static boolean
233 expand(struct svga_shader_emitter_v10 *emit)
234 {
235 char *new_buf;
236 unsigned newsize = emit->size * 2;
237
238 if (emit->buf != err_buf)
239 new_buf = REALLOC(emit->buf, emit->size, newsize);
240 else
241 new_buf = NULL;
242
243 if (!new_buf) {
244 emit->ptr = err_buf;
245 emit->buf = err_buf;
246 emit->size = sizeof(err_buf);
247 return FALSE;
248 }
249
250 emit->size = newsize;
251 emit->ptr = new_buf + (emit->ptr - emit->buf);
252 emit->buf = new_buf;
253 return TRUE;
254 }
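/* Note: on a failed reallocation, expand() points the emitter at the small
 * static err_buf so that subsequent emit_dword()/emit_dwords() calls still
 * have a valid place to write instead of crashing; the FALSE return value
 * is what actually reports the failure to the caller.
 */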
255
256 /**
257 * Create and initialize a new svga_shader_emitter_v10 object.
258 */
259 static struct svga_shader_emitter_v10 *
260 alloc_emitter(void)
261 {
262 struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
263
264 if (!emit)
265 return NULL;
266
267 /* to initialize the output buffer */
268 emit->size = 512;
269 if (!expand(emit)) {
270 FREE(emit);
271 return NULL;
272 }
273 return emit;
274 }
275
276 /**
277 * Free an svga_shader_emitter_v10 object.
278 */
279 static void
280 free_emitter(struct svga_shader_emitter_v10 *emit)
281 {
282 assert(emit);
283 FREE(emit->buf); /* will be NULL if translation succeeded */
284 FREE(emit);
285 }
286
287 static inline boolean
288 reserve(struct svga_shader_emitter_v10 *emit,
289 unsigned nr_dwords)
290 {
291 while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
292 if (!expand(emit))
293 return FALSE;
294 }
295
296 return TRUE;
297 }
298
299 static boolean
300 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
301 {
302 if (!reserve(emit, 1))
303 return FALSE;
304
305 *(uint32 *)emit->ptr = dword;
306 emit->ptr += sizeof dword;
307 return TRUE;
308 }
309
310 static boolean
311 emit_dwords(struct svga_shader_emitter_v10 *emit,
312 const uint32 *dwords,
313 unsigned nr)
314 {
315 if (!reserve(emit, nr))
316 return FALSE;
317
318 memcpy(emit->ptr, dwords, nr * sizeof *dwords);
319 emit->ptr += nr * sizeof *dwords;
320 return TRUE;
321 }
322
323 /** Return the number of tokens in the emitter's buffer */
324 static unsigned
325 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
326 {
327 return (emit->ptr - emit->buf) / sizeof(unsigned);
328 }
329
330
331 /**
332 * Check for register overflow. If we overflow we'll set an
333 * error flag. This function can be called for register declarations
 334  * or for registers used as src/dst instruction operands.
 335  * \param operandType  register type.  One of VGPU10_OPERAND_TYPE_x
 336  *                     or VGPU10_OPCODE_DCL_x
337 * \param index the register index
338 */
339 static void
340 check_register_index(struct svga_shader_emitter_v10 *emit,
341 unsigned operandType, unsigned index)
342 {
343 bool overflow_before = emit->register_overflow;
344
345 switch (operandType) {
346 case VGPU10_OPERAND_TYPE_TEMP:
347 case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
348 case VGPU10_OPCODE_DCL_TEMPS:
349 if (index >= VGPU10_MAX_TEMPS) {
350 emit->register_overflow = TRUE;
351 }
352 break;
353 case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
354 case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
355 if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
356 emit->register_overflow = TRUE;
357 }
358 break;
359 case VGPU10_OPERAND_TYPE_INPUT:
360 case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
361 case VGPU10_OPCODE_DCL_INPUT:
362 case VGPU10_OPCODE_DCL_INPUT_SGV:
363 case VGPU10_OPCODE_DCL_INPUT_SIV:
364 case VGPU10_OPCODE_DCL_INPUT_PS:
365 case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
366 case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
367 if ((emit->unit == PIPE_SHADER_VERTEX &&
368 index >= VGPU10_MAX_VS_INPUTS) ||
369 (emit->unit == PIPE_SHADER_GEOMETRY &&
370 index >= VGPU10_MAX_GS_INPUTS) ||
371 (emit->unit == PIPE_SHADER_FRAGMENT &&
372 index >= VGPU10_MAX_FS_INPUTS)) {
373 emit->register_overflow = TRUE;
374 }
375 break;
376 case VGPU10_OPERAND_TYPE_OUTPUT:
377 case VGPU10_OPCODE_DCL_OUTPUT:
378 case VGPU10_OPCODE_DCL_OUTPUT_SGV:
379 case VGPU10_OPCODE_DCL_OUTPUT_SIV:
380 if ((emit->unit == PIPE_SHADER_VERTEX &&
381 index >= VGPU10_MAX_VS_OUTPUTS) ||
382 (emit->unit == PIPE_SHADER_GEOMETRY &&
383 index >= VGPU10_MAX_GS_OUTPUTS) ||
384 (emit->unit == PIPE_SHADER_FRAGMENT &&
385 index >= VGPU10_MAX_FS_OUTPUTS)) {
386 emit->register_overflow = TRUE;
387 }
388 break;
389 case VGPU10_OPERAND_TYPE_SAMPLER:
390 case VGPU10_OPCODE_DCL_SAMPLER:
391 if (index >= VGPU10_MAX_SAMPLERS) {
392 emit->register_overflow = TRUE;
393 }
394 break;
395 case VGPU10_OPERAND_TYPE_RESOURCE:
396 case VGPU10_OPCODE_DCL_RESOURCE:
397 if (index >= VGPU10_MAX_RESOURCES) {
398 emit->register_overflow = TRUE;
399 }
400 break;
401 case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
402 if (index >= MAX_IMMEDIATE_COUNT) {
403 emit->register_overflow = TRUE;
404 }
405 break;
406 default:
407 assert(0);
408 ; /* nothing */
409 }
410
411 if (emit->register_overflow && !overflow_before) {
412 debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
413 operandType, index);
414 }
415 }
416
417
418 /**
419 * Examine misc state to determine the clipping mode.
420 */
421 static void
422 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
423 {
424 if (emit->info.num_written_clipdistance > 0) {
425 emit->clip_mode = CLIP_DISTANCE;
426 }
427 else if (emit->info.writes_clipvertex) {
428 emit->clip_mode = CLIP_VERTEX;
429 }
430 else if (emit->key.clip_plane_enable) {
431 emit->clip_mode = CLIP_LEGACY;
432 }
433 else {
434 emit->clip_mode = CLIP_NONE;
435 }
436 }
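/* For reference, the if/else chain above gives CLIP_DISTANCE priority over
 * CLIP_VERTEX, which in turn takes priority over CLIP_LEGACY: a shader that
 * already writes clip distances is used as-is, one that writes a clip
 * vertex gets the clipvertex-to-clipdistance conversion, and only a shader
 * doing neither gets the auto-generated legacy clip code.
 */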
437
438
439 /**
440 * For clip distance register declarations and clip distance register
441 * writes we need to mask the declaration usage or instruction writemask
442 * (respectively) against the set of the really-enabled clipping planes.
443 *
444 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
445 * has a VS that writes to all 8 clip distance registers, but the plane enable
446 * flags are a subset of that.
447 *
448 * This function is used to apply the plane enable flags to the register
449 * declaration or instruction writemask.
450 *
451 * \param writemask the declaration usage mask or instruction writemask
452 * \param clip_reg_index which clip plane register is being declared/written.
 453  *                       The legal values are 0 and 1 (four clip planes per
 454  *                       register, for a total of 8 clip planes)
455 */
456 static unsigned
457 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
458 unsigned writemask, unsigned clip_reg_index)
459 {
460 unsigned shift;
461
462 assert(clip_reg_index < 2);
463
464 /* four clip planes per clip register: */
465 shift = clip_reg_index * 4;
466 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
467
468 return writemask;
469 }
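/* Worked example (hypothetical key state): with
 * emit->key.clip_plane_enable == 0x33 (planes 0, 1, 4 and 5 enabled) and
 * clip_reg_index == 1, the shift is 4 and the mask becomes
 * (0x33 >> 4) & 0xf == 0x3, so only the X and Y components (clip distances
 * 4 and 5) of the second clip-distance register remain set in the returned
 * writemask.
 */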
470
471
472 /**
473 * Translate gallium shader type into VGPU10 type.
474 */
475 static VGPU10_PROGRAM_TYPE
476 translate_shader_type(unsigned type)
477 {
478 switch (type) {
479 case PIPE_SHADER_VERTEX:
480 return VGPU10_VERTEX_SHADER;
481 case PIPE_SHADER_GEOMETRY:
482 return VGPU10_GEOMETRY_SHADER;
483 case PIPE_SHADER_FRAGMENT:
484 return VGPU10_PIXEL_SHADER;
485 default:
486 assert(!"Unexpected shader type");
487 return VGPU10_VERTEX_SHADER;
488 }
489 }
490
491
492 /**
493 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
494 * Note: we only need to translate the opcodes for "simple" instructions,
495 * as seen below. All other opcodes are handled/translated specially.
496 */
497 static VGPU10_OPCODE_TYPE
498 translate_opcode(unsigned opcode)
499 {
500 switch (opcode) {
501 case TGSI_OPCODE_MOV:
502 return VGPU10_OPCODE_MOV;
503 case TGSI_OPCODE_MUL:
504 return VGPU10_OPCODE_MUL;
505 case TGSI_OPCODE_ADD:
506 return VGPU10_OPCODE_ADD;
507 case TGSI_OPCODE_DP3:
508 return VGPU10_OPCODE_DP3;
509 case TGSI_OPCODE_DP4:
510 return VGPU10_OPCODE_DP4;
511 case TGSI_OPCODE_MIN:
512 return VGPU10_OPCODE_MIN;
513 case TGSI_OPCODE_MAX:
514 return VGPU10_OPCODE_MAX;
515 case TGSI_OPCODE_MAD:
516 return VGPU10_OPCODE_MAD;
517 case TGSI_OPCODE_SQRT:
518 return VGPU10_OPCODE_SQRT;
519 case TGSI_OPCODE_FRC:
520 return VGPU10_OPCODE_FRC;
521 case TGSI_OPCODE_FLR:
522 return VGPU10_OPCODE_ROUND_NI;
523 case TGSI_OPCODE_FSEQ:
524 return VGPU10_OPCODE_EQ;
525 case TGSI_OPCODE_FSGE:
526 return VGPU10_OPCODE_GE;
527 case TGSI_OPCODE_FSNE:
528 return VGPU10_OPCODE_NE;
529 case TGSI_OPCODE_DDX:
530 return VGPU10_OPCODE_DERIV_RTX;
531 case TGSI_OPCODE_DDY:
532 return VGPU10_OPCODE_DERIV_RTY;
533 case TGSI_OPCODE_RET:
534 return VGPU10_OPCODE_RET;
535 case TGSI_OPCODE_DIV:
536 return VGPU10_OPCODE_DIV;
537 case TGSI_OPCODE_IDIV:
538 return VGPU10_OPCODE_IDIV;
539 case TGSI_OPCODE_DP2:
540 return VGPU10_OPCODE_DP2;
541 case TGSI_OPCODE_BRK:
542 return VGPU10_OPCODE_BREAK;
543 case TGSI_OPCODE_IF:
544 return VGPU10_OPCODE_IF;
545 case TGSI_OPCODE_ELSE:
546 return VGPU10_OPCODE_ELSE;
547 case TGSI_OPCODE_ENDIF:
548 return VGPU10_OPCODE_ENDIF;
549 case TGSI_OPCODE_CEIL:
550 return VGPU10_OPCODE_ROUND_PI;
551 case TGSI_OPCODE_I2F:
552 return VGPU10_OPCODE_ITOF;
553 case TGSI_OPCODE_NOT:
554 return VGPU10_OPCODE_NOT;
555 case TGSI_OPCODE_TRUNC:
556 return VGPU10_OPCODE_ROUND_Z;
557 case TGSI_OPCODE_SHL:
558 return VGPU10_OPCODE_ISHL;
559 case TGSI_OPCODE_AND:
560 return VGPU10_OPCODE_AND;
561 case TGSI_OPCODE_OR:
562 return VGPU10_OPCODE_OR;
563 case TGSI_OPCODE_XOR:
564 return VGPU10_OPCODE_XOR;
565 case TGSI_OPCODE_CONT:
566 return VGPU10_OPCODE_CONTINUE;
567 case TGSI_OPCODE_EMIT:
568 return VGPU10_OPCODE_EMIT;
569 case TGSI_OPCODE_ENDPRIM:
570 return VGPU10_OPCODE_CUT;
571 case TGSI_OPCODE_BGNLOOP:
572 return VGPU10_OPCODE_LOOP;
573 case TGSI_OPCODE_ENDLOOP:
574 return VGPU10_OPCODE_ENDLOOP;
575 case TGSI_OPCODE_ENDSUB:
576 return VGPU10_OPCODE_RET;
577 case TGSI_OPCODE_NOP:
578 return VGPU10_OPCODE_NOP;
579 case TGSI_OPCODE_BREAKC:
580 return VGPU10_OPCODE_BREAKC;
581 case TGSI_OPCODE_END:
582 return VGPU10_OPCODE_RET;
583 case TGSI_OPCODE_F2I:
584 return VGPU10_OPCODE_FTOI;
585 case TGSI_OPCODE_IMAX:
586 return VGPU10_OPCODE_IMAX;
587 case TGSI_OPCODE_IMIN:
588 return VGPU10_OPCODE_IMIN;
589 case TGSI_OPCODE_UDIV:
590 case TGSI_OPCODE_UMOD:
591 case TGSI_OPCODE_MOD:
592 return VGPU10_OPCODE_UDIV;
593 case TGSI_OPCODE_IMUL_HI:
594 return VGPU10_OPCODE_IMUL;
595 case TGSI_OPCODE_INEG:
596 return VGPU10_OPCODE_INEG;
597 case TGSI_OPCODE_ISHR:
598 return VGPU10_OPCODE_ISHR;
599 case TGSI_OPCODE_ISGE:
600 return VGPU10_OPCODE_IGE;
601 case TGSI_OPCODE_ISLT:
602 return VGPU10_OPCODE_ILT;
603 case TGSI_OPCODE_F2U:
604 return VGPU10_OPCODE_FTOU;
605 case TGSI_OPCODE_UADD:
606 return VGPU10_OPCODE_IADD;
607 case TGSI_OPCODE_U2F:
608 return VGPU10_OPCODE_UTOF;
609 case TGSI_OPCODE_UCMP:
610 return VGPU10_OPCODE_MOVC;
611 case TGSI_OPCODE_UMAD:
612 return VGPU10_OPCODE_UMAD;
613 case TGSI_OPCODE_UMAX:
614 return VGPU10_OPCODE_UMAX;
615 case TGSI_OPCODE_UMIN:
616 return VGPU10_OPCODE_UMIN;
617 case TGSI_OPCODE_UMUL:
618 case TGSI_OPCODE_UMUL_HI:
619 return VGPU10_OPCODE_UMUL;
620 case TGSI_OPCODE_USEQ:
621 return VGPU10_OPCODE_IEQ;
622 case TGSI_OPCODE_USGE:
623 return VGPU10_OPCODE_UGE;
624 case TGSI_OPCODE_USHR:
625 return VGPU10_OPCODE_USHR;
626 case TGSI_OPCODE_USLT:
627 return VGPU10_OPCODE_ULT;
628 case TGSI_OPCODE_USNE:
629 return VGPU10_OPCODE_INE;
630 case TGSI_OPCODE_SWITCH:
631 return VGPU10_OPCODE_SWITCH;
632 case TGSI_OPCODE_CASE:
633 return VGPU10_OPCODE_CASE;
634 case TGSI_OPCODE_DEFAULT:
635 return VGPU10_OPCODE_DEFAULT;
636 case TGSI_OPCODE_ENDSWITCH:
637 return VGPU10_OPCODE_ENDSWITCH;
638 case TGSI_OPCODE_FSLT:
639 return VGPU10_OPCODE_LT;
640 case TGSI_OPCODE_ROUND:
641 return VGPU10_OPCODE_ROUND_NE;
642 default:
643 assert(!"Unexpected TGSI opcode in translate_opcode()");
644 return VGPU10_OPCODE_NOP;
645 }
646 }
647
648
649 /**
650 * Translate a TGSI register file type into a VGPU10 operand type.
651 * \param array is the TGSI_FILE_TEMPORARY register an array?
652 */
653 static VGPU10_OPERAND_TYPE
654 translate_register_file(enum tgsi_file_type file, boolean array)
655 {
656 switch (file) {
657 case TGSI_FILE_CONSTANT:
658 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
659 case TGSI_FILE_INPUT:
660 return VGPU10_OPERAND_TYPE_INPUT;
661 case TGSI_FILE_OUTPUT:
662 return VGPU10_OPERAND_TYPE_OUTPUT;
663 case TGSI_FILE_TEMPORARY:
664 return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
665 : VGPU10_OPERAND_TYPE_TEMP;
666 case TGSI_FILE_IMMEDIATE:
 667       /* all immediates are 32-bit values at this time, so
 668        * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible.
669 */
670 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
671 case TGSI_FILE_SAMPLER:
672 return VGPU10_OPERAND_TYPE_SAMPLER;
673 case TGSI_FILE_SYSTEM_VALUE:
674 return VGPU10_OPERAND_TYPE_INPUT;
675
676 /* XXX TODO more cases to finish */
677
678 default:
679 assert(!"Bad tgsi register file!");
680 return VGPU10_OPERAND_TYPE_NULL;
681 }
682 }
683
684
685 /**
686 * Emit a null dst register
687 */
688 static void
689 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
690 {
691 VGPU10OperandToken0 operand;
692
693 operand.value = 0;
694 operand.operandType = VGPU10_OPERAND_TYPE_NULL;
695 operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
696
697 emit_dword(emit, operand.value);
698 }
699
700
701 /**
702 * If the given register is a temporary, return the array ID.
703 * Else return zero.
704 */
705 static unsigned
706 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
707 unsigned file, unsigned index)
708 {
709 if (file == TGSI_FILE_TEMPORARY) {
710 return emit->temp_map[index].arrayId;
711 }
712 else {
713 return 0;
714 }
715 }
716
717
718 /**
719 * If the given register is a temporary, convert the index from a TGSI
720 * TEMPORARY index to a VGPU10 temp index.
721 */
722 static unsigned
723 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
724 unsigned file, unsigned index)
725 {
726 if (file == TGSI_FILE_TEMPORARY) {
727 return emit->temp_map[index].index;
728 }
729 else {
730 return index;
731 }
732 }
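/* Illustration (hypothetical mapping): if the TGSI shader declared
 * TEMP[4]..TEMP[7] as an indexable array, the temp_map[] setup elsewhere in
 * this file might record temp_map[6] = { arrayId = 1, index = 2 }, in which
 * case get_temp_array_id() returns 1 and remap_temp_index() returns 2 for
 * TEMP[6]; a plain non-array temp keeps arrayId 0 and simply gets a
 * remapped scalar index.
 */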
733
734
735 /**
736 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
737 * Note: the operandType field must already be initialized.
738 */
739 static VGPU10OperandToken0
740 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
741 VGPU10OperandToken0 operand0,
742 unsigned file,
743 boolean indirect, boolean index2D,
744 unsigned tempArrayID)
745 {
746 unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_0D;
747
748 /*
749 * Compute index dimensions
750 */
751 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
752 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
753 /* there's no swizzle for in-line immediates */
754 indexDim = VGPU10_OPERAND_INDEX_0D;
755 assert(operand0.selectionMode == 0);
756 }
757 else {
758 if (index2D ||
759 tempArrayID > 0 ||
760 operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
761 indexDim = VGPU10_OPERAND_INDEX_2D;
762 }
763 else {
764 indexDim = VGPU10_OPERAND_INDEX_1D;
765 }
766 }
767
768 /*
769 * Compute index representations (immediate, relative, etc).
770 */
771 if (tempArrayID > 0) {
772 assert(file == TGSI_FILE_TEMPORARY);
773 /* First index is the array ID, second index is the array element */
774 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
775 if (indirect) {
776 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
777 }
778 else {
779 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
780 }
781 }
782 else if (indirect) {
783 if (file == TGSI_FILE_CONSTANT) {
784 /* index[0] indicates which constant buffer while index[1] indicates
785 * the position in the constant buffer.
786 */
787 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
788 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
789 }
790 else {
791 /* All other register files are 1-dimensional */
792 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
793 }
794 }
795 else {
796 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
797 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
798 }
799
800 operand0.indexDimension = indexDim;
801 operand0.index0Representation = index0Rep;
802 operand0.index1Representation = index1Rep;
803
804 return operand0;
805 }
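/* Example of the resulting encoding: an indirectly addressed constant such
 * as CONST[1][ADDR[0].x] becomes a 2D operand whose first index (the
 * constant buffer slot) uses VGPU10_OPERAND_INDEX_IMMEDIATE32 and whose
 * second index (the element within that buffer) uses
 * VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE, while an ordinary TEMP
 * access stays 1D with a plain immediate index.
 */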
806
807
808 /**
809 * Emit the operand for expressing an address register for indirect indexing.
810 * Note that the address register is really just a temp register.
811 * \param addr_reg_index which address register to use
812 */
813 static void
814 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
815 unsigned addr_reg_index)
816 {
817 unsigned tmp_reg_index;
818 VGPU10OperandToken0 operand0;
819
820 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
821
822 tmp_reg_index = emit->address_reg_index[addr_reg_index];
823
824 /* operand0 is a simple temporary register, selecting one component */
825 operand0.value = 0;
826 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
827 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
828 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
829 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
830 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
831 operand0.swizzleX = 0;
832 operand0.swizzleY = 1;
833 operand0.swizzleZ = 2;
834 operand0.swizzleW = 3;
835
836 emit_dword(emit, operand0.value);
837 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
838 }
839
840
841 /**
842 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
843 * \param emit the emitter context
844 * \param reg the TGSI dst register to translate
845 */
846 static void
847 emit_dst_register(struct svga_shader_emitter_v10 *emit,
848 const struct tgsi_full_dst_register *reg)
849 {
850 unsigned file = reg->Register.File;
851 unsigned index = reg->Register.Index;
852 const unsigned sem_name = emit->info.output_semantic_name[index];
853 const unsigned sem_index = emit->info.output_semantic_index[index];
854 unsigned writemask = reg->Register.WriteMask;
855 const unsigned indirect = reg->Register.Indirect;
856 const unsigned tempArrayId = get_temp_array_id(emit, file, index);
857 const unsigned index2d = reg->Register.Dimension;
858 VGPU10OperandToken0 operand0;
859
860 if (file == TGSI_FILE_OUTPUT) {
861 if (emit->unit == PIPE_SHADER_VERTEX ||
862 emit->unit == PIPE_SHADER_GEOMETRY) {
863 if (index == emit->vposition.out_index &&
864 emit->vposition.tmp_index != INVALID_INDEX) {
865 /* replace OUTPUT[POS] with TEMP[POS]. We need to store the
866 * vertex position result in a temporary so that we can modify
867 * it in the post_helper() code.
868 */
869 file = TGSI_FILE_TEMPORARY;
870 index = emit->vposition.tmp_index;
871 }
872 else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
873 emit->clip_dist_tmp_index != INVALID_INDEX) {
874 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
875 * We store the clip distance in a temporary first, then
876 * we'll copy it to the shadow copy and to CLIPDIST with the
877 * enabled planes mask in emit_clip_distance_instructions().
878 */
879 file = TGSI_FILE_TEMPORARY;
880 index = emit->clip_dist_tmp_index + sem_index;
881 }
882 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
883 emit->clip_vertex_tmp_index != INVALID_INDEX) {
884 /* replace the CLIPVERTEX output register with a temporary */
885 assert(emit->clip_mode == CLIP_VERTEX);
886 assert(sem_index == 0);
887 file = TGSI_FILE_TEMPORARY;
888 index = emit->clip_vertex_tmp_index;
889 }
890 }
891 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
892 if (sem_name == TGSI_SEMANTIC_POSITION) {
893 /* Fragment depth output register */
894 operand0.value = 0;
895 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
896 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
897 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
898 emit_dword(emit, operand0.value);
899 return;
900 }
901 else if (index == emit->fs.color_out_index[0] &&
902 emit->fs.color_tmp_index != INVALID_INDEX) {
903 /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the
 904           * fragment color result in a temporary so that we can read it
 905           * in the post_helper() code.
906 */
907 file = TGSI_FILE_TEMPORARY;
908 index = emit->fs.color_tmp_index;
909 }
910 else {
911 /* Typically, for fragment shaders, the output register index
912 * matches the color semantic index. But not when we write to
913 * the fragment depth register. In that case, OUT[0] will be
914 * fragdepth and OUT[1] will be the 0th color output. We need
915 * to use the semantic index for color outputs.
916 */
917 assert(sem_name == TGSI_SEMANTIC_COLOR);
918 index = emit->info.output_semantic_index[index];
919
920 emit->num_output_writes++;
921 }
922 }
923 }
924
925 /* init operand tokens to all zero */
926 operand0.value = 0;
927
928 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
929
930 /* the operand has a writemask */
931 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
932
933 /* Which of the four dest components to write to. Note that we can use a
934 * simple assignment here since TGSI writemasks match VGPU10 writemasks.
935 */
936 STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
937 operand0.mask = writemask;
938
939 /* translate TGSI register file type to VGPU10 operand type */
940 operand0.operandType = translate_register_file(file, tempArrayId > 0);
941
942 check_register_index(emit, operand0.operandType, index);
943
944 operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
945 index2d, tempArrayId);
946
947 /* Emit tokens */
948 emit_dword(emit, operand0.value);
949 if (tempArrayId > 0) {
950 emit_dword(emit, tempArrayId);
951 }
952
953 emit_dword(emit, remap_temp_index(emit, file, index));
954
955 if (indirect) {
956 emit_indirect_register(emit, reg->Indirect.Index);
957 }
958 }
959
960
961 /**
962 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
963 */
964 static void
965 emit_src_register(struct svga_shader_emitter_v10 *emit,
966 const struct tgsi_full_src_register *reg)
967 {
968 unsigned file = reg->Register.File;
969 unsigned index = reg->Register.Index;
970 const unsigned indirect = reg->Register.Indirect;
971 const unsigned tempArrayId = get_temp_array_id(emit, file, index);
972 const unsigned index2d = reg->Register.Dimension;
973 const unsigned swizzleX = reg->Register.SwizzleX;
974 const unsigned swizzleY = reg->Register.SwizzleY;
975 const unsigned swizzleZ = reg->Register.SwizzleZ;
976 const unsigned swizzleW = reg->Register.SwizzleW;
977 const unsigned absolute = reg->Register.Absolute;
978 const unsigned negate = reg->Register.Negate;
979 bool is_prim_id = FALSE;
980
981 VGPU10OperandToken0 operand0;
982 VGPU10OperandToken1 operand1;
983
984 if (emit->unit == PIPE_SHADER_FRAGMENT &&
985 file == TGSI_FILE_INPUT) {
986 if (index == emit->fs.face_input_index) {
987 /* Replace INPUT[FACE] with TEMP[FACE] */
988 file = TGSI_FILE_TEMPORARY;
989 index = emit->fs.face_tmp_index;
990 }
991 else if (index == emit->fs.fragcoord_input_index) {
992 /* Replace INPUT[POSITION] with TEMP[POSITION] */
993 file = TGSI_FILE_TEMPORARY;
994 index = emit->fs.fragcoord_tmp_index;
995 }
996 else {
 997          /* We remap fragment shader inputs so that FS input indexes
998 * match up with VS/GS output indexes.
999 */
1000 index = emit->linkage.input_map[index];
1001 }
1002 }
1003 else if (emit->unit == PIPE_SHADER_GEOMETRY &&
1004 file == TGSI_FILE_INPUT) {
1005 is_prim_id = (index == emit->gs.prim_id_index);
1006 index = emit->linkage.input_map[index];
1007 }
1008 else if (emit->unit == PIPE_SHADER_VERTEX) {
1009 if (file == TGSI_FILE_INPUT) {
1010 /* if input is adjusted... */
1011 if ((emit->key.vs.adjust_attrib_w_1 |
1012 emit->key.vs.adjust_attrib_itof |
1013 emit->key.vs.adjust_attrib_utof |
1014 emit->key.vs.attrib_is_bgra |
1015 emit->key.vs.attrib_puint_to_snorm |
1016 emit->key.vs.attrib_puint_to_uscaled |
1017 emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1018 file = TGSI_FILE_TEMPORARY;
1019 index = emit->vs.adjusted_input[index];
1020 }
1021 }
1022 else if (file == TGSI_FILE_SYSTEM_VALUE) {
1023 assert(index < Elements(emit->system_value_indexes));
1024 index = emit->system_value_indexes[index];
1025 }
1026 }
1027
1028 operand0.value = operand1.value = 0;
1029
1030 if (is_prim_id) {
1031 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1032 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1033 }
1034 else {
1035 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1036 operand0.operandType = translate_register_file(file, tempArrayId > 0);
1037 }
1038
1039 operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1040 index2d, tempArrayId);
1041
1042 if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1043 operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1044 /* there's no swizzle for in-line immediates */
1045 if (swizzleX == swizzleY &&
1046 swizzleX == swizzleZ &&
1047 swizzleX == swizzleW) {
1048 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1049 }
1050 else {
1051 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1052 }
1053
1054 operand0.swizzleX = swizzleX;
1055 operand0.swizzleY = swizzleY;
1056 operand0.swizzleZ = swizzleZ;
1057 operand0.swizzleW = swizzleW;
1058
1059 if (absolute || negate) {
1060 operand0.extended = 1;
1061 operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1062 if (absolute && !negate)
1063 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1064 if (!absolute && negate)
1065 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1066 if (absolute && negate)
1067 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1068 }
1069 }
1070
1071 /* Emit the operand tokens */
1072 emit_dword(emit, operand0.value);
1073 if (operand0.extended)
1074 emit_dword(emit, operand1.value);
1075
1076 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1077 /* Emit the four float/int in-line immediate values */
1078 unsigned *c;
1079 assert(index < Elements(emit->immediates));
1080 assert(file == TGSI_FILE_IMMEDIATE);
1081 assert(swizzleX < 4);
1082 assert(swizzleY < 4);
1083 assert(swizzleZ < 4);
1084 assert(swizzleW < 4);
1085 c = (unsigned *) emit->immediates[index];
1086 emit_dword(emit, c[swizzleX]);
1087 emit_dword(emit, c[swizzleY]);
1088 emit_dword(emit, c[swizzleZ]);
1089 emit_dword(emit, c[swizzleW]);
1090 }
1091 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1092 /* Emit the register index(es) */
1093 if (index2d ||
1094 operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
1095 emit_dword(emit, reg->Dimension.Index);
1096 }
1097
1098 if (tempArrayId > 0) {
1099 emit_dword(emit, tempArrayId);
1100 }
1101
1102 emit_dword(emit, remap_temp_index(emit, file, index));
1103
1104 if (indirect) {
1105 emit_indirect_register(emit, reg->Indirect.Index);
1106 }
1107 }
1108 }
1109
1110
1111 /**
1112 * Emit a resource operand (for use with a SAMPLE instruction).
1113 */
1114 static void
1115 emit_resource_register(struct svga_shader_emitter_v10 *emit,
1116 unsigned resource_number)
1117 {
1118 VGPU10OperandToken0 operand0;
1119
1120 check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
1121
1122 /* init */
1123 operand0.value = 0;
1124
1125 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
1126 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1127 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1128 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1129 operand0.swizzleX = VGPU10_COMPONENT_X;
1130 operand0.swizzleY = VGPU10_COMPONENT_Y;
1131 operand0.swizzleZ = VGPU10_COMPONENT_Z;
1132 operand0.swizzleW = VGPU10_COMPONENT_W;
1133
1134 emit_dword(emit, operand0.value);
1135 emit_dword(emit, resource_number);
1136 }
1137
1138
1139 /**
1140 * Emit a sampler operand (for use with a SAMPLE instruction).
1141 */
1142 static void
1143 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
1144 unsigned sampler_number)
1145 {
1146 VGPU10OperandToken0 operand0;
1147
1148 check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
1149
1150 /* init */
1151 operand0.value = 0;
1152
1153 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
1154 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1155
1156 emit_dword(emit, operand0.value);
1157 emit_dword(emit, sampler_number);
1158 }
1159
1160
1161 /**
1162 * Emit an operand which reads the IS_FRONT_FACING register.
1163 */
1164 static void
1165 emit_face_register(struct svga_shader_emitter_v10 *emit)
1166 {
1167 VGPU10OperandToken0 operand0;
1168 unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
1169
1170 /* init */
1171 operand0.value = 0;
1172
1173 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
1174 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1175 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1176 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1177
1178 operand0.swizzleX = VGPU10_COMPONENT_X;
1179 operand0.swizzleY = VGPU10_COMPONENT_X;
1180 operand0.swizzleZ = VGPU10_COMPONENT_X;
1181 operand0.swizzleW = VGPU10_COMPONENT_X;
1182
1183 emit_dword(emit, operand0.value);
1184 emit_dword(emit, index);
1185 }
1186
1187
1188 /**
1189 * Emit the token for a VGPU10 opcode.
1190 * \param saturate clamp result to [0,1]?
1191 */
1192 static void
1193 emit_opcode(struct svga_shader_emitter_v10 *emit,
1194 unsigned vgpu10_opcode, boolean saturate)
1195 {
1196 VGPU10OpcodeToken0 token0;
1197
1198 token0.value = 0; /* init all fields to zero */
1199 token0.opcodeType = vgpu10_opcode;
1200 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1201 token0.saturate = saturate;
1202
1203 emit_dword(emit, token0.value);
1204 }
1205
1206
1207 /**
1208 * Emit the token for a VGPU10 resinfo instruction.
1209 * \param modifier return type modifier, _uint or _rcpFloat.
1210 * TODO: We may want to remove this parameter if it will
1211 * only ever be used as _uint.
1212 */
1213 static void
1214 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
1215 VGPU10_RESINFO_RETURN_TYPE modifier)
1216 {
1217 VGPU10OpcodeToken0 token0;
1218
1219 token0.value = 0; /* init all fields to zero */
1220 token0.opcodeType = VGPU10_OPCODE_RESINFO;
1221 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1222 token0.resinfoReturnType = modifier;
1223
1224 emit_dword(emit, token0.value);
1225 }
1226
1227
1228 /**
1229 * Emit opcode tokens for a texture sample instruction. Texture instructions
1230 * can be rather complicated (texel offsets, etc) so we have this specialized
1231 * function.
1232 */
1233 static void
1234 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
1235 unsigned vgpu10_opcode, boolean saturate,
1236 const int offsets[3])
1237 {
1238 VGPU10OpcodeToken0 token0;
1239 VGPU10OpcodeToken1 token1;
1240
1241 token0.value = 0; /* init all fields to zero */
1242 token0.opcodeType = vgpu10_opcode;
1243 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1244 token0.saturate = saturate;
1245
1246 if (offsets[0] || offsets[1] || offsets[2]) {
1247 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
1248 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
1249 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
1250 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
1251 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
1252 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
1253
1254 token0.extended = 1;
1255 token1.value = 0;
1256 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
1257 token1.offsetU = offsets[0];
1258 token1.offsetV = offsets[1];
1259 token1.offsetW = offsets[2];
1260 }
1261
1262 emit_dword(emit, token0.value);
1263 if (token0.extended) {
1264 emit_dword(emit, token1.value);
1265 }
1266 }
1267
1268
1269 /**
1270 * Emit a DISCARD opcode token.
1271 * If nonzero is set, we'll discard the fragment if the X component is not 0.
1272 * Otherwise, we'll discard the fragment if the X component is 0.
1273 */
1274 static void
1275 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
1276 {
1277 VGPU10OpcodeToken0 opcode0;
1278
1279 opcode0.value = 0;
1280 opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
1281 if (nonzero)
1282 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
1283
1284 emit_dword(emit, opcode0.value);
1285 }
1286
1287
1288 /**
1289 * We need to call this before we begin emitting a VGPU10 instruction.
1290 */
1291 static void
1292 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
1293 {
1294 assert(emit->inst_start_token == 0);
1295 /* Save location of the instruction's VGPU10OpcodeToken0 token.
1296 * Note, we can't save a pointer because it would become invalid if
1297 * we have to realloc the output buffer.
1298 */
1299 emit->inst_start_token = emit_get_num_tokens(emit);
1300 }
1301
1302
1303 /**
1304 * We need to call this after we emit the last token of a VGPU10 instruction.
1305 * This function patches in the opcode token's instructionLength field.
1306 */
1307 static void
1308 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
1309 {
1310 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
1311 unsigned inst_length;
1312
1313 assert(emit->inst_start_token > 0);
1314
1315 if (emit->discard_instruction) {
1316 /* Back up the emit->ptr to where this instruction started so
1317 * that we discard the current instruction.
1318 */
1319 emit->ptr = (char *) (tokens + emit->inst_start_token);
1320 }
1321 else {
1322 /* Compute instruction length and patch that into the start of
1323 * the instruction.
1324 */
1325 inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
1326
1327 assert(inst_length > 0);
1328
1329 tokens[emit->inst_start_token].instructionLength = inst_length;
1330 }
1331
1332 emit->inst_start_token = 0; /* reset to zero for error checking */
1333 emit->discard_instruction = FALSE;
1334 }
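/* Typical usage of the begin/end bracketing (sketch; `dst' and `src' stand
 * for previously built tgsi_full_dst/src_register structs):
 *
 *    begin_emit_instruction(emit);
 *    emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
 *    emit_dst_register(emit, &dst);
 *    emit_src_register(emit, &src);
 *    end_emit_instruction(emit);
 *
 * end_emit_instruction() back-patches the MOV token's instructionLength
 * once all the operand tokens have been emitted.
 */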
1335
1336
1337 /**
1338 * Return index for a free temporary register.
1339 */
1340 static unsigned
1341 get_temp_index(struct svga_shader_emitter_v10 *emit)
1342 {
1343 assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
1344 return emit->num_shader_temps + emit->internal_temp_count++;
1345 }
1346
1347
1348 /**
1349 * Release the temporaries which were generated by get_temp_index().
1350 */
1351 static void
1352 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
1353 {
1354 emit->internal_temp_count = 0;
1355 }
1356
1357
1358 /**
1359 * Create a tgsi_full_src_register.
1360 */
1361 static struct tgsi_full_src_register
1362 make_src_reg(unsigned file, unsigned index)
1363 {
1364 struct tgsi_full_src_register reg;
1365
1366 memset(&reg, 0, sizeof(reg));
1367 reg.Register.File = file;
1368 reg.Register.Index = index;
1369 reg.Register.SwizzleX = TGSI_SWIZZLE_X;
1370 reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
1371 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1372 reg.Register.SwizzleW = TGSI_SWIZZLE_W;
1373 return reg;
1374 }
1375
1376
1377 /**
1378 * Create a tgsi_full_src_register for a temporary.
1379 */
1380 static struct tgsi_full_src_register
1381 make_src_temp_reg(unsigned index)
1382 {
1383 return make_src_reg(TGSI_FILE_TEMPORARY, index);
1384 }
1385
1386
1387 /**
1388 * Create a tgsi_full_src_register for a constant.
1389 */
1390 static struct tgsi_full_src_register
1391 make_src_const_reg(unsigned index)
1392 {
1393 return make_src_reg(TGSI_FILE_CONSTANT, index);
1394 }
1395
1396
1397 /**
1398 * Create a tgsi_full_src_register for an immediate constant.
1399 */
1400 static struct tgsi_full_src_register
1401 make_src_immediate_reg(unsigned index)
1402 {
1403 return make_src_reg(TGSI_FILE_IMMEDIATE, index);
1404 }
1405
1406
1407 /**
1408 * Create a tgsi_full_dst_register.
1409 */
1410 static struct tgsi_full_dst_register
1411 make_dst_reg(unsigned file, unsigned index)
1412 {
1413 struct tgsi_full_dst_register reg;
1414
1415 memset(&reg, 0, sizeof(reg));
1416 reg.Register.File = file;
1417 reg.Register.Index = index;
1418 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1419 return reg;
1420 }
1421
1422
1423 /**
1424 * Create a tgsi_full_dst_register for a temporary.
1425 */
1426 static struct tgsi_full_dst_register
1427 make_dst_temp_reg(unsigned index)
1428 {
1429 return make_dst_reg(TGSI_FILE_TEMPORARY, index);
1430 }
1431
1432
1433 /**
1434 * Create a tgsi_full_dst_register for an output.
1435 */
1436 static struct tgsi_full_dst_register
1437 make_dst_output_reg(unsigned index)
1438 {
1439 return make_dst_reg(TGSI_FILE_OUTPUT, index);
1440 }
1441
1442
1443 /**
1444 * Create negated tgsi_full_src_register.
1445 */
1446 static struct tgsi_full_src_register
1447 negate_src(const struct tgsi_full_src_register *reg)
1448 {
1449 struct tgsi_full_src_register neg = *reg;
1450 neg.Register.Negate = !reg->Register.Negate;
1451 return neg;
1452 }
1453
1454 /**
1455 * Create absolute value of a tgsi_full_src_register.
1456 */
1457 static struct tgsi_full_src_register
1458 absolute_src(const struct tgsi_full_src_register *reg)
1459 {
1460 struct tgsi_full_src_register absolute = *reg;
1461 absolute.Register.Absolute = 1;
1462 return absolute;
1463 }
1464
1465
1466 /** Return the named swizzle term from the src register */
1467 static inline unsigned
1468 get_swizzle(const struct tgsi_full_src_register *reg, unsigned term)
1469 {
1470 switch (term) {
1471 case TGSI_SWIZZLE_X:
1472 return reg->Register.SwizzleX;
1473 case TGSI_SWIZZLE_Y:
1474 return reg->Register.SwizzleY;
1475 case TGSI_SWIZZLE_Z:
1476 return reg->Register.SwizzleZ;
1477 case TGSI_SWIZZLE_W:
1478 return reg->Register.SwizzleW;
1479 default:
1480 assert(!"Bad swizzle");
1481 return TGSI_SWIZZLE_X;
1482 }
1483 }
1484
1485
1486 /**
1487 * Create swizzled tgsi_full_src_register.
1488 */
1489 static struct tgsi_full_src_register
1490 swizzle_src(const struct tgsi_full_src_register *reg,
1491 unsigned swizzleX, unsigned swizzleY,
1492 unsigned swizzleZ, unsigned swizzleW)
1493 {
1494 struct tgsi_full_src_register swizzled = *reg;
1495 /* Note: we swizzle the current swizzle */
1496 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
1497 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
1498 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
1499 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
1500 return swizzled;
1501 }
1502
1503
1504 /**
1505 * Create swizzled tgsi_full_src_register where all the swizzle
1506 * terms are the same.
1507 */
1508 static struct tgsi_full_src_register
1509 scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle)
1510 {
1511 struct tgsi_full_src_register swizzled = *reg;
1512 /* Note: we swizzle the current swizzle */
1513 swizzled.Register.SwizzleX =
1514 swizzled.Register.SwizzleY =
1515 swizzled.Register.SwizzleZ =
1516 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
1517 return swizzled;
1518 }
1519
1520
1521 /**
1522 * Create new tgsi_full_dst_register with writemask.
1523 * \param mask bitmask of TGSI_WRITEMASK_[XYZW]
1524 */
1525 static struct tgsi_full_dst_register
1526 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
1527 {
1528 struct tgsi_full_dst_register masked = *reg;
1529 masked.Register.WriteMask = mask;
1530 return masked;
1531 }
1532
1533
1534 /**
1535 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
1536 */
1537 static boolean
1538 same_swizzle_terms(const struct tgsi_full_src_register *reg)
1539 {
1540 return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
1541 reg->Register.SwizzleY == reg->Register.SwizzleZ &&
1542 reg->Register.SwizzleZ == reg->Register.SwizzleW);
1543 }
1544
1545
1546 /**
1547 * Search the vector for the value 'x' and return its position.
1548 */
1549 static int
1550 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
1551 union tgsi_immediate_data x)
1552 {
1553 unsigned i;
1554 for (i = 0; i < 4; i++) {
1555 if (vec[i].Int == x.Int)
1556 return i;
1557 }
1558 return -1;
1559 }
1560
1561
1562 /**
1563 * Helper used by make_immediate_reg(), make_immediate_reg_4().
1564 */
1565 static int
1566 find_immediate(struct svga_shader_emitter_v10 *emit,
1567 union tgsi_immediate_data x, unsigned startIndex)
1568 {
1569 const unsigned endIndex = emit->num_immediates;
1570 unsigned i;
1571
1572 assert(emit->immediates_emitted);
1573
1574 /* Search immediates for x, y, z, w */
1575 for (i = startIndex; i < endIndex; i++) {
1576 if (x.Int == emit->immediates[i][0].Int ||
1577 x.Int == emit->immediates[i][1].Int ||
1578 x.Int == emit->immediates[i][2].Int ||
1579 x.Int == emit->immediates[i][3].Int) {
1580 return i;
1581 }
1582 }
1583 /* Should never try to use an immediate value that wasn't pre-declared */
1584 assert(!"find_immediate() failed!");
1585 return -1;
1586 }
1587
1588
1589 /**
1590 * Return a tgsi_full_src_register for an immediate/literal
1591 * union tgsi_immediate_data[4] value.
1592 * Note: the values must have been previously declared/allocated in
1593 * emit_pre_helpers(). And, all of x,y,z,w must be located in the same
1594 * vec4 immediate.
1595 */
1596 static struct tgsi_full_src_register
1597 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
1598 const union tgsi_immediate_data imm[4])
1599 {
1600 struct tgsi_full_src_register reg;
1601 unsigned i;
1602
1603 for (i = 0; i < emit->num_common_immediates; i++) {
1604 /* search for first component value */
1605 int immpos = find_immediate(emit, imm[0], i);
1606 int x, y, z, w;
1607
1608 assert(immpos >= 0);
1609
1610 /* find remaining components within the immediate vector */
1611 x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
1612 y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
1613 z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
1614 w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
1615
 1616       if (x >= 0 && y >= 0 && z >= 0 && w >= 0) {
1617 /* found them all */
1618 memset(&reg, 0, sizeof(reg));
1619 reg.Register.File = TGSI_FILE_IMMEDIATE;
1620 reg.Register.Index = immpos;
1621 reg.Register.SwizzleX = x;
1622 reg.Register.SwizzleY = y;
1623 reg.Register.SwizzleZ = z;
1624 reg.Register.SwizzleW = w;
1625 return reg;
1626 }
1627 /* else, keep searching */
1628 }
1629
1630 assert(!"Failed to find immediate register!");
1631
1632 /* Just return IMM[0].xxxx */
1633 memset(&reg, 0, sizeof(reg));
1634 reg.Register.File = TGSI_FILE_IMMEDIATE;
1635 return reg;
1636 }
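/* Usage note: this only works for values that were grouped into a single
 * vec4 by alloc_immediate_4()/alloc_immediate_float4() during the
 * pre-declaration pass.  E.g. requesting {0.0f, 1.0f, 0.5f, -1.0f} succeeds
 * only if all four floats ended up in one emit->immediates[] entry; the
 * returned register then just swizzles that IMM slot to pick them out.
 */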
1637
1638
1639 /**
1640 * Return a tgsi_full_src_register for an immediate/literal
1641 * union tgsi_immediate_data value of the form {value, value, value, value}.
1642 * \sa make_immediate_reg_4() regarding allowed values.
1643 */
1644 static struct tgsi_full_src_register
1645 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
1646 union tgsi_immediate_data value)
1647 {
1648 struct tgsi_full_src_register reg;
1649 int immpos = find_immediate(emit, value, 0);
1650
1651 assert(immpos >= 0);
1652
1653 memset(&reg, 0, sizeof(reg));
1654 reg.Register.File = TGSI_FILE_IMMEDIATE;
1655 reg.Register.Index = immpos;
1656 reg.Register.SwizzleX =
1657 reg.Register.SwizzleY =
1658 reg.Register.SwizzleZ =
1659 reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
1660
1661 return reg;
1662 }
1663
1664
1665 /**
1666 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
1667 * \sa make_immediate_reg_4() regarding allowed values.
1668 */
1669 static struct tgsi_full_src_register
1670 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
1671 float x, float y, float z, float w)
1672 {
1673 union tgsi_immediate_data imm[4];
1674 imm[0].Float = x;
1675 imm[1].Float = y;
1676 imm[2].Float = z;
1677 imm[3].Float = w;
1678 return make_immediate_reg_4(emit, imm);
1679 }
1680
1681
1682 /**
1683 * Return a tgsi_full_src_register for an immediate/literal float value
1684 * of the form {value, value, value, value}.
1685 * \sa make_immediate_reg_4() regarding allowed values.
1686 */
1687 static struct tgsi_full_src_register
1688 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
1689 {
1690 union tgsi_immediate_data imm;
1691 imm.Float = value;
1692 return make_immediate_reg(emit, imm);
1693 }
1694
1695
1696 /**
1697 * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
1698 */
1699 static struct tgsi_full_src_register
1700 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
1701 int x, int y, int z, int w)
1702 {
1703 union tgsi_immediate_data imm[4];
1704 imm[0].Int = x;
1705 imm[1].Int = y;
1706 imm[2].Int = z;
1707 imm[3].Int = w;
1708 return make_immediate_reg_4(emit, imm);
1709 }
1710
1711
1712 /**
1713 * Return a tgsi_full_src_register for an immediate/literal int value
1714 * of the form {value, value, value, value}.
1715 * \sa make_immediate_reg_4() regarding allowed values.
1716 */
1717 static struct tgsi_full_src_register
1718 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
1719 {
1720 union tgsi_immediate_data imm;
1721 imm.Int = value;
1722 return make_immediate_reg(emit, imm);
1723 }
1724
1725
1726 /**
1727 * Allocate space for a union tgsi_immediate_data[4] immediate.
1728 * \return the index/position of the immediate.
1729 */
1730 static unsigned
1731 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
1732 const union tgsi_immediate_data imm[4])
1733 {
1734 unsigned n = emit->num_immediates++;
1735 assert(!emit->immediates_emitted);
1736 assert(n < Elements(emit->immediates));
1737 emit->immediates[n][0] = imm[0];
1738 emit->immediates[n][1] = imm[1];
1739 emit->immediates[n][2] = imm[2];
1740 emit->immediates[n][3] = imm[3];
1741 return n;
1742 }
1743
1744
1745 /**
1746 * Allocate space for a float[4] immediate.
1747 * \return the index/position of the immediate.
1748 */
1749 static unsigned
1750 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
1751 float x, float y, float z, float w)
1752 {
1753 union tgsi_immediate_data imm[4];
1754 imm[0].Float = x;
1755 imm[1].Float = y;
1756 imm[2].Float = z;
1757 imm[3].Float = w;
1758 return alloc_immediate_4(emit, imm);
1759 }
1760
1761
1762 /**
1763 * Allocate space for a int[4] immediate.
1764 * \return the index/position of the immediate.
1765 */
1766 static unsigned
1767 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
1768 int x, int y, int z, int w)
1769 {
1770 union tgsi_immediate_data imm[4];
1771 imm[0].Int = x;
1772 imm[1].Int = y;
1773 imm[2].Int = z;
1774 imm[3].Int = w;
1775 return alloc_immediate_4(emit, imm);
1776 }
1777
1778
1779 /**
1780 * Allocate a shader input to store a system value.
1781 */
1782 static unsigned
1783 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
1784 {
1785 const unsigned n = emit->info.num_inputs + index;
1786 assert(index < Elements(emit->system_value_indexes));
1787 emit->system_value_indexes[index] = n;
1788 return n;
1789 }
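/* Example: for a shader with emit->info.num_inputs == 3, system value 0
 * (say, the vertex or instance ID) is assigned VGPU10 input register 3; in
 * other words, system-value inputs are simply appended after the regular
 * inputs.
 */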
1790
1791
1792 /**
1793 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
1794 */
1795 static boolean
1796 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
1797 const struct tgsi_full_immediate *imm)
1798 {
1799 /* We don't actually emit any code here. We just save the
1800 * immediate values and emit them later.
1801 */
1802 alloc_immediate_4(emit, imm->u);
1803 return TRUE;
1804 }
1805
1806
1807 /**
1808 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
1809 * containing all the immediate values previously allocated
1810 * with alloc_immediate_4().
1811 */
1812 static boolean
1813 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
1814 {
1815 VGPU10OpcodeToken0 token;
1816
1817 assert(!emit->immediates_emitted);
1818
1819 token.value = 0;
1820 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
1821 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
1822
1823 /* Note: no begin/end_emit_instruction() calls */
1824 emit_dword(emit, token.value);
1825 emit_dword(emit, 2 + 4 * emit->num_immediates);
1826 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
1827
1828 emit->immediates_emitted = TRUE;
1829
1830 return TRUE;
1831 }
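/* The resulting custom-data block is laid out as: one CUSTOMDATA opcode
 * token, one dword holding the total block length in dwords (2 + 4 * n),
 * then the n vec4 immediate values themselves; this is why no
 * begin/end_emit_instruction() bracketing is used here.
 */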
1832
1833
1834 /**
1835 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
1836 * interpolation mode.
1837 * \return a VGPU10_INTERPOLATION_x value
1838 */
1839 static unsigned
1840 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
1841 unsigned interp, unsigned interpolate_loc)
1842 {
1843 if (interp == TGSI_INTERPOLATE_COLOR) {
1844 interp = emit->key.fs.flatshade ?
1845 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
1846 }
1847
1848 switch (interp) {
1849 case TGSI_INTERPOLATE_CONSTANT:
1850 return VGPU10_INTERPOLATION_CONSTANT;
1851 case TGSI_INTERPOLATE_LINEAR:
1852 return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
1853 VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID :
1854 VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
1855 case TGSI_INTERPOLATE_PERSPECTIVE:
1856 return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
1857 VGPU10_INTERPOLATION_LINEAR_CENTROID :
1858 VGPU10_INTERPOLATION_LINEAR;
1859 default:
1860 assert(!"Unexpected interpolation mode");
1861 return VGPU10_INTERPOLATION_CONSTANT;
1862 }
1863 }
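
/* Worked examples of the mapping above: TGSI_INTERPOLATE_COLOR with
 * key.fs.flatshade set becomes VGPU10_INTERPOLATION_CONSTANT, while
 * TGSI_INTERPOLATE_PERSPECTIVE at TGSI_INTERPOLATE_LOC_CENTROID becomes
 * VGPU10_INTERPOLATION_LINEAR_CENTROID.
 */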
1864
1865
1866 /**
1867 * Translate a TGSI property to VGPU10.
1868 * Don't emit any instructions yet; we only need to gather the primitive property information.
1869 * The output primitive topology might be changed later. The final property instructions
1870 * will be emitted as part of the pre-helper code.
1871 */
1872 static boolean
1873 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
1874 const struct tgsi_full_property *prop)
1875 {
1876 static const VGPU10_PRIMITIVE primType[] = {
1877 VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */
1878 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */
1879 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */
1880 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */
1881 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */
1882 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */
1883 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */
1884 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */
1885 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */
1886 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */
1887 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */
1888 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
1889 VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
1890 VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
1891 };
1892
1893 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
1894 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */
1895 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */
1896 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */
1897 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */
1898 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */
1899 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
1900 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
1901 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */
1902 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */
1903 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */
1904 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */
1905 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
1906 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
1907 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
1908 };
1909
1910 static const unsigned inputArraySize[] = {
1911 0, /* VGPU10_PRIMITIVE_UNDEFINED */
1912 1, /* VGPU10_PRIMITIVE_POINT */
1913 2, /* VGPU10_PRIMITIVE_LINE */
1914 3, /* VGPU10_PRIMITIVE_TRIANGLE */
1915 0,
1916 0,
1917 4, /* VGPU10_PRIMITIVE_LINE_ADJ */
1918 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
1919 };
1920
1921 switch (prop->Property.PropertyName) {
1922 case TGSI_PROPERTY_GS_INPUT_PRIM:
1923 assert(prop->u[0].Data < Elements(primType));
1924 emit->gs.prim_type = primType[prop->u[0].Data];
1925 assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
1926 emit->gs.input_size = inputArraySize[emit->gs.prim_type];
1927 break;
1928
1929 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
1930 assert(prop->u[0].Data < Elements(primTopology));
1931 emit->gs.prim_topology = primTopology[prop->u[0].Data];
1932 assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
1933 break;
1934
1935 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
1936 emit->gs.max_out_vertices = prop->u[0].Data;
1937 break;
1938
1939 default:
1940 break;
1941 }
1942
1943 return TRUE;
1944 }
1945
1946
1947 static void
1948 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
1949 VGPU10OpcodeToken0 opcode0, unsigned nData,
1950 unsigned data)
1951 {
1952 begin_emit_instruction(emit);
1953 emit_dword(emit, opcode0.value);
1954 if (nData)
1955 emit_dword(emit, data);
1956 end_emit_instruction(emit);
1957 }
1958
1959
1960 /**
1961 * Emit property instructions
1962 */
1963 static void
1964 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
1965 {
1966 VGPU10OpcodeToken0 opcode0;
1967
1968 assert(emit->unit == PIPE_SHADER_GEOMETRY);
1969
1970 /* emit input primitive type declaration */
1971 opcode0.value = 0;
1972 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
1973 opcode0.primitive = emit->gs.prim_type;
1974 emit_property_instruction(emit, opcode0, 0, 0);
1975
1976 /* emit output primitive topology declaration */
1977 opcode0.value = 0;
1978 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
1979 opcode0.primitiveTopology = emit->gs.prim_topology;
1980 emit_property_instruction(emit, opcode0, 0, 0);
1981
1982 /* emit max output vertices */
1983 opcode0.value = 0;
1984 opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
1985 emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
1986 }
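
/* For a GS that consumes triangles and emits a triangle strip of at most
 * four vertices, this produces three declarations (roughly, in disassembled
 * form):
 *    dcl_inputPrimitive triangle
 *    dcl_outputTopology trianglestrip
 *    dcl_maxOutputVertexCount 4
 */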
1987
1988
1989 /**
1990 * Emit a vgpu10 declaration "instruction".
1991 * \param index the register index
1992 * \param size array size of the operand. In most cases, it is 1,
1993 * but for geometry shader inputs, the array size varies
1994 * depending on the primitive type.
1995 */
1996 static void
1997 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
1998 VGPU10OpcodeToken0 opcode0,
1999 VGPU10OperandToken0 operand0,
2000 VGPU10NameToken name_token,
2001 unsigned index, unsigned size)
2002 {
2003 assert(opcode0.opcodeType);
2004 assert(operand0.mask);
2005
2006 begin_emit_instruction(emit);
2007 emit_dword(emit, opcode0.value);
2008
2009 emit_dword(emit, operand0.value);
2010
2011 if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
2012 /* Next token is the index of the register to declare */
2013 emit_dword(emit, index);
2014 }
2015 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
2016 /* Next token is the size of the register */
2017 emit_dword(emit, size);
2018
2019 /* Followed by the index of the register */
2020 emit_dword(emit, index);
2021 }
2022
2023 if (name_token.value) {
2024 emit_dword(emit, name_token.value);
2025 }
2026
2027 end_emit_instruction(emit);
2028 }
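
/* A sketch of the emitted token layout: a 1D declaration is
 *    opcode token | operand token | register index | [name token]
 * while a 2D declaration (e.g. GS inputs) inserts the array size before the
 * register index:
 *    opcode token | operand token | array size | register index | [name token]
 */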
2029
2030
2031 /**
2032 * Emit the declaration for a shader input.
2033 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx
2034 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
2035 * \param dim index dimension
2036 * \param index the input register index
2037 * \param size array size of the operand. In most cases, it is 1,
2038 * but for geometry shader inputs, the array size varies
2039 * depending on the primitive type.
2040 * \param name one of VGPU10_NAME_x
2041 * \param numComp number of components
2042 * \param selMode component selection mode
2043 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
2044 * \param interpMode interpolation mode
2045 */
2046 static void
2047 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
2048 unsigned opcodeType, unsigned operandType,
2049 unsigned dim, unsigned index, unsigned size,
2050 unsigned name, unsigned numComp,
2051 unsigned selMode, unsigned usageMask,
2052 unsigned interpMode)
2053 {
2054 VGPU10OpcodeToken0 opcode0;
2055 VGPU10OperandToken0 operand0;
2056 VGPU10NameToken name_token;
2057
2058 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2059 assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
2060 opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
2061 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
2062 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
2063 assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
2064 operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
2065 assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
2066 assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
2067 assert(dim <= VGPU10_OPERAND_INDEX_3D);
2068 assert(name == VGPU10_NAME_UNDEFINED ||
2069 name == VGPU10_NAME_POSITION ||
2070 name == VGPU10_NAME_INSTANCE_ID ||
2071 name == VGPU10_NAME_VERTEX_ID ||
2072 name == VGPU10_NAME_PRIMITIVE_ID ||
2073 name == VGPU10_NAME_IS_FRONT_FACE);
2074 assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
2075 interpMode == VGPU10_INTERPOLATION_CONSTANT ||
2076 interpMode == VGPU10_INTERPOLATION_LINEAR ||
2077 interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
2078 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
2079 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
2080
2081 check_register_index(emit, opcodeType, index);
2082
2083 opcode0.value = operand0.value = name_token.value = 0;
2084
2085 opcode0.opcodeType = opcodeType;
2086 opcode0.interpolationMode = interpMode;
2087
2088 operand0.operandType = operandType;
2089 operand0.numComponents = numComp;
2090 operand0.selectionMode = selMode;
2091 operand0.mask = usageMask;
2092 operand0.indexDimension = dim;
2093 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2094 if (dim == VGPU10_OPERAND_INDEX_2D)
2095 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2096
2097 name_token.name = name;
2098
2099 emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
2100 }
2101
2102
2103 /**
2104 * Emit the declaration for a shader output.
2105 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx
2106 * \param index the output register index
2107 * \param name one of VGPU10_NAME_x
2108 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
2109 */
2110 static void
2111 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
2112 unsigned type, unsigned index,
2113 unsigned name, unsigned usageMask)
2114 {
2115 VGPU10OpcodeToken0 opcode0;
2116 VGPU10OperandToken0 operand0;
2117 VGPU10NameToken name_token;
2118
2119 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2120 assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
2121 type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
2122 type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
2123 assert(name == VGPU10_NAME_UNDEFINED ||
2124 name == VGPU10_NAME_POSITION ||
2125 name == VGPU10_NAME_PRIMITIVE_ID ||
2126 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
2127 name == VGPU10_NAME_CLIP_DISTANCE);
2128
2129 check_register_index(emit, type, index);
2130
2131 opcode0.value = operand0.value = name_token.value = 0;
2132
2133 opcode0.opcodeType = type;
2134 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
2135 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2136 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
2137 operand0.mask = usageMask;
2138 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2139 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2140
2141 name_token.name = name;
2142
2143 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
2144 }
2145
2146
2147 /**
2148 * Emit the declaration for the fragment depth output.
2149 */
2150 static void
2151 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
2152 {
2153 VGPU10OpcodeToken0 opcode0;
2154 VGPU10OperandToken0 operand0;
2155 VGPU10NameToken name_token;
2156
2157 assert(emit->unit == PIPE_SHADER_FRAGMENT);
2158
2159 opcode0.value = operand0.value = name_token.value = 0;
2160
2161 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
2162 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
2163 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
2164 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
2165 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
2166
2167 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
2168 }
2169
2170
2171 /**
2172 * Emit the declaration for a system value input/output.
2173 */
2174 static void
2175 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
2176 unsigned semantic_name, unsigned index)
2177 {
2178 switch (semantic_name) {
2179 case TGSI_SEMANTIC_INSTANCEID:
2180 index = alloc_system_value_index(emit, index);
2181 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
2182 VGPU10_OPERAND_TYPE_INPUT,
2183 VGPU10_OPERAND_INDEX_1D,
2184 index, 1,
2185 VGPU10_NAME_INSTANCE_ID,
2186 VGPU10_OPERAND_4_COMPONENT,
2187 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
2188 VGPU10_OPERAND_4_COMPONENT_MASK_X,
2189 VGPU10_INTERPOLATION_UNDEFINED);
2190 break;
2191 case TGSI_SEMANTIC_VERTEXID:
2192 index = alloc_system_value_index(emit, index);
2193 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
2194 VGPU10_OPERAND_TYPE_INPUT,
2195 VGPU10_OPERAND_INDEX_1D,
2196 index, 1,
2197 VGPU10_NAME_VERTEX_ID,
2198 VGPU10_OPERAND_4_COMPONENT,
2199 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
2200 VGPU10_OPERAND_4_COMPONENT_MASK_X,
2201 VGPU10_INTERPOLATION_UNDEFINED);
2202 break;
2203 default:
2204 ; /* XXX */
2205 }
2206 }
2207
2208 /**
2209 * Translate a TGSI declaration to VGPU10.
2210 */
2211 static boolean
2212 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
2213 const struct tgsi_full_declaration *decl)
2214 {
2215 switch (decl->Declaration.File) {
2216 case TGSI_FILE_INPUT:
2217 /* do nothing - see emit_input_declarations() */
2218 return TRUE;
2219
2220 case TGSI_FILE_OUTPUT:
2221 assert(decl->Range.First == decl->Range.Last);
2222 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
2223 return TRUE;
2224
2225 case TGSI_FILE_TEMPORARY:
2226 /* Don't declare the temps here. Just keep track of how many
2227 * and emit the declaration later.
2228 */
2229 if (decl->Declaration.Array) {
2230 /* Indexed temporary array. Save the start index of the array
2231 * and the size of the array.
2232 */
2233 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
2234 unsigned i;
2235
2236 assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
2237
2238 /* Save this array so we can emit the declaration for it later */
2239 emit->temp_arrays[arrayID].start = decl->Range.First;
2240 emit->temp_arrays[arrayID].size =
2241 decl->Range.Last - decl->Range.First + 1;
2242
2243 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
2244 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
2245 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
2246
2247 /* Fill in the temp_map entries for this array */
2248 for (i = decl->Range.First; i <= decl->Range.Last; i++) {
2249 emit->temp_map[i].arrayId = arrayID;
2250 emit->temp_map[i].index = i - decl->Range.First;
2251 }
2252 }
2253
2254 /* for all temps, indexed or not, keep track of highest index */
2255 emit->num_shader_temps = MAX2(emit->num_shader_temps,
2256 decl->Range.Last + 1);
2257 return TRUE;
2258
2259 case TGSI_FILE_CONSTANT:
2260 /* Don't declare constants here. Just keep track and emit later. */
2261 {
2262 unsigned constbuf = 0, num_consts;
2263 if (decl->Declaration.Dimension) {
2264 constbuf = decl->Dim.Index2D;
2265 }
2266 /* If the constbuf index is out of bounds, the shader should have
2267 * failed to link, so we should never reach this point; the assertion
2268 * catches that case.
2269 */
2270 assert(constbuf < Elements(emit->num_shader_consts));
2271
2272 num_consts = MAX2(emit->num_shader_consts[constbuf],
2273 decl->Range.Last + 1);
2274
2275 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
2276 debug_printf("Warning: constant buffer is declared with size [%u]"
2277 " but the limit is [%u].\n",
2278 num_consts,
2279 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
2280 }
2281 /* The linker doesn't enforce the max UBO size so we clamp here */
2282 emit->num_shader_consts[constbuf] =
2283 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
2284 }
2285 return TRUE;
2286
2287 case TGSI_FILE_IMMEDIATE:
2288 assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
2289 return FALSE;
2290
2291 case TGSI_FILE_SYSTEM_VALUE:
2292 emit_system_value_declaration(emit, decl->Semantic.Name,
2293 decl->Range.First);
2294 return TRUE;
2295
2296 case TGSI_FILE_SAMPLER:
2297 /* Don't declare samplers here. Just keep track and emit later. */
2298 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
2299 return TRUE;
2300
2301 case TGSI_FILE_RESOURCE:
2302 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
2303 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
2304 assert(!"TGSI_FILE_RESOURCE not handled yet");
2305 return FALSE;
2306
2307 case TGSI_FILE_ADDRESS:
2308 emit->num_address_regs = MAX2(emit->num_address_regs,
2309 decl->Range.Last + 1);
2310 return TRUE;
2311
2312 case TGSI_FILE_SAMPLER_VIEW:
2313 /* Not used at this time, but maybe in the future.
2314 * See emit_resource_declarations().
2315 */
2316 return TRUE;
2317
2318 default:
2319 assert(!"Unexpected type of declaration");
2320 return FALSE;
2321 }
2322 }
2323
2324
2325
2326 /**
2327 * Emit all input declarations.
2328 */
2329 static boolean
2330 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
2331 {
2332 unsigned i;
2333
2334 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2335
2336 for (i = 0; i < emit->linkage.num_inputs; i++) {
2337 unsigned semantic_name = emit->info.input_semantic_name[i];
2338 unsigned usage_mask = emit->info.input_usage_mask[i];
2339 unsigned index = emit->linkage.input_map[i];
2340 unsigned type, interpolationMode, name;
2341
2342 if (usage_mask == 0)
2343 continue; /* register is not actually used */
2344
2345 if (semantic_name == TGSI_SEMANTIC_POSITION) {
2346 /* fragment position input */
2347 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
2348 interpolationMode = VGPU10_INTERPOLATION_LINEAR;
2349 name = VGPU10_NAME_POSITION;
2350 if (usage_mask & TGSI_WRITEMASK_W) {
2351 /* we need to replace use of 'w' with '1/w' */
2352 emit->fs.fragcoord_input_index = i;
2353 }
2354 }
2355 else if (semantic_name == TGSI_SEMANTIC_FACE) {
2356 /* fragment front-facing input */
2357 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
2358 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
2359 name = VGPU10_NAME_IS_FRONT_FACE;
2360 emit->fs.face_input_index = i;
2361 }
2362 else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
2363 /* primitive ID */
2364 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
2365 interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
2366 name = VGPU10_NAME_PRIMITIVE_ID;
2367 }
2368 else {
2369 /* general fragment input */
2370 type = VGPU10_OPCODE_DCL_INPUT_PS;
2371 interpolationMode =
2372 translate_interpolation(emit,
2373 emit->info.input_interpolate[i],
2374 emit->info.input_interpolate_loc[i]);
2375
2376 /* keep track of whether flat interpolation mode is being used */
2377 emit->uses_flat_interp = emit->uses_flat_interp ||
2378 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
2379
2380 name = VGPU10_NAME_UNDEFINED;
2381 }
2382
2383 emit_input_declaration(emit, type,
2384 VGPU10_OPERAND_TYPE_INPUT,
2385 VGPU10_OPERAND_INDEX_1D, index, 1,
2386 name,
2387 VGPU10_OPERAND_4_COMPONENT,
2388 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
2389 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
2390 interpolationMode);
2391 }
2392 }
2393 else if (emit->unit == PIPE_SHADER_GEOMETRY) {
2394
2395 for (i = 0; i < emit->info.num_inputs; i++) {
2396 unsigned semantic_name = emit->info.input_semantic_name[i];
2397 unsigned usage_mask = emit->info.input_usage_mask[i];
2398 unsigned index = emit->linkage.input_map[i];
2399 unsigned opcodeType, operandType;
2400 unsigned numComp, selMode;
2401 unsigned name;
2402 unsigned dim;
2403
2404 if (usage_mask == 0)
2405 continue; /* register is not actually used */
2406
2407 opcodeType = VGPU10_OPCODE_DCL_INPUT;
2408 operandType = VGPU10_OPERAND_TYPE_INPUT;
2409 numComp = VGPU10_OPERAND_4_COMPONENT;
2410 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
2411 name = VGPU10_NAME_UNDEFINED;
2412
2413 /* all geometry shader inputs are two-dimensional except gl_PrimitiveID */
2414 dim = VGPU10_OPERAND_INDEX_2D;
2415
2416 if (semantic_name == TGSI_SEMANTIC_PRIMID) {
2417 /* Primitive ID */
2418 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
2419 dim = VGPU10_OPERAND_INDEX_0D;
2420 numComp = VGPU10_OPERAND_0_COMPONENT;
2421 selMode = 0;
2422
2423 /* Also save the register index so we can check for the
2424 * primitive id when emitting a src register. We need to modify the
2425 * operand type and index dimension when emitting the primitive id src reg.
2426 */
2427 emit->gs.prim_id_index = i;
2428 }
2429 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
2430 /* vertex position input */
2431 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
2432 name = VGPU10_NAME_POSITION;
2433 }
2434
2435 emit_input_declaration(emit, opcodeType, operandType,
2436 dim, index,
2437 emit->gs.input_size,
2438 name,
2439 numComp, selMode,
2440 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
2441 VGPU10_INTERPOLATION_UNDEFINED);
2442 }
2443 }
2444 else {
2445 assert(emit->unit == PIPE_SHADER_VERTEX);
2446
2447 for (i = 0; i < emit->info.num_inputs; i++) {
2448 unsigned usage_mask = emit->info.input_usage_mask[i];
2449 unsigned index = i;
2450
2451 if (usage_mask == 0)
2452 continue; /* register is not actually used */
2453
2454 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
2455 VGPU10_OPERAND_TYPE_INPUT,
2456 VGPU10_OPERAND_INDEX_1D, index, 1,
2457 VGPU10_NAME_UNDEFINED,
2458 VGPU10_OPERAND_4_COMPONENT,
2459 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
2460 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
2461 VGPU10_INTERPOLATION_UNDEFINED);
2462 }
2463 }
2464
2465 return TRUE;
2466 }
2467
2468
2469 /**
2470 * Emit all output declarations.
2471 */
2472 static boolean
2473 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
2474 {
2475 unsigned i;
2476
2477 for (i = 0; i < emit->info.num_outputs; i++) {
2478 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
2479 const unsigned semantic_name = emit->info.output_semantic_name[i];
2480 const unsigned semantic_index = emit->info.output_semantic_index[i];
2481 unsigned index = i;
2482
2483 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2484 if (semantic_name == TGSI_SEMANTIC_COLOR) {
2485 assert(semantic_index < Elements(emit->fs.color_out_index));
2486
2487 emit->fs.color_out_index[semantic_index] = index;
2488
2489 /* The semantic index is the shader's color output/buffer index */
2490 emit_output_declaration(emit,
2491 VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
2492 VGPU10_NAME_UNDEFINED,
2493 VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2494
2495 if (semantic_index == 0) {
2496 if (emit->key.fs.write_color0_to_n_cbufs > 1) {
2497 /* Emit declarations for the additional color outputs
2498 * for broadcasting.
2499 */
2500 unsigned j;
2501 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
2502 /* Allocate a new output index */
2503 unsigned idx = emit->info.num_outputs + j - 1;
2504 emit->fs.color_out_index[j] = idx;
2505 emit_output_declaration(emit,
2506 VGPU10_OPCODE_DCL_OUTPUT, idx,
2507 VGPU10_NAME_UNDEFINED,
2508 VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2509 emit->info.output_semantic_index[idx] = j;
2510 }
2511 }
2512 }
2513 else {
2514 assert(!emit->key.fs.write_color0_to_n_cbufs);
2515 }
2516 }
2517 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
2518 /* Fragment depth output */
2519 emit_fragdepth_output_declaration(emit);
2520 }
2521 else {
2522 assert(!"Bad output semantic name");
2523 }
2524 }
2525 else {
2526 /* VS or GS */
2527 unsigned name, type;
2528 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
2529
2530 switch (semantic_name) {
2531 case TGSI_SEMANTIC_POSITION:
2532 assert(emit->unit != PIPE_SHADER_FRAGMENT);
2533 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
2534 name = VGPU10_NAME_POSITION;
2535 /* Save the index of the vertex position output register */
2536 emit->vposition.out_index = index;
2537 break;
2538 case TGSI_SEMANTIC_CLIPDIST:
2539 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
2540 name = VGPU10_NAME_CLIP_DISTANCE;
2541 /* save the starting index of the clip distance output register */
2542 if (semantic_index == 0)
2543 emit->clip_dist_out_index = index;
2544 writemask = emit->output_usage_mask[index];
2545 writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
2546 if (writemask == 0x0) {
2547 continue; /* discard this do-nothing declaration */
2548 }
2549 break;
2550 case TGSI_SEMANTIC_PRIMID:
2551 assert(emit->unit == PIPE_SHADER_GEOMETRY);
2552 type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
2553 name = VGPU10_NAME_PRIMITIVE_ID;
2554 break;
2555 case TGSI_SEMANTIC_LAYER:
2556 assert(emit->unit == PIPE_SHADER_GEOMETRY);
2557 type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
2558 name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
2559 break;
2560 case TGSI_SEMANTIC_CLIPVERTEX:
2561 type = VGPU10_OPCODE_DCL_OUTPUT;
2562 name = VGPU10_NAME_UNDEFINED;
2563 emit->clip_vertex_out_index = index;
2564 break;
2565 default:
2566 /* generic output */
2567 type = VGPU10_OPCODE_DCL_OUTPUT;
2568 name = VGPU10_NAME_UNDEFINED;
2569 }
2570
2571 emit_output_declaration(emit, type, index, name, writemask);
2572 }
2573 }
2574
2575 if (emit->vposition.so_index != INVALID_INDEX &&
2576 emit->vposition.out_index != INVALID_INDEX) {
2577
2578 assert(emit->unit != PIPE_SHADER_FRAGMENT);
2579
2580 /* Emit the declaration for the non-adjusted vertex position
2581 * for stream output purposes.
2582 */
2583 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
2584 emit->vposition.so_index,
2585 VGPU10_NAME_UNDEFINED,
2586 VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
2587 }
2588
2589 if (emit->clip_dist_so_index != INVALID_INDEX &&
2590 emit->clip_dist_out_index != INVALID_INDEX) {
2591
2592 assert(emit->unit != PIPE_SHADER_FRAGMENT);
2593
2594 /* Emit the declaration for the clip distance shadow copy which
2595 * will be used for stream output and for the clip distance
2596 * varying variable.
2597 */
2598 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
2599 emit->clip_dist_so_index,
2600 VGPU10_NAME_UNDEFINED,
2601 emit->output_usage_mask[emit->clip_dist_out_index]);
2602
2603 if (emit->info.num_written_clipdistance > 4) {
2604 /* for the second clip distance register, each handles 4 planes */
2605 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
2606 emit->clip_dist_so_index + 1,
2607 VGPU10_NAME_UNDEFINED,
2608 emit->output_usage_mask[emit->clip_dist_out_index+1]);
2609 }
2610 }
2611
2612 return TRUE;
2613 }
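
/* Color-broadcast example (a sketch): for a fragment shader with a single
 * TGSI color output and key.fs.write_color0_to_n_cbufs == 3, the loop above
 * declares two extra color outputs at indices info.num_outputs and
 * info.num_outputs + 1 so that color 0 can later be replicated to all three
 * color buffers.
 */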
2614
2615
2616 /**
2617 * Emit the declaration for the temporary registers.
2618 */
2619 static boolean
2620 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
2621 {
2622 unsigned total_temps, reg, i;
2623
2624 total_temps = emit->num_shader_temps;
2625
2626 /* Allocate extra temps for specially-implemented instructions,
2627 * such as LIT.
2628 */
2629 total_temps += MAX_INTERNAL_TEMPS;
2630
2631 if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
2632 if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
2633 emit->key.clip_plane_enable ||
2634 emit->vposition.so_index != INVALID_INDEX) {
2635 emit->vposition.tmp_index = total_temps;
2636 total_temps += 1;
2637 }
2638
2639 if (emit->unit == PIPE_SHADER_VERTEX) {
2640 unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
2641 emit->key.vs.adjust_attrib_itof |
2642 emit->key.vs.adjust_attrib_utof |
2643 emit->key.vs.attrib_is_bgra |
2644 emit->key.vs.attrib_puint_to_snorm |
2645 emit->key.vs.attrib_puint_to_uscaled |
2646 emit->key.vs.attrib_puint_to_sscaled);
2647 while (attrib_mask) {
2648 unsigned index = u_bit_scan(&attrib_mask);
2649 emit->vs.adjusted_input[index] = total_temps++;
2650 }
2651 }
2652
2653 if (emit->clip_mode == CLIP_DISTANCE) {
2654 /* We need to write the clip distance to a temporary register
2655 * first. Then it will be copied to the shadow copy for
2656 * the clip distance varying variable and for stream output.
2657 * It will also be copied to the actual CLIPDIST register
2658 * according to the enabled clip planes.
2659 */
2660 emit->clip_dist_tmp_index = total_temps++;
2661 if (emit->info.num_written_clipdistance > 4)
2662 total_temps++; /* second clip register */
2663 }
2664 else if (emit->clip_mode == CLIP_VERTEX) {
2665 /* We need to convert the TGSI CLIPVERTEX output to one or more
2666 * clip distances. Allocate a temp reg for the clipvertex here.
2667 */
2668 assert(emit->info.writes_clipvertex > 0);
2669 emit->clip_vertex_tmp_index = total_temps;
2670 total_temps++;
2671 }
2672 }
2673 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
2674 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
2675 emit->key.fs.white_fragments ||
2676 emit->key.fs.write_color0_to_n_cbufs > 1) {
2677 /* Allocate a temp to hold the output color */
2678 emit->fs.color_tmp_index = total_temps;
2679 total_temps += 1;
2680 }
2681
2682 if (emit->fs.face_input_index != INVALID_INDEX) {
2683 /* Allocate a temp for the +/-1 face register */
2684 emit->fs.face_tmp_index = total_temps;
2685 total_temps += 1;
2686 }
2687
2688 if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
2689 /* Allocate a temp for modified fragment position register */
2690 emit->fs.fragcoord_tmp_index = total_temps;
2691 total_temps += 1;
2692 }
2693 }
2694
2695 for (i = 0; i < emit->num_address_regs; i++) {
2696 emit->address_reg_index[i] = total_temps++;
2697 }
2698
2699 /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
2700 * temp indexes. Basically, we compact all the non-array temp register
2701 * indexes into a consecutive series.
2702 *
2703 * Before, we may have some TGSI declarations like:
2704 * DCL TEMP[0..1], LOCAL
2705 * DCL TEMP[2..4], ARRAY(1), LOCAL
2706 * DCL TEMP[5..7], ARRAY(2), LOCAL
2707 * plus, some extra temps, like TEMP[8], TEMP[9] for misc things
2708 *
2709 * After, we'll have a map like this:
2710 * temp_map[0] = { array 0, index 0 }
2711 * temp_map[1] = { array 0, index 1 }
2712 * temp_map[2] = { array 1, index 0 }
2713 * temp_map[3] = { array 1, index 1 }
2714 * temp_map[4] = { array 1, index 2 }
2715 * temp_map[5] = { array 2, index 0 }
2716 * temp_map[6] = { array 2, index 1 }
2717 * temp_map[7] = { array 2, index 2 }
2718 * temp_map[8] = { array 0, index 2 }
2719 * temp_map[9] = { array 0, index 3 }
2720 *
2721 * We'll declare two arrays of 3 elements, plus a set of four non-indexed
2722 * temps numbered 0..3
2723 *
2724 * Any time we emit a temporary register index, we'll have to use the
2725 * temp_map[] table to convert the TGSI index to the VGPU10 index.
2726 *
2727 * Finally, we recompute the total_temps value here.
2728 */
2729 reg = 0;
2730 for (i = 0; i < total_temps; i++) {
2731 if (emit->temp_map[i].arrayId == 0) {
2732 emit->temp_map[i].index = reg++;
2733 }
2734 }
2735 total_temps = reg;
2736
2737 if (0) {
2738 debug_printf("total_temps %u\n", total_temps);
2739 for (i = 0; i < 30; i++) {
2740 debug_printf("temp %u -> array %u index %u\n",
2741 i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
2742 }
2743 }
2744
2745 /* Emit declaration of ordinary temp registers */
2746 if (total_temps > 0) {
2747 VGPU10OpcodeToken0 opcode0;
2748
2749 opcode0.value = 0;
2750 opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
2751
2752 begin_emit_instruction(emit);
2753 emit_dword(emit, opcode0.value);
2754 emit_dword(emit, total_temps);
2755 end_emit_instruction(emit);
2756 }
2757
2758 /* Emit declarations for indexable temp arrays. Skip 0th entry since
2759 * it's unused.
2760 */
2761 for (i = 1; i < emit->num_temp_arrays; i++) {
2762 unsigned num_temps = emit->temp_arrays[i].size;
2763
2764 if (num_temps > 0) {
2765 VGPU10OpcodeToken0 opcode0;
2766
2767 opcode0.value = 0;
2768 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
2769
2770 begin_emit_instruction(emit);
2771 emit_dword(emit, opcode0.value);
2772 emit_dword(emit, i); /* which array */
2773 emit_dword(emit, num_temps);
2774 emit_dword(emit, 4); /* num components */
2775 end_emit_instruction(emit);
2776
2777 total_temps += num_temps;
2778 }
2779 }
2780
2781 /* Check that the grand total of all regular and indexed temps is
2782 * under the limit.
2783 */
2784 check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
2785
2786 return TRUE;
2787 }
2788
2789
2790 static boolean
2791 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
2792 {
2793 VGPU10OpcodeToken0 opcode0;
2794 VGPU10OperandToken0 operand0;
2795 unsigned total_consts, i;
2796
2797 opcode0.value = 0;
2798 opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
2799 opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
2800 /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
2801
2802 operand0.value = 0;
2803 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2804 operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
2805 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2806 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2807 operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
2808 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2809 operand0.swizzleX = 0;
2810 operand0.swizzleY = 1;
2811 operand0.swizzleZ = 2;
2812 operand0.swizzleW = 3;
2813
2814 /**
2815 * Emit declaration for constant buffer [0]. We also allocate
2816 * room for the extra constants here.
2817 */
2818 total_consts = emit->num_shader_consts[0];
2819
2820 /* Now, allocate constant slots for the "extra" constants */
2821
2822 /* Vertex position scale/translation */
2823 if (emit->vposition.need_prescale) {
2824 emit->vposition.prescale_scale_index = total_consts++;
2825 emit->vposition.prescale_trans_index = total_consts++;
2826 }
2827
2828 if (emit->unit == PIPE_SHADER_VERTEX) {
2829 if (emit->key.vs.undo_viewport) {
2830 emit->vs.viewport_index = total_consts++;
2831 }
2832 }
2833
2834 /* user-defined clip planes */
2835 if (emit->key.clip_plane_enable) {
2836 unsigned n = util_bitcount(emit->key.clip_plane_enable);
2837 assert(emit->unit == PIPE_SHADER_VERTEX ||
2838 emit->unit == PIPE_SHADER_GEOMETRY);
2839 for (i = 0; i < n; i++) {
2840 emit->clip_plane_const[i] = total_consts++;
2841 }
2842 }
2843
2844 /* Texcoord scale factors for RECT textures */
2845 {
2846 for (i = 0; i < emit->num_samplers; i++) {
2847 if (emit->key.tex[i].unnormalized) {
2848 emit->texcoord_scale_index[i] = total_consts++;
2849 }
2850 }
2851 }
2852
2853 /* Texture buffer sizes */
2854 for (i = 0; i < emit->num_samplers; i++) {
2855 if (emit->key.tex[i].texture_target == PIPE_BUFFER) {
2856 emit->texture_buffer_size_index[i] = total_consts++;
2857 }
2858 }
2859
2860 if (total_consts > 0) {
2861 begin_emit_instruction(emit);
2862 emit_dword(emit, opcode0.value);
2863 emit_dword(emit, operand0.value);
2864 emit_dword(emit, 0); /* which const buffer slot */
2865 emit_dword(emit, total_consts);
2866 end_emit_instruction(emit);
2867 }
2868
2869 /* Declare remaining constant buffers (UBOs) */
2870 for (i = 1; i < Elements(emit->num_shader_consts); i++) {
2871 if (emit->num_shader_consts[i] > 0) {
2872 begin_emit_instruction(emit);
2873 emit_dword(emit, opcode0.value);
2874 emit_dword(emit, operand0.value);
2875 emit_dword(emit, i); /* which const buffer slot */
2876 emit_dword(emit, emit->num_shader_consts[i]);
2877 end_emit_instruction(emit);
2878 }
2879 }
2880
2881 return TRUE;
2882 }
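
/* Worked example (a sketch): a VS with num_shader_consts[0] == 10 that needs
 * prescale and undo_viewport gets const[10]/const[11] for the prescale
 * scale/translation and const[12] for the viewport constant; any clip-plane,
 * texcoord-scale and texture-buffer-size constants follow, and the cb0
 * declaration above covers the grand total.
 */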
2883
2884
2885 /**
2886 * Emit declarations for samplers.
2887 */
2888 static boolean
2889 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
2890 {
2891 unsigned i;
2892
2893 for (i = 0; i < emit->num_samplers; i++) {
2894 VGPU10OpcodeToken0 opcode0;
2895 VGPU10OperandToken0 operand0;
2896
2897 opcode0.value = 0;
2898 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
2899 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
2900
2901 operand0.value = 0;
2902 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
2903 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
2904 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2905 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
2906
2907 begin_emit_instruction(emit);
2908 emit_dword(emit, opcode0.value);
2909 emit_dword(emit, operand0.value);
2910 emit_dword(emit, i);
2911 end_emit_instruction(emit);
2912 }
2913
2914 return TRUE;
2915 }
2916
2917
2918 /**
2919 * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
2920 */
2921 static unsigned
2922 pipe_texture_to_resource_dimension(unsigned target, bool msaa)
2923 {
2924 switch (target) {
2925 case PIPE_BUFFER:
2926 return VGPU10_RESOURCE_DIMENSION_BUFFER;
2927 case PIPE_TEXTURE_1D:
2928 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
2929 case PIPE_TEXTURE_2D:
2930 case PIPE_TEXTURE_RECT:
2931 return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS
2932 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2933 case PIPE_TEXTURE_3D:
2934 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
2935 case PIPE_TEXTURE_CUBE:
2936 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
2937 case PIPE_TEXTURE_1D_ARRAY:
2938 return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY;
2939 case PIPE_TEXTURE_2D_ARRAY:
2940 return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
2941 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
2942 case PIPE_TEXTURE_CUBE_ARRAY:
2943 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
2944 default:
2945 assert(!"Unexpected resource type");
2946 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
2947 }
2948 }
2949
2950
2951 /**
2952 * Given a tgsi_return_type, return true iff it is an integer type.
2953 */
2954 static boolean
2955 is_integer_type(enum tgsi_return_type type)
2956 {
2957 switch (type) {
2958 case TGSI_RETURN_TYPE_SINT:
2959 case TGSI_RETURN_TYPE_UINT:
2960 return TRUE;
2961 case TGSI_RETURN_TYPE_FLOAT:
2962 case TGSI_RETURN_TYPE_UNORM:
2963 case TGSI_RETURN_TYPE_SNORM:
2964 return FALSE;
2965 case TGSI_RETURN_TYPE_COUNT:
2966 default:
2967 assert(!"is_integer_type: Unknown tgsi_return_type");
2968 return FALSE;
2969 }
2970 }
2971
2972
2973 /**
2974 * Emit declarations for resources.
2975 * XXX When we're sure that all TGSI shaders will be generated with
2976 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
2977 * rework this code.
2978 */
2979 static boolean
2980 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
2981 {
2982 unsigned i;
2983
2984 /* Emit resource decl for each sampler */
2985 for (i = 0; i < emit->num_samplers; i++) {
2986 VGPU10OpcodeToken0 opcode0;
2987 VGPU10OperandToken0 operand0;
2988 VGPU10ResourceReturnTypeToken return_type;
2989 VGPU10_RESOURCE_RETURN_TYPE rt;
2990
2991 opcode0.value = 0;
2992 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
2993 opcode0.resourceDimension =
2994 pipe_texture_to_resource_dimension(emit->key.tex[i].texture_target,
2995 emit->key.tex[i].texture_msaa);
2996 operand0.value = 0;
2997 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
2998 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
2999 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
3000 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3001
3002 #if 1
3003 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
3004 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
3005 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
3006 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
3007 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
3008 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
3009 assert(emit->key.tex[i].return_type <= TGSI_RETURN_TYPE_FLOAT);
3010 rt = emit->key.tex[i].return_type + 1;
3011 #else
3012 switch (emit->key.tex[i].return_type) {
3013 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
3014 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
3015 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break;
3016 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break;
3017 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
3018 case TGSI_RETURN_TYPE_COUNT:
3019 default:
3020 rt = VGPU10_RETURN_TYPE_FLOAT;
3021 assert(!"emit_resource_declarations: Unknown tgsi_return_type");
3022 }
3023 #endif
3024
3025 return_type.value = 0;
3026 return_type.component0 = rt;
3027 return_type.component1 = rt;
3028 return_type.component2 = rt;
3029 return_type.component3 = rt;
3030
3031 begin_emit_instruction(emit);
3032 emit_dword(emit, opcode0.value);
3033 emit_dword(emit, operand0.value);
3034 emit_dword(emit, i);
3035 emit_dword(emit, return_type.value);
3036 end_emit_instruction(emit);
3037 }
3038
3039 return TRUE;
3040 }
3041
3042 static void
3043 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
3044 unsigned opcode,
3045 const struct tgsi_full_dst_register *dst,
3046 const struct tgsi_full_src_register *src,
3047 boolean saturate)
3048 {
3049 begin_emit_instruction(emit);
3050 emit_opcode(emit, opcode, saturate);
3051 emit_dst_register(emit, dst);
3052 emit_src_register(emit, src);
3053 end_emit_instruction(emit);
3054 }
3055
3056 static void
3057 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
3058 unsigned opcode,
3059 const struct tgsi_full_dst_register *dst,
3060 const struct tgsi_full_src_register *src1,
3061 const struct tgsi_full_src_register *src2,
3062 boolean saturate)
3063 {
3064 begin_emit_instruction(emit);
3065 emit_opcode(emit, opcode, saturate);
3066 emit_dst_register(emit, dst);
3067 emit_src_register(emit, src1);
3068 emit_src_register(emit, src2);
3069 end_emit_instruction(emit);
3070 }
3071
3072 static void
3073 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
3074 unsigned opcode,
3075 const struct tgsi_full_dst_register *dst,
3076 const struct tgsi_full_src_register *src1,
3077 const struct tgsi_full_src_register *src2,
3078 const struct tgsi_full_src_register *src3,
3079 boolean saturate)
3080 {
3081 begin_emit_instruction(emit);
3082 emit_opcode(emit, opcode, saturate);
3083 emit_dst_register(emit, dst);
3084 emit_src_register(emit, src1);
3085 emit_src_register(emit, src2);
3086 emit_src_register(emit, src3);
3087 end_emit_instruction(emit);
3088 }
3089
3090 /**
3091 * Emit the actual clip distance instructions used for clipping:
3092 * copy the clip distances from the temporary registers to the
3093 * CLIPDIST registers, written with the enabled clip planes mask.
3094 * Also copy the clip distances from the temporary to the clip distance
3095 * shadow copy register, which will be referenced as an input by the next shader.
3096 */
3097 static void
3098 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
3099 {
3100 struct tgsi_full_src_register tmp_clip_dist_src;
3101 struct tgsi_full_dst_register clip_dist_dst;
3102
3103 unsigned i;
3104 unsigned clip_plane_enable = emit->key.clip_plane_enable;
3105 unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
3106 int num_written_clipdist = emit->info.num_written_clipdistance;
3107
3108 assert(emit->clip_dist_out_index != INVALID_INDEX);
3109 assert(emit->clip_dist_tmp_index != INVALID_INDEX);
3110
3111 /**
3112 * Temporarily reset the temporary clip dist register index so
3113 * that the copy to the real clip dist register will not
3114 * attempt to copy to the temporary register again.
3115 */
3116 emit->clip_dist_tmp_index = INVALID_INDEX;
3117
3118 for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
3119
3120 tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
3121
3122 /**
3123 * Copy to the shadow copy for use by the varying variable and
3124 * stream output. All clip distances
3125 * will be written regardless of the enabled clipping planes.
3126 */
3127 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
3128 emit->clip_dist_so_index + i);
3129
3130 /* MOV clip_dist_so, tmp_clip_dist */
3131 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
3132 &tmp_clip_dist_src, FALSE);
3133
3134 /**
3135 * copy those clip distances to enabled clipping planes
3136 * to CLIPDIST registers for clipping
3137 */
3138 if (clip_plane_enable & 0xf) {
3139 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
3140 emit->clip_dist_out_index + i);
3141 clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
3142
3143 /* MOV CLIPDIST, tmp_clip_dist */
3144 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
3145 &tmp_clip_dist_src, FALSE);
3146 }
3147 /* four clip planes per clip register */
3148 clip_plane_enable >>= 4;
3149 }
3150 /**
3151 * set the temporary clip dist register index back to the
3152 * temporary index for the next vertex
3153 */
3154 emit->clip_dist_tmp_index = clip_dist_tmp_index;
3155 }
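
/* For example (a sketch): with six written clip distances and
 * clip_plane_enable == 0x3f, the loop above runs twice: the first pass copies
 * tmp[0] to the shadow copy and to CLIPDIST[0].xyzw, the second copies
 * tmp[1] to the next shadow register and to CLIPDIST[1].xy.
 */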
3156
3157 /* Declare clip distance output registers for user-defined clip planes
3158 * or the TGSI_CLIPVERTEX output.
3159 */
3160 static void
3161 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
3162 {
3163 unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
3164 unsigned index = emit->num_outputs;
3165 unsigned plane_mask;
3166
3167 assert(emit->unit == PIPE_SHADER_VERTEX ||
3168 emit->unit == PIPE_SHADER_GEOMETRY);
3169 assert(num_clip_planes <= 8);
3170
3171 if (emit->clip_mode != CLIP_LEGACY &&
3172 emit->clip_mode != CLIP_VERTEX) {
3173 return;
3174 }
3175
3176 if (num_clip_planes == 0)
3177 return;
3178
3179 /* Declare one or two clip output registers. The number of components
3180 * in the mask reflects the number of clip planes. For example, if 5
3181 * clip planes are needed, we'll declare outputs similar to:
3182 * dcl_output_siv o2.xyzw, clip_distance
3183 * dcl_output_siv o3.x, clip_distance
3184 */
3185 emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
3186
3187 plane_mask = (1 << num_clip_planes) - 1;
3188 if (plane_mask & 0xf) {
3189 unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3190 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
3191 VGPU10_NAME_CLIP_DISTANCE, cmask);
3192 emit->num_outputs++;
3193 }
3194 if (plane_mask & 0xf0) {
3195 unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3196 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
3197 VGPU10_NAME_CLIP_DISTANCE, cmask);
3198 emit->num_outputs++;
3199 }
3200 }
3201
3202
3203 /**
3204 * Emit the instructions for writing to the clip distance registers
3205 * to handle legacy/automatic clip planes.
3206 * For each clip plane, the distance is the dot product of the vertex
3207 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
3208 * This is not used when the shader already declares explicit CLIPVERTEX or
3209 * CLIPDISTANCE output registers.
3210 */
3211 static void
3212 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
3213 unsigned vpos_tmp_index)
3214 {
3215 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
3216
3217 assert(emit->clip_mode == CLIP_LEGACY);
3218 assert(num_clip_planes <= 8);
3219
3220 assert(emit->unit == PIPE_SHADER_VERTEX ||
3221 emit->unit == PIPE_SHADER_GEOMETRY);
3222
3223 for (i = 0; i < num_clip_planes; i++) {
3224 struct tgsi_full_dst_register dst;
3225 struct tgsi_full_src_register plane_src, vpos_src;
3226 unsigned reg_index = emit->clip_dist_out_index + i / 4;
3227 unsigned comp = i % 4;
3228 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
3229
3230 /* create dst, src regs */
3231 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
3232 dst = writemask_dst(&dst, writemask);
3233
3234 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
3235 vpos_src = make_src_temp_reg(vpos_tmp_index);
3236
3237 /* DP4 clip_dist, plane, vpos */
3238 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
3239 &plane_src, &vpos_src, FALSE);
3240 }
3241 }
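
/* For example (a sketch): with five legacy clip planes enabled, planes 0-3
 * write the .x/.y/.z/.w components of the first CLIPDIST output register and
 * plane 4 writes the .x component of the second, each via one DP4 of the
 * vertex position against the corresponding clip-plane constant.
 */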
3242
3243
3244 /**
3245 * Emit the instructions for computing the clip distance results from
3246 * the clip vertex temporary.
3247 * For each clip plane, the distance is the dot product of the clip vertex
3248 * position (found in a temp reg) and the clip plane coefficients.
3249 */
3250 static void
3251 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
3252 {
3253 const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
3254 unsigned i;
3255 struct tgsi_full_dst_register dst;
3256 struct tgsi_full_src_register clipvert_src;
3257 const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
3258
3259 assert(emit->unit == PIPE_SHADER_VERTEX ||
3260 emit->unit == PIPE_SHADER_GEOMETRY);
3261
3262 assert(emit->clip_mode == CLIP_VERTEX);
3263
3264 clipvert_src = make_src_temp_reg(clip_vertex_tmp);
3265
3266 for (i = 0; i < num_clip; i++) {
3267 struct tgsi_full_src_register plane_src;
3268 unsigned reg_index = emit->clip_dist_out_index + i / 4;
3269 unsigned comp = i % 4;
3270 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
3271
3272 /* create dst, src regs */
3273 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
3274 dst = writemask_dst(&dst, writemask);
3275
3276 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
3277
3278 /* DP4 clip_dist, plane, clipvert */
3279 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
3280 &plane_src, &clipvert_src, FALSE);
3281 }
3282
3283 /* copy temporary clip vertex register to the clip vertex register */
3284
3285 assert(emit->clip_vertex_out_index != INVALID_INDEX);
3286
3287 /**
3288 * Temporarily reset the temporary clip vertex register index so
3289 * that the copy to the clip vertex register will not attempt
3290 * to copy to the temporary register again.
3291 */
3292 emit->clip_vertex_tmp_index = INVALID_INDEX;
3293
3294 /* MOV clip_vertex, clip_vertex_tmp */
3295 dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
3296 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
3297 &dst, &clipvert_src, FALSE);
3298
3299 /**
3300 * set the temporary clip vertex register index back to the
3301 * temporary index for the next vertex
3302 */
3303 emit->clip_vertex_tmp_index = clip_vertex_tmp;
3304 }
3305
3306 /**
3307 * Emit code to convert RGBA to BGRA
3308 */
3309 static void
3310 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
3311 const struct tgsi_full_dst_register *dst,
3312 const struct tgsi_full_src_register *src)
3313 {
3314 struct tgsi_full_src_register bgra_src =
3315 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
3316
3317 begin_emit_instruction(emit);
3318 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
3319 emit_dst_register(emit, dst);
3320 emit_src_register(emit, &bgra_src);
3321 end_emit_instruction(emit);
3322 }
3323
3324
3325 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
3326 static void
3327 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
3328 const struct tgsi_full_dst_register *dst,
3329 const struct tgsi_full_src_register *src)
3330 {
3331 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
3332 struct tgsi_full_src_register two =
3333 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
3334 struct tgsi_full_src_register neg_two =
3335 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
3336
3337 unsigned val_tmp = get_temp_index(emit);
3338 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
3339 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
3340
3341 unsigned bias_tmp = get_temp_index(emit);
3342 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
3343 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
3344
3345 /* val = src * 2.0 */
3346 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst,
3347 src, &two, FALSE);
3348
3349 /* bias = src > 0.5 */
3350 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst,
3351 src, &half, FALSE);
3352
3353 /* bias = bias & -2.0 */
3354 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
3355 &bias_src, &neg_two, FALSE);
3356
3357 /* dst = val + bias */
3358 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
3359 &val_src, &bias_src, FALSE);
3360
3361 free_temp_indexes(emit);
3362 }
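
/* Worked example of the arithmetic above (a sketch): for an x/y/z component
 * with value 0.6, val = 1.2 and the GE mask is all-ones, so bias = -2.0 and
 * the result is -0.8; for 0.4 the mask is zero, bias = 0.0 and the result is
 * 0.8. Components at or above 0.5 are thus shifted down by 2.0 into the
 * negative half of the snorm range.
 */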
3363
3364
3365 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
3366 static void
3367 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
3368 const struct tgsi_full_dst_register *dst,
3369 const struct tgsi_full_src_register *src)
3370 {
3371 struct tgsi_full_src_register scale =
3372 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
3373
3374 /* dst = src * scale */
3375 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE);
3376 }
3377
3378
3379 /** Convert from R32_UINT to 10_10_10_2_sscaled */
3380 static void
3381 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
3382 const struct tgsi_full_dst_register *dst,
3383 const struct tgsi_full_src_register *src)
3384 {
3385 struct tgsi_full_src_register lshift =
3386 make_immediate_reg_int4(emit, 22, 12, 2, 0);
3387 struct tgsi_full_src_register rshift =
3388 make_immediate_reg_int4(emit, 22, 22, 22, 30);
3389
3390 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
3391
3392 unsigned tmp = get_temp_index(emit);
3393 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3394 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3395
3396 /*
3397 * r = (pixel << 22) >> 22; # signed int in [-512, 511]
3398 * g = (pixel << 12) >> 22; # signed int in [-512, 511]
3399 * b = (pixel << 2) >> 22; # signed int in [-512, 511]
3400 * a = (pixel << 0) >> 30; # signed int in [-2, 1]
3401 * dst = i_to_f(r,g,b,a); # convert to float
3402 */
3403 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
3404 &src_xxxx, &lshift, FALSE);
3405 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
3406 &tmp_src, &rshift, FALSE);
3407 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE);
3408
3409 free_temp_indexes(emit);
3410 }
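
/* For example (a sketch): the red field lives in bits [9:0], so
 * (pixel << 22) moves it to the top of the 32-bit word and the arithmetic
 * shift >> 22 sign-extends it back down, yielding a signed integer in
 * [-512, 511]; ITOF then converts the extracted fields to float.
 */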
3411
3412
3413 /**
3414 * Emit code for TGSI_OPCODE_ABS instruction.
3415 */
3416 static boolean
3417 emit_abs(struct svga_shader_emitter_v10 *emit,
3418 const struct tgsi_full_instruction *inst)
3419 {
3420 /* dst = ABS(s0):
3421 * dst = abs(s0)
3422 * Translates into:
3423 * MOV dst, abs(s0)
3424 */
3425 struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]);
3426
3427 /* MOV dst, abs(s0) */
3428 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
3429 &abs_src0, inst->Instruction.Saturate);
3430
3431 return TRUE;
3432 }
3433
3434
3435 /**
3436 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
3437 */
3438 static boolean
3439 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
3440 const struct tgsi_full_instruction *inst)
3441 {
3442 unsigned index = inst->Dst[0].Register.Index;
3443 struct tgsi_full_dst_register dst;
3444 unsigned opcode;
3445
3446 assert(index < MAX_VGPU10_ADDR_REGS);
3447 dst = make_dst_temp_reg(emit->address_reg_index[index]);
3448
3449 /* ARL dst, s0
3450 * Translates into:
3451 * FTOI address_tmp, s0
3452 *
3453 * UARL dst, s0
3454 * Translates into:
3455 * MOV address_tmp, s0
3456 */
3457 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
3458 opcode = VGPU10_OPCODE_FTOI;
3459 else
3460 opcode = VGPU10_OPCODE_MOV;
3461
3462 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE);
3463
3464 return TRUE;
3465 }
3466
3467
3468 /**
3469 * Emit code for TGSI_OPCODE_CAL instruction.
3470 */
3471 static boolean
3472 emit_cal(struct svga_shader_emitter_v10 *emit,
3473 const struct tgsi_full_instruction *inst)
3474 {
3475 unsigned label = inst->Label.Label;
3476 VGPU10OperandToken0 operand;
3477 operand.value = 0;
3478 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
3479
3480 begin_emit_instruction(emit);
3481 emit_dword(emit, operand.value);
3482 emit_dword(emit, label);
3483 end_emit_instruction(emit);
3484
3485 return TRUE;
3486 }
3487
3488
3489 /**
3490 * Emit code for TGSI_OPCODE_IABS instruction.
3491 */
3492 static boolean
3493 emit_iabs(struct svga_shader_emitter_v10 *emit,
3494 const struct tgsi_full_instruction *inst)
3495 {
3496 /* dst.x = (src0.x < 0) ? -src0.x : src0.x
3497 * dst.y = (src0.y < 0) ? -src0.y : src0.y
3498 * dst.z = (src0.z < 0) ? -src0.z : src0.z
3499 * dst.w = (src0.w < 0) ? -src0.w : src0.w
3500 *
3501 * Translates into
3502 * IMAX dst, src, neg(src)
3503 */
3504 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
3505 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
3506 &inst->Src[0], &neg_src, FALSE);
3507
3508 return TRUE;
3509 }
3510
3511
3512 /**
3513 * Emit code for TGSI_OPCODE_CMP instruction.
3514 */
3515 static boolean
3516 emit_cmp(struct svga_shader_emitter_v10 *emit,
3517 const struct tgsi_full_instruction *inst)
3518 {
3519 /* dst.x = (src0.x < 0) ? src1.x : src2.x
3520 * dst.y = (src0.y < 0) ? src1.y : src2.y
3521 * dst.z = (src0.z < 0) ? src1.z : src2.z
3522 * dst.w = (src0.w < 0) ? src1.w : src2.w
3523 *
3524 * Translates into
3525 * LT tmp, src0, 0.0
3526 * MOVC dst, tmp, src1, src2
3527 */
3528 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
3529 unsigned tmp = get_temp_index(emit);
3530 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3531 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3532
3533 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst,
3534 &inst->Src[0], &zero, FALSE);
3535 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
3536 &tmp_src, &inst->Src[1], &inst->Src[2],
3537 inst->Instruction.Saturate);
3538
3539 free_temp_indexes(emit);
3540
3541 return TRUE;
3542 }
3543
3544
3545 /**
3546 * Emit code for TGSI_OPCODE_DP2A instruction.
3547 */
3548 static boolean
3549 emit_dp2a(struct svga_shader_emitter_v10 *emit,
3550 const struct tgsi_full_instruction *inst)
3551 {
3552 /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
3553 * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
3554 * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
3555 * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
3556 * Translates into:
3557 * MAD tmp.x, s0.y, s1.y, s2.x
3558 * MAD tmp.x, s0.x, s1.x, tmp.x
3559 * MOV dst.xyzw, tmp.xxxx
3560 */
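   /* For example (illustrative values): src0 = (1, 2, -, -), src1 = (3, 4, -, -)
    * and src2 = (5, -, -, -) give dst = 1*3 + 2*4 + 5 = 16 in all four channels.
    */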
3561 unsigned tmp = get_temp_index(emit);
3562 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3563 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3564
3565 struct tgsi_full_src_register tmp_src_xxxx =
3566 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
3567 struct tgsi_full_dst_register tmp_dst_x =
3568 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
3569
3570 struct tgsi_full_src_register src0_xxxx =
3571 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
3572 struct tgsi_full_src_register src0_yyyy =
3573 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
3574 struct tgsi_full_src_register src1_xxxx =
3575 scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
3576 struct tgsi_full_src_register src1_yyyy =
3577 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
3578 struct tgsi_full_src_register src2_xxxx =
3579 scalar_src(&inst->Src[2], TGSI_SWIZZLE_X);
3580
3581 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy,
3582 &src1_yyyy, &src2_xxxx, FALSE);
3583 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx,
3584 &src1_xxxx, &tmp_src_xxxx, FALSE);
3585 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
3586 &tmp_src_xxxx, inst->Instruction.Saturate);
3587
3588 free_temp_indexes(emit);
3589
3590 return TRUE;
3591 }
3592
3593
3594 /**
3595 * Emit code for TGSI_OPCODE_DPH instruction.
3596 */
3597 static boolean
3598 emit_dph(struct svga_shader_emitter_v10 *emit,
3599 const struct tgsi_full_instruction *inst)
3600 {
3601 /*
3602 * DP3 tmp, s0, s1
3603 * ADD dst, tmp, s1.wwww
3604 */
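   /* DPH is the homogeneous dot product:
    * dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w
    * hence the DP3 followed by adding src1.w.
    */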
3605
3606 struct tgsi_full_src_register s1_wwww =
3607 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
3608 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
3609
3610 unsigned tmp = get_temp_index(emit);
3611 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3612 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3613
3614 /* DP3 tmp, s0, s1 */
3615 emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0],
3616 &inst->Src[1], FALSE);
3617
3618 /* ADD dst, tmp, s1.wwww */
3619 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src,
3620 &s1_wwww, inst->Instruction.Saturate);
3621
3622 free_temp_indexes(emit);
3623
3624 return TRUE;
3625 }
3626
3627
3628 /**
3629 * Emit code for TGSI_OPCODE_DST instruction.
3630 */
3631 static boolean
3632 emit_dst(struct svga_shader_emitter_v10 *emit,
3633 const struct tgsi_full_instruction *inst)
3634 {
3635 /*
3636 * dst.x = 1
3637 * dst.y = src0.y * src1.y
3638 * dst.z = src0.z
3639 * dst.w = src1.w
3640 */
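   /* This is the classic distance-attenuation helper: with
    * src0 = (-, d*d, d*d, -) and src1 = (-, 1/d, -, 1/d) the result is
    * (1, d, d*d, 1/d).
    */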
3641
3642 struct tgsi_full_src_register s0_yyyy =
3643 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
3644 struct tgsi_full_src_register s0_zzzz =
3645 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
3646 struct tgsi_full_src_register s1_yyyy =
3647 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
3648 struct tgsi_full_src_register s1_wwww =
3649 scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
3650
3651 /*
3652 * If dst is the same register as either src0 or src1 we need
3653 * to write to a temporary and insert an extra move.
3654 */
3655 unsigned tmp_move = get_temp_index(emit);
3656 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
3657 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
3658
3659 /* MOV dst.x, 1.0 */
3660 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3661 struct tgsi_full_dst_register dst_x =
3662 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
3663 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
3664
3665 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
3666 }
3667
3668 /* MUL dst.y, s0.y, s1.y */
3669 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3670 struct tgsi_full_dst_register dst_y =
3671 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
3672
3673 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
3674 &s1_yyyy, inst->Instruction.Saturate);
3675 }
3676
3677 /* MOV dst.z, s0.z */
3678 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3679 struct tgsi_full_dst_register dst_z =
3680 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
3681
3682 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz,
3683 inst->Instruction.Saturate);
3684 }
3685
3686 /* MOV dst.w, s1.w */
3687 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3688 struct tgsi_full_dst_register dst_w =
3689 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
3690
3691 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww,
3692 inst->Instruction.Saturate);
3693 }
3694
3695 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
3696 FALSE);
3697 free_temp_indexes(emit);
3698
3699 return TRUE;
3700 }
3701
3702
3703
3704 /**
3705 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
3706 */
3707 static boolean
3708 emit_endprim(struct svga_shader_emitter_v10 *emit,
3709 const struct tgsi_full_instruction *inst)
3710 {
3711 assert(emit->unit == PIPE_SHADER_GEOMETRY);
3712
3713 /* We can't use emit_simple() because the TGSI instruction has one
3714 * operand (vertex stream number) which we must ignore for VGPU10.
3715 */
3716 begin_emit_instruction(emit);
3717 emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
3718 end_emit_instruction(emit);
3719 return TRUE;
3720 }
3721
3722
3723 /**
3724 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
3725 */
3726 static boolean
3727 emit_ex2(struct svga_shader_emitter_v10 *emit,
3728 const struct tgsi_full_instruction *inst)
3729 {
3730 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
3731 * while VGPU10 computes four values.
3732 *
3733 * dst = EX2(src):
3734 * dst.xyzw = 2.0 ^ src.x
3735 */
3736
3737 struct tgsi_full_src_register src_xxxx =
3738 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
3739 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
3740
3741 /* EXP dst, s0.xxxx */
3742 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
3743 inst->Instruction.Saturate);
3744
3745 return TRUE;
3746 }
3747
3748
3749 /**
3750 * Emit code for TGSI_OPCODE_EXP instruction.
3751 */
3752 static boolean
3753 emit_exp(struct svga_shader_emitter_v10 *emit,
3754 const struct tgsi_full_instruction *inst)
3755 {
3756 /*
3757 * dst.x = 2 ^ floor(s0.x)
3758 * dst.y = s0.x - floor(s0.x)
3759 * dst.z = 2 ^ s0.x
3760 * dst.w = 1.0
3761 */
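   /* For example, s0.x = 2.5 yields dst = (2^2, 0.5, 2^2.5, 1.0)
    * = (4.0, 0.5, ~5.657, 1.0).
    */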
3762
3763 struct tgsi_full_src_register src_xxxx =
3764 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
3765 unsigned tmp = get_temp_index(emit);
3766 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3767 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3768
3769 /*
3770 * If dst and src are the same we need to create
3771 * a temporary for it and insert an extra move.
3772 */
3773 unsigned tmp_move = get_temp_index(emit);
3774 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
3775 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
3776
3777 /* only use X component of temp reg */
3778 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
3779 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
3780
3781 /* ROUND_NI tmp.x, s0.x */
3782 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
3783 &src_xxxx, FALSE); /* round to -infinity */
3784
3785 /* EXP dst.x, tmp.x */
3786 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3787 struct tgsi_full_dst_register dst_x =
3788 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
3789
3790 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
3791 inst->Instruction.Saturate);
3792 }
3793
3794 /* ADD dst.y, s0.x, -tmp */
3795 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3796 struct tgsi_full_dst_register dst_y =
3797 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
3798 struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
3799
3800 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
3801 &neg_tmp_src, inst->Instruction.Saturate);
3802 }
3803
3804 /* EXP dst.z, s0.x */
3805 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3806 struct tgsi_full_dst_register dst_z =
3807 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
3808
3809 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
3810 inst->Instruction.Saturate);
3811 }
3812
3813 /* MOV dst.w, 1.0 */
3814 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3815 struct tgsi_full_dst_register dst_w =
3816 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
3817 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
3818
3819 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one,
3820 FALSE);
3821 }
3822
3823 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
3824 FALSE);
3825
3826 free_temp_indexes(emit);
3827
3828 return TRUE;
3829 }
3830
3831
3832 /**
3833 * Emit code for TGSI_OPCODE_IF instruction.
3834 */
3835 static boolean
3836 emit_if(struct svga_shader_emitter_v10 *emit,
3837 const struct tgsi_full_instruction *inst)
3838 {
3839 VGPU10OpcodeToken0 opcode0;
3840
3841 /* The src register should be a scalar */
3842 assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY &&
3843 inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ &&
3844 inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW);
3845
3846 /* The only special thing here is that we need to set the
3847 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
3848 * src.x is non-zero.
3849 */
3850 opcode0.value = 0;
3851 opcode0.opcodeType = VGPU10_OPCODE_IF;
3852 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
3853
3854 begin_emit_instruction(emit);
3855 emit_dword(emit, opcode0.value);
3856 emit_src_register(emit, &inst->Src[0]);
3857 end_emit_instruction(emit);
3858
3859 return TRUE;
3860 }
3861
3862
3863 /**
3864 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
3865 * the register components are negative).
3866 */
3867 static boolean
3868 emit_kill_if(struct svga_shader_emitter_v10 *emit,
3869 const struct tgsi_full_instruction *inst)
3870 {
3871 unsigned tmp = get_temp_index(emit);
3872 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
3873 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
3874
3875 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
3876
3877 struct tgsi_full_dst_register tmp_dst_x =
3878 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
3879 struct tgsi_full_src_register tmp_src_xxxx =
3880 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
3881
3882 /* tmp = src[0] < 0.0 */
3883 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
3884 &zero, FALSE);
3885
3886 if (!same_swizzle_terms(&inst->Src[0])) {
3887 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
3888 * logically OR the swizzle terms. Most uses of KILL_IF only
3889 * test one channel so it's good to avoid these extra steps.
3890 */
3891 struct tgsi_full_src_register tmp_src_yyyy =
3892 scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
3893 struct tgsi_full_src_register tmp_src_zzzz =
3894 scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
3895 struct tgsi_full_src_register tmp_src_wwww =
3896 scalar_src(&tmp_src, TGSI_SWIZZLE_W);
3897
3898 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
3899 &tmp_src_yyyy, FALSE);
3900 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
3901 &tmp_src_zzzz, FALSE);
3902 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
3903 &tmp_src_wwww, FALSE);
3904 }
3905
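   /* At this point tmp.x is 0xffffffff if any tested component was negative,
    * 0 otherwise.
    */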
3906 begin_emit_instruction(emit);
3907 emit_discard_opcode(emit, TRUE); /* discard if tmp.x is non-zero */
3908 emit_src_register(emit, &tmp_src_xxxx);
3909 end_emit_instruction(emit);
3910
3911 free_temp_indexes(emit);
3912
3913 return TRUE;
3914 }
3915
3916
3917 /**
3918 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
3919 */
3920 static boolean
3921 emit_kill(struct svga_shader_emitter_v10 *emit,
3922 const struct tgsi_full_instruction *inst)
3923 {
3924 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
3925
3926 /* DISCARD if 0.0 is zero -- the test always passes, so this is an unconditional discard */
3927 begin_emit_instruction(emit);
3928 emit_discard_opcode(emit, FALSE);
3929 emit_src_register(emit, &zero);
3930 end_emit_instruction(emit);
3931
3932 return TRUE;
3933 }
3934
3935
3936 /**
3937 * Emit code for TGSI_OPCODE_LG2 instruction.
3938 */
3939 static boolean
3940 emit_lg2(struct svga_shader_emitter_v10 *emit,
3941 const struct tgsi_full_instruction *inst)
3942 {
3943 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
3944 * while VGPU10 computes four values.
3945 *
3946 * dst = LG2(src):
3947 * dst.xyzw = log2(src.x)
3948 */
3949
3950 struct tgsi_full_src_register src_xxxx =
3951 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
3952 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
3953
3954 /* LOG dst, s0.xxxx */
3955 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx,
3956 inst->Instruction.Saturate);
3957
3958 return TRUE;
3959 }
3960
3961
3962 /**
3963 * Emit code for TGSI_OPCODE_LIT instruction.
3964 */
3965 static boolean
3966 emit_lit(struct svga_shader_emitter_v10 *emit,
3967 const struct tgsi_full_instruction *inst)
3968 {
3969 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
3970
3971 /*
3972 * If dst and src are the same we need to create
3973 * a temporary for it and insert an extra move.
3974 */
3975 unsigned tmp_move = get_temp_index(emit);
3976 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
3977 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
3978
3979 /*
3980 * dst.x = 1
3981 * dst.y = max(src.x, 0)
3982 * dst.z = (src.x > 0) ? max(src.y, 0)^clamp(src.w, -128, 128) : 0
3983 * dst.w = 1
3984 */
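   /* (These are the classic fixed-function lighting coefficients, where
    * src.x is the diffuse dot product, src.y the specular dot product and
    * src.w the specular exponent.)
    */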
3985
3986 /* MOV dst.x, 1.0 */
3987 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3988 struct tgsi_full_dst_register dst_x =
3989 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
3990 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
3991 }
3992
3993 /* MOV dst.w, 1.0 */
3994 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3995 struct tgsi_full_dst_register dst_w =
3996 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
3997 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
3998 }
3999
4000 /* MAX dst.y, src.x, 0.0 */
4001 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
4002 struct tgsi_full_dst_register dst_y =
4003 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
4004 struct tgsi_full_src_register zero =
4005 make_immediate_reg_float(emit, 0.0f);
4006 struct tgsi_full_src_register src_xxxx =
4007 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
4008 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
4009
4010 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
4011 &zero, inst->Instruction.Saturate);
4012 }
4013
4014 /*
4015 * tmp1 = clamp(src.w, -128, 128);
4016 * MAX tmp1, src.w, -128
4017 * MIN tmp1, tmp1, 128
4018 *
4019 * tmp2 = max(src.y, 0);
4020 * MAX tmp2, src.y, 0
4021 *
4022 * tmp1 = pow(tmp2, tmp1);
4023 * LOG tmp2, tmp2
4024 * MUL tmp1, tmp2, tmp1
4025 * EXP tmp1, tmp1
4026 *
4027 * tmp1 = (src.w == 0) ? 1 : tmp1;
4028 * EQ tmp2, 0, src.w
4029 * MOVC tmp1, tmp2, 1.0, tmp1
4030 *
4031 * dst.z = (0 < src.x) ? tmp1 : 0;
4032 * LT tmp2, 0, src.x
4033 * MOVC dst.z, tmp2, tmp1, 0.0
4034 */
4035 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
4036 struct tgsi_full_dst_register dst_z =
4037 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
4038
4039 unsigned tmp1 = get_temp_index(emit);
4040 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
4041 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
4042 unsigned tmp2 = get_temp_index(emit);
4043 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
4044 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
4045
4046 struct tgsi_full_src_register src_xxxx =
4047 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
4048 struct tgsi_full_src_register src_yyyy =
4049 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
4050 struct tgsi_full_src_register src_wwww =
4051 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
4052
4053 struct tgsi_full_src_register zero =
4054 make_immediate_reg_float(emit, 0.0f);
4055 struct tgsi_full_src_register lowerbound =
4056 make_immediate_reg_float(emit, -128.0f);
4057 struct tgsi_full_src_register upperbound =
4058 make_immediate_reg_float(emit, 128.0f);
4059
4060 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
4061 &lowerbound, FALSE);
4062 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
4063 &upperbound, FALSE);
4064 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
4065 &zero, FALSE);
4066
4067 /* POW tmp1, tmp2, tmp1 */
4068 /* LOG tmp2, tmp2 */
4069 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src,
4070 FALSE);
4071
4072 /* MUL tmp1, tmp2, tmp1 */
4073 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
4074 &tmp1_src, FALSE);
4075
4076 /* EXP tmp1, tmp1 */
4077 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src,
4078 FALSE);
4079
4080 /* EQ tmp2, 0, src.w */
4081 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero,
4082 &src_wwww, FALSE);
4083 /* MOVC tmp1, tmp2, 1.0, tmp1 */
4084 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
4085 &tmp2_src, &one, &tmp1_src, FALSE);
4086
4087 /* LT tmp2, 0, src.x */
4088 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero,
4089 &src_xxxx, FALSE);
4090 /* MOVC dst.z, tmp2, tmp1, 0.0 */
4091 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
4092 &tmp2_src, &tmp1_src, &zero, FALSE);
4093 }
4094
4095 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
4096 FALSE);
4097 free_temp_indexes(emit);
4098
4099 return TRUE;
4100 }
4101
4102
4103 /**
4104 * Emit code for TGSI_OPCODE_LOG instruction.
4105 */
4106 static boolean
4107 emit_log(struct svga_shader_emitter_v10 *emit,
4108 const struct tgsi_full_instruction *inst)
4109 {
4110 /*
4111 * dst.x = floor(lg2(abs(s0.x)))
4112 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
4113 * dst.z = lg2(abs(s0.x))
4114 * dst.w = 1.0
4115 */
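   /* For example, s0.x = 10.0 yields dst = (3.0, 10/8 = 1.25, ~3.3219, 1.0). */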
4116
4117 struct tgsi_full_src_register src_xxxx =
4118 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
4119 unsigned tmp = get_temp_index(emit);
4120 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4121 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4122 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
4123
4124 /* only use X component of temp reg */
4125 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
4126 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
4127
4128 /* LOG tmp.x, abs(s0.x) */
4129 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
4130 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst,
4131 &abs_src_xxxx, FALSE);
4132 }
4133
4134 /* MOV dst.z, tmp.x */
4135 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
4136 struct tgsi_full_dst_register dst_z =
4137 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
4138
4139 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z,
4140 &tmp_src, inst->Instruction.Saturate);
4141 }
4142
4143 /* ROUND_NI tmp.x, tmp.x (floor) */
4144 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
4145 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
4146 &tmp_src, FALSE);
4147 }
4148
4149 /* MOV dst.x, tmp.x */
4150 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
4151 struct tgsi_full_dst_register dst_x =
4152 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
4153
4154 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src,
4155 inst->Instruction.Saturate);
4156 }
4157
4158 /* EXP tmp.x, tmp.x */
4159 /* DIV dst.y, abs(s0.x), tmp.x */
4160 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
4161 struct tgsi_full_dst_register dst_y =
4162 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
4163
4164 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src,
4165 FALSE);
4166 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
4167 &tmp_src, inst->Instruction.Saturate);
4168 }
4169
4170 /* MOV dst.w, 1.0 */
4171 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
4172 struct tgsi_full_dst_register dst_w =
4173 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
4174 struct tgsi_full_src_register one =
4175 make_immediate_reg_float(emit, 1.0f);
4176
4177 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
4178 }
4179
4180 free_temp_indexes(emit);
4181
4182 return TRUE;
4183 }
4184
4185
4186 /**
4187 * Emit code for TGSI_OPCODE_LRP instruction.
4188 */
4189 static boolean
4190 emit_lrp(struct svga_shader_emitter_v10 *emit,
4191 const struct tgsi_full_instruction *inst)
4192 {
4193 /* dst = LRP(s0, s1, s2):
4194 * dst = s0 * (s1 - s2) + s2
4195 * Translates into:
4196 * ADD tmp, s1, -s2; tmp = s1 - s2
4197 * MAD dst, s0, tmp, s2; dst = s0 * tmp + s2
4198 */
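   /* Note: the usual lerp definition s0*s1 + (1-s0)*s2 rearranges to
    * s0*(s1-s2) + s2, so only an ADD and a MAD are needed.
    */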
4199 unsigned tmp = get_temp_index(emit);
4200 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
4201 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
4202 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
4203
4204 /* ADD tmp, s1, -s2 */
4205 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp,
4206 &inst->Src[1], &neg_src2, FALSE);
4207
4208 /* MAD dst, s0, tmp, s2 */
4209 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
4210 &inst->Src[0], &src_tmp, &inst->Src[2],
4211 inst->Instruction.Saturate);
4212
4213 free_temp_indexes(emit);
4214
4215 return TRUE;
4216 }
4217
4218
4219 /**
4220 * Emit code for TGSI_OPCODE_POW instruction.
4221 */
4222 static boolean
4223 emit_pow(struct svga_shader_emitter_v10 *emit,
4224 const struct tgsi_full_instruction *inst)
4225 {
4226 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
4227 * src1.x while VGPU10 computes four values.
4228 *
4229 * dst = POW(src0, src1):
4230 * dst.xyzw = src0.x ^ src1.x
4231 */
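   /* The power is computed as 2^(src1.x * log2(src0.x)), i.e. LOG, MUL, EXP.
    * As with GLSL pow(), the result is undefined for src0.x < 0 (log2 of a
    * negative number).
    */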
4232 unsigned tmp = get_temp_index(emit);
4233 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4234 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4235 struct tgsi_full_src_register src0_xxxx =
4236 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
4237 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
4238 struct tgsi_full_src_register src1_xxxx =
4239 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
4240 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
4241
4242 /* LOG tmp, s0.xxxx */
4243 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx,
4244 FALSE);
4245
4246 /* MUL tmp, tmp, s1.xxxx */
4247 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src,
4248 &src1_xxxx, FALSE);
4249
4250 /* EXP dst, tmp */
4251 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
4252 &tmp_src, inst->Instruction.Saturate);
4253
4254 /* free tmp */
4255 free_temp_indexes(emit);
4256
4257 return TRUE;
4258 }
4259
4260
4261 /**
4262 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
4263 */
4264 static boolean
4265 emit_rcp(struct svga_shader_emitter_v10 *emit,
4266 const struct tgsi_full_instruction *inst)
4267 {
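   /* dst.xyzw = 1.0 / src.x: compute the reciprocal into tmp.x with DIV,
    * then broadcast it to all four channels of dst.
    */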
4268 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4269
4270 unsigned tmp = get_temp_index(emit);
4271 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4272 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4273
4274 struct tgsi_full_dst_register tmp_dst_x =
4275 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
4276 struct tgsi_full_src_register tmp_src_xxxx =
4277 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
4278
4279 /* DIV tmp.x, 1.0, s0 */
4280 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one,
4281 &inst->Src[0], FALSE);
4282
4283 /* MOV dst, tmp.xxxx */
4284 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
4285 &tmp_src_xxxx, inst->Instruction.Saturate);
4286
4287 free_temp_indexes(emit);
4288
4289 return TRUE;
4290 }
4291
4292
4293 /**
4294 * Emit code for TGSI_OPCODE_RSQ instruction.
4295 */
4296 static boolean
4297 emit_rsq(struct svga_shader_emitter_v10 *emit,
4298 const struct tgsi_full_instruction *inst)
4299 {
4300 /* dst = RSQ(src):
4301 * dst.xyzw = 1 / sqrt(src.x)
4302 * Translates into:
4303 * RSQ tmp, src.x
4304 * MOV dst, tmp.xxxx
4305 */
4306
4307 unsigned tmp = get_temp_index(emit);
4308 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4309 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4310
4311 struct tgsi_full_dst_register tmp_dst_x =
4312 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
4313 struct tgsi_full_src_register tmp_src_xxxx =
4314 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
4315
4316 /* RSQ tmp, src.x */
4317 emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x,
4318 &inst->Src[0], FALSE);
4319
4320 /* MOV dst, tmp.xxxx */
4321 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
4322 &tmp_src_xxxx, inst->Instruction.Saturate);
4323
4324 /* free tmp */
4325 free_temp_indexes(emit);
4326
4327 return TRUE;
4328 }
4329
4330
4331 /**
4332 * Emit code for TGSI_OPCODE_SCS instruction.
4333 */
4334 static boolean
4335 emit_scs(struct svga_shader_emitter_v10 *emit,
4336 const struct tgsi_full_instruction *inst)
4337 {
4338 /* dst.x = cos(src.x)
4339 * dst.y = sin(src.x)
4340 * dst.z = 0.0
4341 * dst.w = 1.0
4342 */
4343 struct tgsi_full_dst_register dst_x =
4344 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
4345 struct tgsi_full_dst_register dst_y =
4346 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
4347 struct tgsi_full_dst_register dst_zw =
4348 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);
4349
4350 struct tgsi_full_src_register zero_one =
4351 make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);
4352
4353 begin_emit_instruction(emit);
4354 emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
4355 emit_dst_register(emit, &dst_y);
4356 emit_dst_register(emit, &dst_x);
4357 emit_src_register(emit, &inst->Src[0]);
4358 end_emit_instruction(emit);
4359
4360 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
4361 &dst_zw, &zero_one, inst->Instruction.Saturate);
4362
4363 return TRUE;
4364 }
4365
4366
4367 /**
4368 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
4369 */
4370 static boolean
4371 emit_seq(struct svga_shader_emitter_v10 *emit,
4372 const struct tgsi_full_instruction *inst)
4373 {
4374 /* dst = SEQ(s0, s1):
4375 * dst = s0 == s1 ? 1.0 : 0.0 (per component)
4376 * Translates into:
4377 * EQ tmp, s0, s1; tmp = s0 == s1 ? 0xffffffff : 0 (per comp)
4378 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
4379 */
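   /* The same comparison + MOVC pattern is used for all of the SET-style
    * opcodes (SEQ/SGE/SGT/SLE/SLT/SNE) to turn the 0 / 0xffffffff comparison
    * mask into 0.0 / 1.0 floats.
    */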
4380 unsigned tmp = get_temp_index(emit);
4381 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4382 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4383 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4384 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4385
4386 /* EQ tmp, s0, s1 */
4387 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
4388 &inst->Src[1], FALSE);
4389
4390 /* MOVC dst, tmp, one, zero */
4391 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4392 &one, &zero, FALSE);
4393
4394 free_temp_indexes(emit);
4395
4396 return TRUE;
4397 }
4398
4399
4400 /**
4401 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
4402 */
4403 static boolean
4404 emit_sge(struct svga_shader_emitter_v10 *emit,
4405 const struct tgsi_full_instruction *inst)
4406 {
4407 /* dst = SGE(s0, s1):
4408 * dst = s0 >= s1 ? 1.0 : 0.0 (per component)
4409 * Translates into:
4410 * GE tmp, s0, s1; tmp = s0 >= s1 ? 0xffffffff : 0 (per comp)
4411 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
4412 */
4413 unsigned tmp = get_temp_index(emit);
4414 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4415 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4416 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4417 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4418
4419 /* GE tmp, s0, s1 */
4420 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
4421 &inst->Src[1], FALSE);
4422
4423 /* MOVC dst, tmp, one, zero */
4424 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4425 &one, &zero, FALSE);
4426
4427 free_temp_indexes(emit);
4428
4429 return TRUE;
4430 }
4431
4432
4433 /**
4434 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
4435 */
4436 static boolean
4437 emit_sgt(struct svga_shader_emitter_v10 *emit,
4438 const struct tgsi_full_instruction *inst)
4439 {
4440 /* dst = SGT(s0, s1):
4441 * dst = s0 > s1 ? 1.0 : 0.0 (per component)
4442 * Translates into:
4443 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
4444 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
4445 */
4446 unsigned tmp = get_temp_index(emit);
4447 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4448 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4449 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4450 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4451
4452 /* LT tmp, s1, s0 */
4453 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
4454 &inst->Src[0], FALSE);
4455
4456 /* MOVC dst, tmp, one, zero */
4457 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4458 &one, &zero, FALSE);
4459
4460 free_temp_indexes(emit);
4461
4462 return TRUE;
4463 }
4464
4465
4466 /**
4467 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
4468 */
4469 static boolean
4470 emit_sincos(struct svga_shader_emitter_v10 *emit,
4471 const struct tgsi_full_instruction *inst)
4472 {
4473 unsigned tmp = get_temp_index(emit);
4474 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4475 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4476
4477 struct tgsi_full_src_register tmp_src_xxxx =
4478 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
4479 struct tgsi_full_dst_register tmp_dst_x =
4480 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
4481
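   /* VGPU10 SINCOS writes sin(src) to its first destination and cos(src) to
    * its second; we keep only the one we need and null out the other.
    */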
4482 begin_emit_instruction(emit);
4483 emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
4484
4485 if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
4486 {
4487 emit_dst_register(emit, &tmp_dst_x); /* first destination register */
4488 emit_null_dst_register(emit); /* second destination register */
4489 }
4490 else {
4491 emit_null_dst_register(emit);
4492 emit_dst_register(emit, &tmp_dst_x);
4493 }
4494
4495 emit_src_register(emit, &inst->Src[0]);
4496 end_emit_instruction(emit);
4497
4498 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
4499 &tmp_src_xxxx, inst->Instruction.Saturate);
4500
4501 free_temp_indexes(emit);
4502
4503 return TRUE;
4504 }
4505
4506
4507 /**
4508 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
4509 */
4510 static boolean
4511 emit_sle(struct svga_shader_emitter_v10 *emit,
4512 const struct tgsi_full_instruction *inst)
4513 {
4514 /* dst = SLE(s0, s1):
4515 * dst = s0 <= s1 ? 1.0 : 0.0 (per component)
4516 * Translates into:
4517 * GE tmp, s1, s0; tmp = s1 >= s0 ? 0xffffffff : 0 (per comp)
4518 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
4519 */
4520 unsigned tmp = get_temp_index(emit);
4521 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4522 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4523 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4524 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4525
4526 /* GE tmp, s1, s0 */
4527 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
4528 &inst->Src[0], FALSE);
4529
4530 /* MOVC dst, tmp, one, zero */
4531 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4532 &one, &zero, FALSE);
4533
4534 free_temp_indexes(emit);
4535
4536 return TRUE;
4537 }
4538
4539
4540 /**
4541 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
4542 */
4543 static boolean
4544 emit_slt(struct svga_shader_emitter_v10 *emit,
4545 const struct tgsi_full_instruction *inst)
4546 {
4547 /* dst = SLT(s0, s1):
4548 * dst = s0 < s1 ? 1.0 : 0.0 (per component)
4549 * Translates into:
4550 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
4551 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
4552 */
4553 unsigned tmp = get_temp_index(emit);
4554 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4555 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4556 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4557 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4558
4559 /* LT tmp, s0, s1 */
4560 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
4561 &inst->Src[1], FALSE);
4562
4563 /* MOVC dst, tmp, one, zero */
4564 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4565 &one, &zero, FALSE);
4566
4567 free_temp_indexes(emit);
4568
4569 return TRUE;
4570 }
4571
4572
4573 /**
4574 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
4575 */
4576 static boolean
4577 emit_sne(struct svga_shader_emitter_v10 *emit,
4578 const struct tgsi_full_instruction *inst)
4579 {
4580 /* dst = SNE(s0, s1):
4581 * dst = s0 != s1 ? 1.0 : 0.0 (per component)
4582 * Translates into:
4583 * NE tmp, s0, s1; tmp = s0 != s1 ? 0xffffffff : 0 (per comp)
4584 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
4585 */
4586 unsigned tmp = get_temp_index(emit);
4587 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4588 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4589 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4590 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
4591
4592 /* NE tmp, s0, s1 */
4593 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
4594 &inst->Src[1], FALSE);
4595
4596 /* MOVC dst, tmp, one, zero */
4597 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
4598 &one, &zero, FALSE);
4599
4600 free_temp_indexes(emit);
4601
4602 return TRUE;
4603 }
4604
4605
4606 /**
4607 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
4608 */
4609 static boolean
4610 emit_ssg(struct svga_shader_emitter_v10 *emit,
4611 const struct tgsi_full_instruction *inst)
4612 {
4613 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
4614 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
4615 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
4616 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
4617 * Translates into:
4618 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp)
4619 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component)
4620 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp)
4621 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component)
4622 */
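   /* For example, src.x = -3.2: the first LT/MOVC pair sets tmp2.x = -1.0,
    * the second LT yields 0, so the final MOVC selects tmp2.x and dst.x = -1.0.
    */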
4623 struct tgsi_full_src_register zero =
4624 make_immediate_reg_float(emit, 0.0f);
4625 struct tgsi_full_src_register one =
4626 make_immediate_reg_float(emit, 1.0f);
4627 struct tgsi_full_src_register neg_one =
4628 make_immediate_reg_float(emit, -1.0f);
4629
4630 unsigned tmp1 = get_temp_index(emit);
4631 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
4632 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
4633
4634 unsigned tmp2 = get_temp_index(emit);
4635 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
4636 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
4637
4638 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
4639 &zero, FALSE);
4640 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
4641 &neg_one, &zero, FALSE);
4642 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
4643 &inst->Src[0], FALSE);
4644 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
4645 &one, &tmp2_src, FALSE);
4646
4647 free_temp_indexes(emit);
4648
4649 return TRUE;
4650 }
4651
4652
4653 /**
4654 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
4655 */
4656 static boolean
4657 emit_issg(struct svga_shader_emitter_v10 *emit,
4658 const struct tgsi_full_instruction *inst)
4659 {
4660 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
4661 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
4662 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
4663 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
4664 * Translates into:
4665 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component)
4666 * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component)
4667 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component)
4668 */
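   /* Note: the float 0.0 immediate has the same bit pattern as integer 0, so
    * it is also usable as the integer operand of ILT here.
    * For example, src.x = 5: tmp1.x = 0, tmp2.x = -1, dst.x = 0 - (-1) = 1.
    */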
4669 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
4670
4671 unsigned tmp1 = get_temp_index(emit);
4672 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
4673 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
4674
4675 unsigned tmp2 = get_temp_index(emit);
4676 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
4677 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
4678
4679 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
4680
4681 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
4682 &inst->Src[0], &zero, FALSE);
4683 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
4684 &zero, &inst->Src[0], FALSE);
4685 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
4686 &tmp1_src, &neg_tmp2, FALSE);
4687
4688 free_temp_indexes(emit);
4689
4690 return TRUE;
4691 }
4692
4693
4694 /**
4695 * Emit code for TGSI_OPCODE_SUB instruction.
4696 */
4697 static boolean
4698 emit_sub(struct svga_shader_emitter_v10 *emit,
4699 const struct tgsi_full_instruction *inst)
4700 {
4701 /* dst = SUB(s0, s1):
4702 * dst = s0 - s1
4703 * Translates into:
4704 * ADD dst, s0, neg(s1)
4705 */
4706 struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]);
4707
4708 /* ADD dst, s0, neg(s1) */
4709 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0],
4710 &inst->Src[0], &neg_src1,
4711 inst->Instruction.Saturate);
4712
4713 return TRUE;
4714 }
4715
4716
4717 /**
4718 * Emit a comparison instruction. The dest register will get
4719 * 0 or ~0 values depending on the outcome of comparing src0 to src1.
4720 */
4721 static void
4722 emit_comparison(struct svga_shader_emitter_v10 *emit,
4723 SVGA3dCmpFunc func,
4724 const struct tgsi_full_dst_register *dst,
4725 const struct tgsi_full_src_register *src0,
4726 const struct tgsi_full_src_register *src1)
4727 {
4728 struct tgsi_full_src_register immediate;
4729 VGPU10OpcodeToken0 opcode0;
4730 boolean swapSrc = FALSE;
4731
4732 /* Sanity checks for svga vs. gallium enums */
4733 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
4734 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
4735
4736 opcode0.value = 0;
4737
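   /* VGPU10 only provides LT, GE, EQ and NE float comparisons, so LESSEQUAL
    * and GREATER are implemented by swapping the source operands of GE and LT.
    */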
4738 switch (func) {
4739 case SVGA3D_CMP_NEVER:
4740 immediate = make_immediate_reg_int(emit, 0);
4741 /* MOV dst, {0} */
4742 begin_emit_instruction(emit);
4743 emit_dword(emit, VGPU10_OPCODE_MOV);
4744 emit_dst_register(emit, dst);
4745 emit_src_register(emit, &immediate);
4746 end_emit_instruction(emit);
4747 return;
4748 case SVGA3D_CMP_ALWAYS:
4749 immediate = make_immediate_reg_int(emit, -1);
4750 /* MOV dst, {-1} */
4751 begin_emit_instruction(emit);
4752 emit_dword(emit, VGPU10_OPCODE_MOV);
4753 emit_dst_register(emit, dst);
4754 emit_src_register(emit, &immediate);
4755 end_emit_instruction(emit);
4756 return;
4757 case SVGA3D_CMP_LESS:
4758 opcode0.opcodeType = VGPU10_OPCODE_LT;
4759 break;
4760 case SVGA3D_CMP_EQUAL:
4761 opcode0.opcodeType = VGPU10_OPCODE_EQ;
4762 break;
4763 case SVGA3D_CMP_LESSEQUAL:
4764 opcode0.opcodeType = VGPU10_OPCODE_GE;
4765 swapSrc = TRUE;
4766 break;
4767 case SVGA3D_CMP_GREATER:
4768 opcode0.opcodeType = VGPU10_OPCODE_LT;
4769 swapSrc = TRUE;
4770 break;
4771 case SVGA3D_CMP_NOTEQUAL:
4772 opcode0.opcodeType = VGPU10_OPCODE_NE;
4773 break;
4774 case SVGA3D_CMP_GREATEREQUAL:
4775 opcode0.opcodeType = VGPU10_OPCODE_GE;
4776 break;
4777 default:
4778 assert(!"Unexpected comparison mode");
4779 opcode0.opcodeType = VGPU10_OPCODE_EQ;
4780 }
4781
4782 begin_emit_instruction(emit);
4783 emit_dword(emit, opcode0.value);
4784 emit_dst_register(emit, dst);
4785 if (swapSrc) {
4786 emit_src_register(emit, src1);
4787 emit_src_register(emit, src0);
4788 }
4789 else {
4790 emit_src_register(emit, src0);
4791 emit_src_register(emit, src1);
4792 }
4793 end_emit_instruction(emit);
4794 }
4795
4796
4797 /**
4798 * Get texel/address offsets for a texture instruction.
4799 */
4800 static void
4801 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
4802 const struct tgsi_full_instruction *inst, int offsets[3])
4803 {
4804 if (inst->Texture.NumOffsets == 1) {
4805 /* According to the OpenGL Shading Language spec, the offsets may only
4806 * come from a previously-declared immediate/literal.
4807 */
4808 const struct tgsi_texture_offset *off = inst->TexOffsets;
4809 const unsigned index = off[0].Index;
4810 const unsigned swizzleX = off[0].SwizzleX;
4811 const unsigned swizzleY = off[0].SwizzleY;
4812 const unsigned swizzleZ = off[0].SwizzleZ;
4813 const union tgsi_immediate_data *imm = emit->immediates[index];
4814
4815 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
4816
4817 offsets[0] = imm[swizzleX].Int;
4818 offsets[1] = imm[swizzleY].Int;
4819 offsets[2] = imm[swizzleZ].Int;
4820 }
4821 else {
4822 offsets[0] = offsets[1] = offsets[2] = 0;
4823 }
4824 }
4825
4826
4827 /**
4828 * Set up the coordinate register for texture sampling.
4829 * When we're sampling from a RECT texture we have to scale the
4830 * unnormalized coordinate to a normalized coordinate.
4831 * We do that by multiplying the coordinate by an "extra" constant.
4832 * An alternative would be to use the RESINFO instruction to query the
4833 * texture's size.
4834 */
4835 static struct tgsi_full_src_register
4836 setup_texcoord(struct svga_shader_emitter_v10 *emit,
4837 unsigned unit,
4838 const struct tgsi_full_src_register *coord)
4839 {
4840 if (emit->key.tex[unit].unnormalized) {
4841 unsigned scale_index = emit->texcoord_scale_index[unit];
4842 unsigned tmp = get_temp_index(emit);
4843 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
4844 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
4845 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
4846
4847 /* MUL tmp, coord, const[] */
4848 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
4849 coord, &scale_src, FALSE);
4850 return tmp_src;
4851 }
4852 else {
4853 /* use texcoord as-is */
4854 return *coord;
4855 }
4856 }
4857
4858
4859 /**
4860 * For SAMPLE_C instructions, emit the extra src register which indicates
4861 * the reference/comparison value.
4862 */
4863 static void
4864 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
4865 unsigned target,
4866 const struct tgsi_full_src_register *coord)
4867 {
4868 struct tgsi_full_src_register coord_src_ref;
4869 unsigned component;
4870
4871 assert(tgsi_is_shadow_target(target));
4872
4873 assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */
4874 if (target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
4875 target == TGSI_TEXTURE_SHADOWCUBE)
4876 component = TGSI_SWIZZLE_W;
4877 else
4878 component = TGSI_SWIZZLE_Z;
4879
4880 coord_src_ref = scalar_src(coord, component);
4881
4882 emit_src_register(emit, &coord_src_ref);
4883 }
4884
4885
4886 /**
4887 * Info for implementing texture swizzles.
4888 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
4889 * functions use this to encapsulate the extra steps needed to perform
4890 * a texture swizzle, or shadow/depth comparisons.
4891 * The shadow/depth comparison is only done here for the cases where
4892 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
4893 */
4894 struct tex_swizzle_info
4895 {
4896 boolean swizzled;
4897 boolean shadow_compare;
4898 unsigned unit;
4899 unsigned texture_target; /**< TGSI_TEXTURE_x */
4900 struct tgsi_full_src_register tmp_src;
4901 struct tgsi_full_dst_register tmp_dst;
4902 const struct tgsi_full_dst_register *inst_dst;
4903 const struct tgsi_full_src_register *coord_src;
4904 };
4905
4906
4907 /**
4908 * Do setup for handling texture swizzles or shadow compares.
4909 * \param unit the texture unit
4910 * \param inst the TGSI texture instruction
4911 * \param shadow_compare do shadow/depth comparison?
4912 * \param swz returns the swizzle info
4913 */
4914 static void
4915 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
4916 unsigned unit,
4917 const struct tgsi_full_instruction *inst,
4918 boolean shadow_compare,
4919 struct tex_swizzle_info *swz)
4920 {
4921 swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
4922 emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
4923 emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
4924 emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
4925
4926 swz->shadow_compare = shadow_compare;
4927 swz->texture_target = inst->Texture.Texture;
4928
4929 if (swz->swizzled || shadow_compare) {
4930 /* Allocate temp register for the result of the SAMPLE instruction
4931 * and the source of the MOV/compare/swizzle instructions.
4932 */
4933 unsigned tmp = get_temp_index(emit);
4934 swz->tmp_src = make_src_temp_reg(tmp);
4935 swz->tmp_dst = make_dst_temp_reg(tmp);
4936
4937 swz->unit = unit;
4938 }
4939 swz->inst_dst = &inst->Dst[0];
4940 swz->coord_src = &inst->Src[0];
4941 }
4942
4943
4944 /**
4945 * Returns the register to put the SAMPLE instruction results into.
4946 * This will either be the original instruction dst reg (if no swizzle
4947 * and no shadow comparison) or a temporary reg otherwise.
4948 */
4949 static const struct tgsi_full_dst_register *
4950 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
4951 {
4952 return (swz->swizzled || swz->shadow_compare)
4953 ? &swz->tmp_dst : swz->inst_dst;
4954 }
4955
4956
4957 /**
4958 * This emits the MOV instruction that actually implements a texture swizzle
4959 * and/or shadow comparison.
4960 */
4961 static void
4962 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
4963 const struct tex_swizzle_info *swz)
4964 {
4965 if (swz->shadow_compare) {
4966 /* Emit extra instructions to compare the fetched texel value against
4967 * a texture coordinate component. The result of the comparison
4968 * is 0.0 or 1.0.
4969 */
4970 struct tgsi_full_src_register coord_src;
4971 struct tgsi_full_src_register texel_src =
4972 scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
4973 struct tgsi_full_src_register one =
4974 make_immediate_reg_float(emit, 1.0f);
4975 /* convert gallium comparison func to SVGA comparison func */
4976 SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
4977
4978 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4979
4980 switch (swz->texture_target) {
4981 case TGSI_TEXTURE_SHADOW2D:
4982 case TGSI_TEXTURE_SHADOWRECT:
4983 case TGSI_TEXTURE_SHADOW1D_ARRAY:
4984 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
4985 break;
4986 case TGSI_TEXTURE_SHADOW1D:
4987 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y);
4988 break;
4989 case TGSI_TEXTURE_SHADOWCUBE:
4990 case TGSI_TEXTURE_SHADOW2D_ARRAY:
4991 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W);
4992 break;
4993 default:
4994 assert(!"Unexpected texture target in end_tex_swizzle()");
4995 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
4996 }
4997
4998 /* COMPARE tmp, coord, texel */
4999 /* XXX it would seem that the texel and coord arguments should
5000 * be transposed here, but piglit tests indicate otherwise.
5001 */
5002 emit_comparison(emit, compare_func,
5003 &swz->tmp_dst, &texel_src, &coord_src);
5004
5005 /* AND dest, tmp, {1.0} */
5006 begin_emit_instruction(emit);
5007 emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
5008 if (swz->swizzled) {
5009 emit_dst_register(emit, &swz->tmp_dst);
5010 }
5011 else {
5012 emit_dst_register(emit, swz->inst_dst);
5013 }
5014 emit_src_register(emit, &swz->tmp_src);
5015 emit_src_register(emit, &one);
5016 end_emit_instruction(emit);
5017 }
5018
5019 if (swz->swizzled) {
5020 unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
5021 unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
5022 unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
5023 unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
5024 unsigned writemask_0 = 0, writemask_1 = 0;
5025 boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type);
5026
5027 /* Swizzle w/out zero/one terms */
5028 struct tgsi_full_src_register src_swizzled =
5029 swizzle_src(&swz->tmp_src,
5030 swz_r < PIPE_SWIZZLE_ZERO ? swz_r : PIPE_SWIZZLE_RED,
5031 swz_g < PIPE_SWIZZLE_ZERO ? swz_g : PIPE_SWIZZLE_GREEN,
5032 swz_b < PIPE_SWIZZLE_ZERO ? swz_b : PIPE_SWIZZLE_BLUE,
5033 swz_a < PIPE_SWIZZLE_ZERO ? swz_a : PIPE_SWIZZLE_ALPHA);
5034
5035 /* MOV dst, color(tmp).<swizzle> */
5036 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
5037 swz->inst_dst, &src_swizzled, FALSE);
5038
5039 /* handle swizzle zero terms */
5040 writemask_0 = (((swz_r == PIPE_SWIZZLE_ZERO) << 0) |
5041 ((swz_g == PIPE_SWIZZLE_ZERO) << 1) |
5042 ((swz_b == PIPE_SWIZZLE_ZERO) << 2) |
5043 ((swz_a == PIPE_SWIZZLE_ZERO) << 3));
5044
5045 if (writemask_0) {
5046 struct tgsi_full_src_register zero = int_tex ?
5047 make_immediate_reg_int(emit, 0) :
5048 make_immediate_reg_float(emit, 0.0f);
5049 struct tgsi_full_dst_register dst =
5050 writemask_dst(swz->inst_dst, writemask_0);
5051
5052 /* MOV dst.writemask_0, {0,0,0,0} */
5053 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
5054 &dst, &zero, FALSE);
5055 }
5056
5057 /* handle swizzle one terms */
5058 writemask_1 = (((swz_r == PIPE_SWIZZLE_ONE) << 0) |
5059 ((swz_g == PIPE_SWIZZLE_ONE) << 1) |
5060 ((swz_b == PIPE_SWIZZLE_ONE) << 2) |
5061 ((swz_a == PIPE_SWIZZLE_ONE) << 3));
5062
5063 if (writemask_1) {
5064 struct tgsi_full_src_register one = int_tex ?
5065 make_immediate_reg_int(emit, 1) :
5066 make_immediate_reg_float(emit, 1.0f);
5067 struct tgsi_full_dst_register dst =
5068 writemask_dst(swz->inst_dst, writemask_1);
5069
5070 /* MOV dst.writemask_1, {1,1,1,1} */
5071 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
5072 }
5073 }
5074 }
5075
5076
5077 /**
5078 * Emit code for TGSI_OPCODE_SAMPLE instruction.
5079 */
5080 static boolean
5081 emit_sample(struct svga_shader_emitter_v10 *emit,
5082 const struct tgsi_full_instruction *inst)
5083 {
5084 const unsigned resource_unit = inst->Src[1].Register.Index;
5085 const unsigned sampler_unit = inst->Src[2].Register.Index;
5086 struct tgsi_full_src_register coord;
5087 int offsets[3];
5088 struct tex_swizzle_info swz_info;
5089
5090 begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
5091
5092 get_texel_offsets(emit, inst, offsets);
5093
5094 coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
5095
5096 /* SAMPLE dst, coord(s0), resource, sampler */
5097 begin_emit_instruction(emit);
5098
5099 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
5100 inst->Instruction.Saturate, offsets);
5101 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5102 emit_src_register(emit, &coord);
5103 emit_resource_register(emit, resource_unit);
5104 emit_sampler_register(emit, sampler_unit);
5105 end_emit_instruction(emit);
5106
5107 end_tex_swizzle(emit, &swz_info);
5108
5109 free_temp_indexes(emit);
5110
5111 return TRUE;
5112 }
5113
5114
5115 /**
5116 * Check if a texture instruction is valid.
5117 * An example of an invalid texture instruction is doing shadow comparison
5118 * with an integer-valued texture.
5119 * If we detect an invalid texture instruction, we replace it with:
5120 * MOV dst, {1,1,1,1};
5121 * \return TRUE if valid, FALSE if invalid.
5122 */
5123 static boolean
5124 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
5125 const struct tgsi_full_instruction *inst)
5126 {
5127 const unsigned unit = inst->Src[1].Register.Index;
5128 const unsigned target = inst->Texture.Texture;
5129 boolean valid = TRUE;
5130
5131 if (tgsi_is_shadow_target(target) &&
5132 is_integer_type(emit->key.tex[unit].return_type)) {
5133 debug_printf("Invalid SAMPLE_C with an integer texture!\n");
5134 valid = FALSE;
5135 }
5136 /* XXX might check for other conditions in the future here */
5137
5138 if (!valid) {
5139 /* emit a MOV dst, {1,1,1,1} instruction. */
5140 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
5141 begin_emit_instruction(emit);
5142 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
5143 emit_dst_register(emit, &inst->Dst[0]);
5144 emit_src_register(emit, &one);
5145 end_emit_instruction(emit);
5146 }
5147
5148 return valid;
5149 }
5150
5151
5152 /**
5153 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
5154 */
5155 static boolean
5156 emit_tex(struct svga_shader_emitter_v10 *emit,
5157 const struct tgsi_full_instruction *inst)
5158 {
5159 const uint unit = inst->Src[1].Register.Index;
5160 unsigned target = inst->Texture.Texture;
5161 unsigned opcode;
5162 struct tgsi_full_src_register coord;
5163 int offsets[3];
5164 struct tex_swizzle_info swz_info;
5165
5166 /* check that the sampler returns a float */
5167 if (!is_valid_tex_instruction(emit, inst))
5168 return TRUE;
5169
5170 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
5171
5172 get_texel_offsets(emit, inst, offsets);
5173
5174 coord = setup_texcoord(emit, unit, &inst->Src[0]);
5175
5176 /* SAMPLE dst, coord(s0), resource, sampler */
5177 begin_emit_instruction(emit);
5178
5179 if (tgsi_is_shadow_target(target))
5180 opcode = VGPU10_OPCODE_SAMPLE_C;
5181 else
5182 opcode = VGPU10_OPCODE_SAMPLE;
5183
5184 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
5185 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5186 emit_src_register(emit, &coord);
5187 emit_resource_register(emit, unit);
5188 emit_sampler_register(emit, unit);
5189 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
5190 emit_tex_compare_refcoord(emit, target, &coord);
5191 }
5192 end_emit_instruction(emit);
5193
5194 end_tex_swizzle(emit, &swz_info);
5195
5196 free_temp_indexes(emit);
5197
5198 return TRUE;
5199 }
5200
5201
5202 /**
5203 * Emit code for TGSI_OPCODE_TXP (projective texture)
5204 */
5205 static boolean
5206 emit_txp(struct svga_shader_emitter_v10 *emit,
5207 const struct tgsi_full_instruction *inst)
5208 {
5209 const uint unit = inst->Src[1].Register.Index;
5210 unsigned target = inst->Texture.Texture;
5211 unsigned opcode;
5212 int offsets[3];
5213 unsigned tmp = get_temp_index(emit);
5214 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
5215 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
5216 struct tgsi_full_src_register src0_wwww =
5217 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
5218 struct tgsi_full_src_register coord;
5219 struct tex_swizzle_info swz_info;
5220
5221 /* check that the sampler returns a float */
5222 if (!is_valid_tex_instruction(emit, inst))
5223 return TRUE;
5224
5225 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
5226
5227 get_texel_offsets(emit, inst, offsets);
5228
5229 coord = setup_texcoord(emit, unit, &inst->Src[0]);
5230
5231 /* DIV tmp, coord, s0.wwww */
5232 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
5233 &coord, &src0_wwww, FALSE);
5234
5235 /* SAMPLE dst, coord(tmp), resource, sampler */
5236 begin_emit_instruction(emit);
5237
5238 if (tgsi_is_shadow_target(target))
5239 opcode = VGPU10_OPCODE_SAMPLE_C;
5240 else
5241 opcode = VGPU10_OPCODE_SAMPLE;
5242
5243 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
5244 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5245 emit_src_register(emit, &tmp_src); /* projected coord */
5246 emit_resource_register(emit, unit);
5247 emit_sampler_register(emit, unit);
5248 if (opcode == VGPU10_OPCODE_SAMPLE_C) {
5249 emit_tex_compare_refcoord(emit, target, &tmp_src);
5250 }
5251 end_emit_instruction(emit);
5252
5253 end_tex_swizzle(emit, &swz_info);
5254
5255 free_temp_indexes(emit);
5256
5257 return TRUE;
5258 }
5259
5260
5261 /**
5262  * Emit code for the TGSI_OPCODE_XPD (cross product) instruction.
5263 */
5264 static boolean
5265 emit_xpd(struct svga_shader_emitter_v10 *emit,
5266 const struct tgsi_full_instruction *inst)
5267 {
5268 /* dst.x = src0.y * src1.z - src1.y * src0.z
5269 * dst.y = src0.z * src1.x - src1.z * src0.x
5270 * dst.z = src0.x * src1.y - src1.x * src0.y
5271 * dst.w = 1
5272 */
5273 struct tgsi_full_src_register s0_xxxx =
5274 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
5275 struct tgsi_full_src_register s0_yyyy =
5276 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
5277 struct tgsi_full_src_register s0_zzzz =
5278 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
5279
5280 struct tgsi_full_src_register s1_xxxx =
5281 scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
5282 struct tgsi_full_src_register s1_yyyy =
5283 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
5284 struct tgsi_full_src_register s1_zzzz =
5285 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
5286
5287 unsigned tmp1 = get_temp_index(emit);
5288 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
5289 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
5290
5291 unsigned tmp2 = get_temp_index(emit);
5292 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
5293 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
5294 struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
5295
5296 unsigned tmp3 = get_temp_index(emit);
5297 struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
5298 struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
5299 struct tgsi_full_dst_register tmp3_dst_x =
5300 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
5301 struct tgsi_full_dst_register tmp3_dst_y =
5302 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
5303 struct tgsi_full_dst_register tmp3_dst_z =
5304 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
5305 struct tgsi_full_dst_register tmp3_dst_w =
5306 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
5307
5308 /* Note: we put all the intermediate computations into tmp3 in case
5309     * the XPD dest register is the same as one of the src regs (in which
5310     * case we could clobber a src reg before we're done with it).
5311 *
5312 * Note: we could get by with just one temp register instead of three
5313 * since we're doing scalar operations and there's enough room in one
5314 * temp for everything.
5315 */
5316
5317 /* MUL tmp1, src0.y, src1.z */
5318 /* MUL tmp2, src1.y, src0.z */
5319 /* ADD tmp3.x, tmp1, -tmp2 */
5320 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
5321 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
5322 &s0_yyyy, &s1_zzzz, FALSE);
5323 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
5324 &s1_yyyy, &s0_zzzz, FALSE);
5325 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
5326 &tmp1_src, &neg_tmp2_src, FALSE);
5327 }
5328
5329 /* MUL tmp1, src0.z, src1.x */
5330 /* MUL tmp2, src1.z, src0.x */
5331 /* ADD tmp3.y, tmp1, -tmp2 */
5332 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
5333 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
5334 &s1_xxxx, FALSE);
5335 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
5336 &s0_xxxx, FALSE);
5337 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
5338 &tmp1_src, &neg_tmp2_src, FALSE);
5339 }
5340
5341 /* MUL tmp1, src0.x, src1.y */
5342 /* MUL tmp2, src1.x, src0.y */
5343 /* ADD tmp3.z, tmp1, -tmp2 */
5344 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
5345 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
5346 &s1_yyyy, FALSE);
5347 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
5348 &s0_yyyy, FALSE);
5349 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
5350 &tmp1_src, &neg_tmp2_src, FALSE);
5351 }
5352
5353 /* MOV tmp3.w, 1.0 */
5354 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
5355 struct tgsi_full_src_register one =
5356 make_immediate_reg_float(emit, 1.0f);
5357
5358 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
5359 }
5360
5361 /* MOV dst, tmp3 */
5362 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
5363 inst->Instruction.Saturate);
5364
5365
5366 free_temp_indexes(emit);
5367
5368 return TRUE;
5369 }
5370
5371
5372 /**
5373 * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
5374 */
5375 static boolean
5376 emit_txd(struct svga_shader_emitter_v10 *emit,
5377 const struct tgsi_full_instruction *inst)
5378 {
5379 const uint unit = inst->Src[3].Register.Index;
5380 unsigned target = inst->Texture.Texture;
5381 int offsets[3];
5382 struct tgsi_full_src_register coord;
5383 struct tex_swizzle_info swz_info;
5384
5385 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
5386 &swz_info);
5387
5388 get_texel_offsets(emit, inst, offsets);
5389
5390 coord = setup_texcoord(emit, unit, &inst->Src[0]);
5391
5392 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
5393 begin_emit_instruction(emit);
5394 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
5395 inst->Instruction.Saturate, offsets);
5396 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5397 emit_src_register(emit, &coord);
5398 emit_resource_register(emit, unit);
5399 emit_sampler_register(emit, unit);
5400 emit_src_register(emit, &inst->Src[1]); /* Xderiv */
5401 emit_src_register(emit, &inst->Src[2]); /* Yderiv */
5402 end_emit_instruction(emit);
5403
5404 end_tex_swizzle(emit, &swz_info);
5405
5406 free_temp_indexes(emit);
5407
5408 return TRUE;
5409 }
5410
5411
5412 /**
5413 * Emit code for TGSI_OPCODE_TXF (texel fetch)
5414 */
5415 static boolean
5416 emit_txf(struct svga_shader_emitter_v10 *emit,
5417 const struct tgsi_full_instruction *inst)
5418 {
5419 const uint unit = inst->Src[1].Register.Index;
5420 const unsigned msaa = emit->key.tex[unit].texture_msaa;
5421 int offsets[3];
5422 struct tex_swizzle_info swz_info;
5423
5424 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
5425
5426 get_texel_offsets(emit, inst, offsets);
5427
5428 if (msaa) {
5429 /* Fetch one sample from an MSAA texture */
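      /* The sample index is taken from the .w component of the TXF
       * coordinate.
       */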
5430 struct tgsi_full_src_register sampleIndex =
5431 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
5432 /* LD_MS dst, coord(s0), resource, sampleIndex */
5433 begin_emit_instruction(emit);
5434 emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
5435 inst->Instruction.Saturate, offsets);
5436 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5437 emit_src_register(emit, &inst->Src[0]);
5438 emit_resource_register(emit, unit);
5439 emit_src_register(emit, &sampleIndex);
5440 end_emit_instruction(emit);
5441 }
5442 else {
5443 /* Fetch one texel specified by integer coordinate */
5444 /* LD dst, coord(s0), resource */
5445 begin_emit_instruction(emit);
5446 emit_sample_opcode(emit, VGPU10_OPCODE_LD,
5447 inst->Instruction.Saturate, offsets);
5448 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5449 emit_src_register(emit, &inst->Src[0]);
5450 emit_resource_register(emit, unit);
5451 end_emit_instruction(emit);
5452 }
5453
5454 end_tex_swizzle(emit, &swz_info);
5455
5456 free_temp_indexes(emit);
5457
5458 return TRUE;
5459 }
5460
5461
5462 /**
5463 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
5464 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
5465 */
5466 static boolean
5467 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
5468 const struct tgsi_full_instruction *inst)
5469 {
5470 unsigned target = inst->Texture.Texture;
5471 unsigned opcode, unit;
5472 int offsets[3];
5473 struct tgsi_full_src_register coord, lod_bias;
5474 struct tex_swizzle_info swz_info;
5475
5476 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
5477 inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
5478 inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
5479
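   /* For TXB2 (cube shadow maps) the LOD bias comes from src1.x and the
    * sampler is src2; for TXL/TXB the LOD or bias is packed in src0.w
    * and the sampler is src1.
    */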
5480 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
5481 lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
5482 unit = inst->Src[2].Register.Index;
5483 }
5484 else {
5485 lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
5486 unit = inst->Src[1].Register.Index;
5487 }
5488
5489 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
5490 &swz_info);
5491
5492 get_texel_offsets(emit, inst, offsets);
5493
5494 coord = setup_texcoord(emit, unit, &inst->Src[0]);
5495
5496 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
5497 begin_emit_instruction(emit);
5498 if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
5499 opcode = VGPU10_OPCODE_SAMPLE_L;
5500 }
5501 else {
5502 opcode = VGPU10_OPCODE_SAMPLE_B;
5503 }
5504 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
5505 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
5506 emit_src_register(emit, &coord);
5507 emit_resource_register(emit, unit);
5508 emit_sampler_register(emit, unit);
5509 emit_src_register(emit, &lod_bias);
5510 end_emit_instruction(emit);
5511
5512 end_tex_swizzle(emit, &swz_info);
5513
5514 free_temp_indexes(emit);
5515
5516 return TRUE;
5517 }
5518
5519
5520 /**
5521 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
5522 */
5523 static boolean
5524 emit_txq(struct svga_shader_emitter_v10 *emit,
5525 const struct tgsi_full_instruction *inst)
5526 {
5527 const uint unit = inst->Src[1].Register.Index;
5528
5529 if (emit->key.tex[unit].texture_target == PIPE_BUFFER) {
5530       /* RESINFO does not support querying texture buffers, so we store
5531        * texture buffer sizes in shader constants and copy the relevant
5532        * constant to the destination to implement TXQ:
5533 * MOV dst, const[texture_buffer_size_index[unit]]
5534 */
5535 struct tgsi_full_src_register size_src =
5536 make_src_const_reg(emit->texture_buffer_size_index[unit]);
5537 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src,
5538 FALSE);
5539 } else {
5540 /* RESINFO dst, srcMipLevel, resource */
5541 begin_emit_instruction(emit);
5542 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
5543 emit_dst_register(emit, &inst->Dst[0]);
5544 emit_src_register(emit, &inst->Src[0]);
5545 emit_resource_register(emit, unit);
5546 end_emit_instruction(emit);
5547 }
5548
5549 free_temp_indexes(emit);
5550
5551 return TRUE;
5552 }
5553
5554
5555 /**
5556 * Emit a simple instruction (like ADD, MUL, MIN, etc).
5557 */
5558 static boolean
5559 emit_simple(struct svga_shader_emitter_v10 *emit,
5560 const struct tgsi_full_instruction *inst)
5561 {
5562 const unsigned opcode = inst->Instruction.Opcode;
5563 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
5564 unsigned i;
5565
5566 begin_emit_instruction(emit);
5567 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
5568 inst->Instruction.Saturate);
5569 for (i = 0; i < op->num_dst; i++) {
5570 emit_dst_register(emit, &inst->Dst[i]);
5571 }
5572 for (i = 0; i < op->num_src; i++) {
5573 emit_src_register(emit, &inst->Src[i]);
5574 }
5575 end_emit_instruction(emit);
5576
5577 return TRUE;
5578 }
5579
5580
5581 /**
5582  * We special-case the MOV instruction only to detect constant
5583  * color writes in the fragment shader.
5584 */
5585 static boolean
5586 emit_mov(struct svga_shader_emitter_v10 *emit,
5587 const struct tgsi_full_instruction *inst)
5588 {
5589 const struct tgsi_full_src_register *src = &inst->Src[0];
5590 const struct tgsi_full_dst_register *dst = &inst->Dst[0];
5591
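   /* Flag a non-indirect constant write to color output 0.  The variant
    * code later checks that this was the only output write before treating
    * the output color as constant.
    */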
5592 if (emit->unit == PIPE_SHADER_FRAGMENT &&
5593 dst->Register.File == TGSI_FILE_OUTPUT &&
5594 dst->Register.Index == 0 &&
5595 src->Register.File == TGSI_FILE_CONSTANT &&
5596 !src->Register.Indirect) {
5597 emit->constant_color_output = TRUE;
5598 }
5599
5600 return emit_simple(emit, inst);
5601 }
5602
5603
5604 /**
5605  * Emit a simple VGPU10 instruction which writes to multiple dest registers
5606  * where the TGSI instruction only uses one; the unused dests are emitted as NULL.
5607 */
5608 static boolean
5609 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
5610 const struct tgsi_full_instruction *inst,
5611 unsigned dst_count,
5612 unsigned dst_index)
5613 {
5614 const unsigned opcode = inst->Instruction.Opcode;
5615 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
5616 unsigned i;
5617
5618 begin_emit_instruction(emit);
5619 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
5620 inst->Instruction.Saturate);
5621
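   /* Emit the single TGSI dest at position 'dst_index' and emit NULL
    * destinations for the other VGPU10 dest slots.
    */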
5622 for (i = 0; i < dst_count; i++) {
5623 if (i == dst_index) {
5624 emit_dst_register(emit, &inst->Dst[0]);
5625 } else {
5626 emit_null_dst_register(emit);
5627 }
5628 }
5629
5630 for (i = 0; i < op->num_src; i++) {
5631 emit_src_register(emit, &inst->Src[i]);
5632 }
5633 end_emit_instruction(emit);
5634
5635 return TRUE;
5636 }
5637
5638
5639 /**
5640 * Translate a single TGSI instruction to VGPU10.
5641 */
5642 static boolean
5643 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
5644 unsigned inst_number,
5645 const struct tgsi_full_instruction *inst)
5646 {
5647 const unsigned opcode = inst->Instruction.Opcode;
5648
5649 switch (opcode) {
5650 case TGSI_OPCODE_ADD:
5651 case TGSI_OPCODE_AND:
5652 case TGSI_OPCODE_BGNLOOP:
5653 case TGSI_OPCODE_BRK:
5654 case TGSI_OPCODE_CEIL:
5655 case TGSI_OPCODE_CONT:
5656 case TGSI_OPCODE_DDX:
5657 case TGSI_OPCODE_DDY:
5658 case TGSI_OPCODE_DIV:
5659 case TGSI_OPCODE_DP2:
5660 case TGSI_OPCODE_DP3:
5661 case TGSI_OPCODE_DP4:
5662 case TGSI_OPCODE_ELSE:
5663 case TGSI_OPCODE_ENDIF:
5664 case TGSI_OPCODE_ENDLOOP:
5665 case TGSI_OPCODE_ENDSUB:
5666 case TGSI_OPCODE_F2I:
5667 case TGSI_OPCODE_F2U:
5668 case TGSI_OPCODE_FLR:
5669 case TGSI_OPCODE_FRC:
5670 case TGSI_OPCODE_FSEQ:
5671 case TGSI_OPCODE_FSGE:
5672 case TGSI_OPCODE_FSLT:
5673 case TGSI_OPCODE_FSNE:
5674 case TGSI_OPCODE_I2F:
5675 case TGSI_OPCODE_IMAX:
5676 case TGSI_OPCODE_IMIN:
5677 case TGSI_OPCODE_INEG:
5678 case TGSI_OPCODE_ISGE:
5679 case TGSI_OPCODE_ISHR:
5680 case TGSI_OPCODE_ISLT:
5681 case TGSI_OPCODE_MAD:
5682 case TGSI_OPCODE_MAX:
5683 case TGSI_OPCODE_MIN:
5684 case TGSI_OPCODE_MUL:
5685 case TGSI_OPCODE_NOP:
5686 case TGSI_OPCODE_NOT:
5687 case TGSI_OPCODE_OR:
5688 case TGSI_OPCODE_RET:
5689 case TGSI_OPCODE_UADD:
5690 case TGSI_OPCODE_USEQ:
5691 case TGSI_OPCODE_USGE:
5692 case TGSI_OPCODE_USLT:
5693 case TGSI_OPCODE_UMIN:
5694 case TGSI_OPCODE_UMAD:
5695 case TGSI_OPCODE_UMAX:
5696 case TGSI_OPCODE_ROUND:
5697 case TGSI_OPCODE_SQRT:
5698 case TGSI_OPCODE_SHL:
5699 case TGSI_OPCODE_TRUNC:
5700 case TGSI_OPCODE_U2F:
5701 case TGSI_OPCODE_UCMP:
5702 case TGSI_OPCODE_USHR:
5703 case TGSI_OPCODE_USNE:
5704 case TGSI_OPCODE_XOR:
5705 /* simple instructions */
5706 return emit_simple(emit, inst);
5707
5708 case TGSI_OPCODE_MOV:
5709 return emit_mov(emit, inst);
5710 case TGSI_OPCODE_EMIT:
5711 return emit_vertex(emit, inst);
5712 case TGSI_OPCODE_ENDPRIM:
5713 return emit_endprim(emit, inst);
5714 case TGSI_OPCODE_ABS:
5715 return emit_abs(emit, inst);
5716 case TGSI_OPCODE_IABS:
5717 return emit_iabs(emit, inst);
5718 case TGSI_OPCODE_ARL:
5719 /* fall-through */
5720 case TGSI_OPCODE_UARL:
5721 return emit_arl_uarl(emit, inst);
5722 case TGSI_OPCODE_BGNSUB:
5723 /* no-op */
5724 return TRUE;
5725 case TGSI_OPCODE_CAL:
5726 return emit_cal(emit, inst);
5727 case TGSI_OPCODE_CMP:
5728 return emit_cmp(emit, inst);
5729 case TGSI_OPCODE_COS:
5730 return emit_sincos(emit, inst);
5731 case TGSI_OPCODE_DP2A:
5732 return emit_dp2a(emit, inst);
5733 case TGSI_OPCODE_DPH:
5734 return emit_dph(emit, inst);
5735 case TGSI_OPCODE_DST:
5736 return emit_dst(emit, inst);
5737 case TGSI_OPCODE_EX2:
5738 return emit_ex2(emit, inst);
5739 case TGSI_OPCODE_EXP:
5740 return emit_exp(emit, inst);
5741 case TGSI_OPCODE_IF:
5742 return emit_if(emit, inst);
5743 case TGSI_OPCODE_KILL:
5744 return emit_kill(emit, inst);
5745 case TGSI_OPCODE_KILL_IF:
5746 return emit_kill_if(emit, inst);
5747 case TGSI_OPCODE_LG2:
5748 return emit_lg2(emit, inst);
5749 case TGSI_OPCODE_LIT:
5750 return emit_lit(emit, inst);
5751 case TGSI_OPCODE_LOG:
5752 return emit_log(emit, inst);
5753 case TGSI_OPCODE_LRP:
5754 return emit_lrp(emit, inst);
5755 case TGSI_OPCODE_POW:
5756 return emit_pow(emit, inst);
5757 case TGSI_OPCODE_RCP:
5758 return emit_rcp(emit, inst);
5759 case TGSI_OPCODE_RSQ:
5760 return emit_rsq(emit, inst);
5761 case TGSI_OPCODE_SAMPLE:
5762 return emit_sample(emit, inst);
5763 case TGSI_OPCODE_SCS:
5764 return emit_scs(emit, inst);
5765 case TGSI_OPCODE_SEQ:
5766 return emit_seq(emit, inst);
5767 case TGSI_OPCODE_SGE:
5768 return emit_sge(emit, inst);
5769 case TGSI_OPCODE_SGT:
5770 return emit_sgt(emit, inst);
5771 case TGSI_OPCODE_SIN:
5772 return emit_sincos(emit, inst);
5773 case TGSI_OPCODE_SLE:
5774 return emit_sle(emit, inst);
5775 case TGSI_OPCODE_SLT:
5776 return emit_slt(emit, inst);
5777 case TGSI_OPCODE_SNE:
5778 return emit_sne(emit, inst);
5779 case TGSI_OPCODE_SSG:
5780 return emit_ssg(emit, inst);
5781 case TGSI_OPCODE_ISSG:
5782 return emit_issg(emit, inst);
5783 case TGSI_OPCODE_SUB:
5784 return emit_sub(emit, inst);
5785 case TGSI_OPCODE_TEX:
5786 return emit_tex(emit, inst);
5787 case TGSI_OPCODE_TXP:
5788 return emit_txp(emit, inst);
5789 case TGSI_OPCODE_TXB:
5790 case TGSI_OPCODE_TXB2:
5791 case TGSI_OPCODE_TXL:
5792 return emit_txl_txb(emit, inst);
5793 case TGSI_OPCODE_TXD:
5794 return emit_txd(emit, inst);
5795 case TGSI_OPCODE_TXF:
5796 return emit_txf(emit, inst);
5797 case TGSI_OPCODE_TXQ:
5798 return emit_txq(emit, inst);
5799 case TGSI_OPCODE_UIF:
5800 return emit_if(emit, inst);
5801 case TGSI_OPCODE_XPD:
5802 return emit_xpd(emit, inst);
5803 case TGSI_OPCODE_UMUL_HI:
5804 case TGSI_OPCODE_IMUL_HI:
5805 case TGSI_OPCODE_UDIV:
5806 case TGSI_OPCODE_IDIV:
5807 /* These cases use only the FIRST of two destination registers */
5808 return emit_simple_1dst(emit, inst, 2, 0);
5809 case TGSI_OPCODE_UMUL:
5810 case TGSI_OPCODE_UMOD:
5811 case TGSI_OPCODE_MOD:
5812 /* These cases use only the SECOND of two destination registers */
5813 return emit_simple_1dst(emit, inst, 2, 1);
5814 case TGSI_OPCODE_END:
5815 if (!emit_post_helpers(emit))
5816 return FALSE;
5817 return emit_simple(emit, inst);
5818
5819 default:
5820 debug_printf("Unimplemented tgsi instruction %s\n",
5821 tgsi_get_opcode_name(opcode));
5822 return FALSE;
5823 }
5824
5825 return TRUE;
5826 }
5827
5828
5829 /**
5830 * Emit the extra instructions to adjust the vertex position.
5831 * There are two possible adjustments:
5832 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
5833 * "prescale" and "pretranslate" values.
5834 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
5835 * \param vs_pos_tmp_index which temporary register contains the vertex pos.
5836 */
5837 static void
5838 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit,
5839 unsigned vs_pos_tmp_index)
5840 {
5841 struct tgsi_full_src_register tmp_pos_src;
5842 struct tgsi_full_dst_register pos_dst;
5843
5844 /* Don't bother to emit any extra vertex instructions if vertex position is
5845 * not written out
5846 */
5847 if (emit->vposition.out_index == INVALID_INDEX)
5848 return;
5849
5850 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
5851 pos_dst = make_dst_output_reg(emit->vposition.out_index);
5852
5853    /* If the non-adjusted (stream output) vertex position register
5854     * index is valid, copy the vertex position from the temporary
5855     * vertex position register before it is modified by the
5856     * prescale computation.
5857 */
5858 if (emit->vposition.so_index != INVALID_INDEX) {
5859 struct tgsi_full_dst_register pos_so_dst =
5860 make_dst_output_reg(emit->vposition.so_index);
5861
5862 /* MOV pos_so, tmp_pos */
5863 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst,
5864 &tmp_pos_src, FALSE);
5865 }
5866
5867 if (emit->vposition.need_prescale) {
5868 /* This code adjusts the vertex position to match the VGPU10 convention.
5869 * If p is the position computed by the shader (usually by applying the
5870 * modelview and projection matrices), the new position q is computed by:
5871 *
5872 * q.x = p.w * trans.x + p.x * scale.x
5873 * q.y = p.w * trans.y + p.y * scale.y
5874 * q.z = p.w * trans.z + p.z * scale.z;
5875 * q.w = p.w * trans.w + p.w;
5876 */
5877 struct tgsi_full_src_register tmp_pos_src_w =
5878 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
5879 struct tgsi_full_dst_register tmp_pos_dst =
5880 make_dst_temp_reg(vs_pos_tmp_index);
5881 struct tgsi_full_dst_register tmp_pos_dst_xyz =
5882 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
5883
5884 struct tgsi_full_src_register prescale_scale =
5885 make_src_const_reg(emit->vposition.prescale_scale_index);
5886 struct tgsi_full_src_register prescale_trans =
5887 make_src_const_reg(emit->vposition.prescale_trans_index);
5888
5889 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
5890 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
5891 &tmp_pos_src, &prescale_scale, FALSE);
5892
5893 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
5894 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
5895 &prescale_trans, &tmp_pos_src, FALSE);
5896 }
5897 else if (emit->key.vs.undo_viewport) {
5898 /* This code computes the final vertex position from the temporary
5899 * vertex position by undoing the viewport transformation and the
5900 * divide-by-W operation (we convert window coords back to clip coords).
5901 * This is needed when we use the 'draw' module for fallbacks.
5902 * If p is the temp pos in window coords, then the NDC coord q is:
5903 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
5904 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
5905 * q.z = p.z * p.w
5906 * q.w = p.w
5907 * CONST[vs_viewport_index] contains:
5908 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
5909 */
5910 struct tgsi_full_dst_register tmp_pos_dst =
5911 make_dst_temp_reg(vs_pos_tmp_index);
5912 struct tgsi_full_dst_register tmp_pos_dst_xy =
5913 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
5914 struct tgsi_full_src_register tmp_pos_src_wwww =
5915 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
5916
5917 struct tgsi_full_dst_register pos_dst_xyz =
5918 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
5919 struct tgsi_full_dst_register pos_dst_w =
5920 writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
5921
5922 struct tgsi_full_src_register vp_xyzw =
5923 make_src_const_reg(emit->vs.viewport_index);
5924 struct tgsi_full_src_register vp_zwww =
5925 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
5926 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
5927
5928 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
5929 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
5930 &tmp_pos_src, &vp_zwww, FALSE);
5931
5932       /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzw */
5933 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
5934 &tmp_pos_src, &vp_xyzw, FALSE);
5935
5936 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
5937 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
5938 &tmp_pos_src, &tmp_pos_src_wwww, FALSE);
5939
5940 /* MOV pos.w, tmp_pos.w */
5941 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w,
5942 &tmp_pos_src, FALSE);
5943 }
5944 else if (vs_pos_tmp_index != INVALID_INDEX) {
5945       /* This handles the case where a temporary vertex position register
5946        * was created because the vertex shader has stream output but
5947        * prescale is disabled because rasterization is to be
5948        * discarded.  Just copy the temporary position to the real output.
5949 */
5950 struct tgsi_full_dst_register pos_dst =
5951 make_dst_output_reg(emit->vposition.out_index);
5952
5953 /* MOV pos, tmp_pos */
5954 begin_emit_instruction(emit);
5955 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
5956 emit_dst_register(emit, &pos_dst);
5957 emit_src_register(emit, &tmp_pos_src);
5958 end_emit_instruction(emit);
5959 }
5960 }
5961
5962 static void
5963 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
5964 {
5965 if (emit->clip_mode == CLIP_DISTANCE) {
5966       /* Copy from the clip distance temporary to CLIPDIST and the shadow copy */
5967 emit_clip_distance_instructions(emit);
5968
5969 } else if (emit->clip_mode == CLIP_VERTEX) {
5970 /* Convert TGSI CLIPVERTEX to CLIPDIST */
5971 emit_clip_vertex_instructions(emit);
5972 }
5973
5974 /**
5975     * Emit the vertex position and handle legacy user-defined clip planes
5976     * only if there is a valid vertex position register index.
5977     * This covers the case where the shader doesn't output a vertex
5978     * position at all; in that case there is no point in emitting any
5979     * more vertex instructions.
5980 */
5981 if (emit->vposition.out_index == INVALID_INDEX)
5982 return;
5983
5984 /**
5985     * Emit per-vertex clipping instructions for legacy user-defined clip planes.
5986     * NOTE: we must emit the clip distance instructions before the
5987     * emit_vpos_instructions() call since the latter function will change
5988     * the TEMP[vs_pos_tmp_index] value.
5989 */
5990 if (emit->clip_mode == CLIP_LEGACY) {
5991 /* Emit CLIPDIST for legacy user defined clip planes */
5992 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
5993 }
5994 }
5995
5996
5997 /**
5998 * Emit extra per-vertex instructions. This includes clip-coordinate
5999 * space conversion and computing clip distances. This is called for
6000 * each GS emit-vertex instruction and at the end of VS translation.
6001 */
6002 static void
6003 emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6004 {
6005 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
6006
6007 /* Emit clipping instructions based on clipping mode */
6008 emit_clipping_instructions(emit);
6009
6010 /**
6011 * Reset the temporary vertex position register index
6012 * so that emit_dst_register() will use the real vertex position output
6013 */
6014 emit->vposition.tmp_index = INVALID_INDEX;
6015
6016 /* Emit vertex position instructions */
6017 emit_vpos_instructions(emit, vs_pos_tmp_index);
6018
6019 /* Restore original vposition.tmp_index value for the next GS vertex.
6020 * It doesn't matter for VS.
6021 */
6022 emit->vposition.tmp_index = vs_pos_tmp_index;
6023 }
6024
6025 /**
6026 * Translate the TGSI_OPCODE_EMIT GS instruction.
6027 */
6028 static boolean
6029 emit_vertex(struct svga_shader_emitter_v10 *emit,
6030 const struct tgsi_full_instruction *inst)
6031 {
6032    boolean ret = TRUE;
6033
6034 assert(emit->unit == PIPE_SHADER_GEOMETRY);
6035
6036 emit_vertex_instructions(emit);
6037
6038 /* We can't use emit_simple() because the TGSI instruction has one
6039 * operand (vertex stream number) which we must ignore for VGPU10.
6040 */
6041 begin_emit_instruction(emit);
6042 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
6043 end_emit_instruction(emit);
6044
6045 return ret;
6046 }
6047
6048
6049 /**
6050 * Emit the extra code to convert from VGPU10's boolean front-face
6051 * register to TGSI's signed front-face register.
6052 *
6053 * TODO: Make temporary front-face register a scalar.
6054 */
6055 static void
6056 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
6057 {
6058 assert(emit->unit == PIPE_SHADER_FRAGMENT);
6059
6060 if (emit->fs.face_input_index != INVALID_INDEX) {
6061 /* convert vgpu10 boolean face register to gallium +/-1 value */
6062 struct tgsi_full_dst_register tmp_dst =
6063 make_dst_temp_reg(emit->fs.face_tmp_index);
6064 struct tgsi_full_src_register one =
6065 make_immediate_reg_float(emit, 1.0f);
6066 struct tgsi_full_src_register neg_one =
6067 make_immediate_reg_float(emit, -1.0f);
6068
6069 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
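      /* MOVC is a per-component conditional move: where the condition
       * operand is non-zero it writes src1, otherwise src2, so front-facing
       * yields +1.0 and back-facing yields -1.0.
       */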
6070 begin_emit_instruction(emit);
6071 emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
6072 emit_dst_register(emit, &tmp_dst);
6073 emit_face_register(emit);
6074 emit_src_register(emit, &one);
6075 emit_src_register(emit, &neg_one);
6076 end_emit_instruction(emit);
6077 }
6078 }
6079
6080
6081 /**
6082 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
6083 */
6084 static void
6085 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
6086 {
6087 assert(emit->unit == PIPE_SHADER_FRAGMENT);
6088
6089 if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
6090 struct tgsi_full_dst_register tmp_dst =
6091 make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
6092 struct tgsi_full_dst_register tmp_dst_xyz =
6093 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
6094 struct tgsi_full_dst_register tmp_dst_w =
6095 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
6096 struct tgsi_full_src_register one =
6097 make_immediate_reg_float(emit, 1.0f);
6098 struct tgsi_full_src_register fragcoord =
6099 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
6100
6101 /* save the input index */
6102 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
6103 /* set to invalid to prevent substitution in emit_src_register() */
6104 emit->fs.fragcoord_input_index = INVALID_INDEX;
6105
6106 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
6107 begin_emit_instruction(emit);
6108 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
6109 emit_dst_register(emit, &tmp_dst_xyz);
6110 emit_src_register(emit, &fragcoord);
6111 end_emit_instruction(emit);
6112
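      /* TGSI expects the fragcoord .w component to hold 1/w while VGPU10
       * provides w, so compute the reciprocal here.
       */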
6113 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
6114 begin_emit_instruction(emit);
6115 emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
6116 emit_dst_register(emit, &tmp_dst_w);
6117 emit_src_register(emit, &one);
6118 emit_src_register(emit, &fragcoord);
6119 end_emit_instruction(emit);
6120
6121 /* restore saved value */
6122 emit->fs.fragcoord_input_index = fragcoord_input_index;
6123 }
6124 }
6125
6126
6127 /**
6128 * Emit extra instructions to adjust VS inputs/attributes. This can
6129 * mean casting a vertex attribute from int to float or setting the
6130 * W component to 1, or both.
6131 */
6132 static void
6133 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
6134 {
6135 const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
6136 const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
6137 const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
6138 const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
6139 const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
6140 const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
6141 const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
6142
6143 unsigned adjust_mask = (save_w_1_mask |
6144 save_itof_mask |
6145 save_utof_mask |
6146 save_is_bgra_mask |
6147 save_puint_to_snorm_mask |
6148 save_puint_to_uscaled_mask |
6149 save_puint_to_sscaled_mask);
6150
6151 assert(emit->unit == PIPE_SHADER_VERTEX);
6152
6153 if (adjust_mask) {
6154 struct tgsi_full_src_register one =
6155 make_immediate_reg_float(emit, 1.0f);
6156
6157 struct tgsi_full_src_register one_int =
6158 make_immediate_reg_int(emit, 1);
6159
6160 /* We need to turn off these bitmasks while emitting the
6161 * instructions below, then restore them afterward.
6162 */
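      /* (Presumably this is so that emit_src_register() reads the raw input
       * registers here instead of substituting the adjusted temporaries,
       * similar to the fragcoord_input_index trick above.)
       */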
6163 emit->key.vs.adjust_attrib_w_1 = 0;
6164 emit->key.vs.adjust_attrib_itof = 0;
6165 emit->key.vs.adjust_attrib_utof = 0;
6166 emit->key.vs.attrib_is_bgra = 0;
6167 emit->key.vs.attrib_puint_to_snorm = 0;
6168 emit->key.vs.attrib_puint_to_uscaled = 0;
6169 emit->key.vs.attrib_puint_to_sscaled = 0;
6170
6171 while (adjust_mask) {
6172 unsigned index = u_bit_scan(&adjust_mask);
6173
6174 /* skip the instruction if this vertex attribute is not being used */
6175 if (emit->info.input_usage_mask[index] == 0)
6176 continue;
6177
6178 unsigned tmp = emit->vs.adjusted_input[index];
6179 struct tgsi_full_src_register input_src =
6180 make_src_reg(TGSI_FILE_INPUT, index);
6181
6182 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6183 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6184 struct tgsi_full_dst_register tmp_dst_w =
6185 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
6186
6187 /* ITOF/UTOF/MOV tmp, input[index] */
6188 if (save_itof_mask & (1 << index)) {
6189 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
6190 &tmp_dst, &input_src, FALSE);
6191 }
6192 else if (save_utof_mask & (1 << index)) {
6193 emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
6194 &tmp_dst, &input_src, FALSE);
6195 }
6196 else if (save_puint_to_snorm_mask & (1 << index)) {
6197 emit_puint_to_snorm(emit, &tmp_dst, &input_src);
6198 }
6199 else if (save_puint_to_uscaled_mask & (1 << index)) {
6200 emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
6201 }
6202 else if (save_puint_to_sscaled_mask & (1 << index)) {
6203 emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
6204 }
6205 else {
6206 assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
6207 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6208 &tmp_dst, &input_src, FALSE);
6209 }
6210
6211 if (save_is_bgra_mask & (1 << index)) {
6212 emit_swap_r_b(emit, &tmp_dst, &tmp_src);
6213 }
6214
6215 if (save_w_1_mask & (1 << index)) {
6216 /* MOV tmp.w, 1.0 */
6217 if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
6218 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6219 &tmp_dst_w, &one_int, FALSE);
6220 }
6221 else {
6222 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6223 &tmp_dst_w, &one, FALSE);
6224 }
6225 }
6226 }
6227
6228 emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
6229 emit->key.vs.adjust_attrib_itof = save_itof_mask;
6230 emit->key.vs.adjust_attrib_utof = save_utof_mask;
6231 emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
6232 emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
6233 emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
6234 emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
6235 }
6236 }
6237
6238
6239 /**
6240 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
6241 * to implement some instructions. We pre-allocate those values here
6242 * in the immediate constant buffer.
6243 */
6244 static void
6245 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
6246 {
6247 unsigned n = 0;
6248
6249 emit->common_immediate_pos[n++] =
6250 alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
6251
6252 emit->common_immediate_pos[n++] =
6253 alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f);
6254
6255 emit->common_immediate_pos[n++] =
6256 alloc_immediate_int4(emit, 0, 1, 0, -1);
6257
6258 if (emit->key.vs.attrib_puint_to_snorm) {
6259 emit->common_immediate_pos[n++] =
6260 alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6261 }
6262
6263 if (emit->key.vs.attrib_puint_to_uscaled) {
6264 emit->common_immediate_pos[n++] =
6265 alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
6266 }
6267
6268 if (emit->key.vs.attrib_puint_to_sscaled) {
6269 emit->common_immediate_pos[n++] =
6270 alloc_immediate_int4(emit, 22, 12, 2, 0);
6271
6272 emit->common_immediate_pos[n++] =
6273 alloc_immediate_int4(emit, 22, 30, 0, 0);
6274 }
6275
6276 assert(n <= Elements(emit->common_immediate_pos));
6277 emit->num_common_immediates = n;
6278 }
6279
6280
6281 /**
6282 * Emit any extra/helper declarations/code that we might need between
6283 * the declaration section and code section.
6284 */
6285 static boolean
6286 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
6287 {
6288 /* Properties */
6289 if (emit->unit == PIPE_SHADER_GEOMETRY)
6290 emit_property_instructions(emit);
6291
6292 /* Declare inputs */
6293 if (!emit_input_declarations(emit))
6294 return FALSE;
6295
6296 /* Declare outputs */
6297 if (!emit_output_declarations(emit))
6298 return FALSE;
6299
6300 /* Declare temporary registers */
6301 emit_temporaries_declaration(emit);
6302
6303 /* Declare constant registers */
6304 emit_constant_declaration(emit);
6305
6306 /* Declare samplers and resources */
6307 emit_sampler_declarations(emit);
6308 emit_resource_declarations(emit);
6309
6310 /* Declare clip distance output registers */
6311 if (emit->unit == PIPE_SHADER_VERTEX ||
6312 emit->unit == PIPE_SHADER_GEOMETRY) {
6313 emit_clip_distance_declarations(emit);
6314 }
6315
6316 alloc_common_immediates(emit);
6317
6318 if (emit->unit == PIPE_SHADER_FRAGMENT &&
6319 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
6320 float alpha = emit->key.fs.alpha_ref;
6321 emit->fs.alpha_ref_index =
6322 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
6323 }
6324
6325 /* Now, emit the constant block containing all the immediates
6326     * declared by the shader, as well as the extra ones seen above.
6327 */
6328 emit_vgpu10_immediates_block(emit);
6329
6330 if (emit->unit == PIPE_SHADER_FRAGMENT) {
6331 emit_frontface_instructions(emit);
6332 emit_fragcoord_instructions(emit);
6333 }
6334 else if (emit->unit == PIPE_SHADER_VERTEX) {
6335 emit_vertex_attrib_instructions(emit);
6336 }
6337
6338 return TRUE;
6339 }
6340
6341
6342 /**
6343 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w
6344 * against the alpha reference value and discards the fragment if the
6345 * comparison fails.
6346 */
6347 static void
6348 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
6349 unsigned fs_color_tmp_index)
6350 {
6351 /* compare output color's alpha to alpha ref and kill */
6352 unsigned tmp = get_temp_index(emit);
6353 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6354 struct tgsi_full_src_register tmp_src_x =
6355 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6356 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6357 struct tgsi_full_src_register color_src =
6358 make_src_temp_reg(fs_color_tmp_index);
6359 struct tgsi_full_src_register color_src_w =
6360 scalar_src(&color_src, TGSI_SWIZZLE_W);
6361 struct tgsi_full_src_register ref_src =
6362 make_src_immediate_reg(emit->fs.alpha_ref_index);
6363 struct tgsi_full_dst_register color_dst =
6364 make_dst_output_reg(emit->fs.color_out_index[0]);
6365
6366 assert(emit->unit == PIPE_SHADER_FRAGMENT);
6367
6368 /* dst = src0 'alpha_func' src1 */
6369 emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
6370 &color_src_w, &ref_src);
6371
6372 /* DISCARD if dst.x == 0 */
6373 begin_emit_instruction(emit);
6374 emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */
6375 emit_src_register(emit, &tmp_src_x);
6376 end_emit_instruction(emit);
6377
6378 /* If we don't need to broadcast the color below or set fragments to
6379 * white, emit final color here.
6380 */
6381 if (emit->key.fs.write_color0_to_n_cbufs <= 1 &&
6382 !emit->key.fs.white_fragments) {
6383 /* MOV output.color, tempcolor */
6384 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
6385 &color_src, FALSE); /* XXX saturate? */
6386 }
6387
6388 free_temp_indexes(emit);
6389 }
6390
6391
6392 /**
6393 * When we need to emit white for all fragments (for emulating XOR logicop
6394 * mode), this function copies white into the temporary color output register.
6395 */
6396 static void
6397 emit_set_color_white(struct svga_shader_emitter_v10 *emit,
6398 unsigned fs_color_tmp_index)
6399 {
6400 struct tgsi_full_dst_register color_dst =
6401 make_dst_temp_reg(fs_color_tmp_index);
6402 struct tgsi_full_src_register white =
6403 make_immediate_reg_float(emit, 1.0f);
6404
6405 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE);
6406 }
6407
6408
6409 /**
6410 * Emit instructions for writing a single color output to multiple
6411 * color buffers.
6412  * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
6413  * property is set and the number of render targets is greater than one,
6414  * or when key.fs.white_fragments is true.
6415 * \param fs_color_tmp_index index of the temp register that holds the
6416 * color to broadcast.
6417 */
6418 static void
6419 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
6420 unsigned fs_color_tmp_index)
6421 {
6422 const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
6423 unsigned i;
6424 struct tgsi_full_src_register color_src =
6425 make_src_temp_reg(fs_color_tmp_index);
6426
6427 assert(emit->unit == PIPE_SHADER_FRAGMENT);
6428
6429 for (i = 0; i < n; i++) {
6430 unsigned output_reg = emit->fs.color_out_index[i];
6431 struct tgsi_full_dst_register color_dst =
6432 make_dst_output_reg(output_reg);
6433
6434 /* Fill in this semantic here since we'll use it later in
6435 * emit_dst_register().
6436 */
6437 emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
6438
6439 /* MOV output.color[i], tempcolor */
6440 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
6441 &color_src, FALSE); /* XXX saturate? */
6442 }
6443 }
6444
6445
6446 /**
6447 * Emit extra helper code after the original shader code, but before the
6448 * last END/RET instruction.
6449 * For vertex shaders this means emitting the extra code to apply the
6450 * prescale scale/translation.
6451 */
6452 static boolean
6453 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
6454 {
6455 if (emit->unit == PIPE_SHADER_VERTEX) {
6456 emit_vertex_instructions(emit);
6457 }
6458 else if (emit->unit == PIPE_SHADER_FRAGMENT) {
6459 const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
6460
6461 /* We no longer want emit_dst_register() to substitute the
6462 * temporary fragment color register for the real color output.
6463 */
6464 emit->fs.color_tmp_index = INVALID_INDEX;
6465
6466 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
6467 emit_alpha_test_instructions(emit, fs_color_tmp_index);
6468 }
6469 if (emit->key.fs.white_fragments) {
6470 emit_set_color_white(emit, fs_color_tmp_index);
6471 }
6472 if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
6473 emit->key.fs.white_fragments) {
6474 emit_broadcast_color_instructions(emit, fs_color_tmp_index);
6475 }
6476 }
6477
6478 return TRUE;
6479 }
6480
6481
6482 /**
6483 * Translate the TGSI tokens into VGPU10 tokens.
6484 */
6485 static boolean
6486 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
6487 const struct tgsi_token *tokens)
6488 {
6489 struct tgsi_parse_context parse;
6490 boolean ret = TRUE;
6491 boolean pre_helpers_emitted = FALSE;
6492 unsigned inst_number = 0;
6493
6494 tgsi_parse_init(&parse, tokens);
6495
6496 while (!tgsi_parse_end_of_tokens(&parse)) {
6497 tgsi_parse_token(&parse);
6498
6499 switch (parse.FullToken.Token.Type) {
6500 case TGSI_TOKEN_TYPE_IMMEDIATE:
6501 ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
6502 if (!ret)
6503 goto done;
6504 break;
6505
6506 case TGSI_TOKEN_TYPE_DECLARATION:
6507 ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
6508 if (!ret)
6509 goto done;
6510 break;
6511
6512 case TGSI_TOKEN_TYPE_INSTRUCTION:
6513 if (!pre_helpers_emitted) {
6514 ret = emit_pre_helpers(emit);
6515 if (!ret)
6516 goto done;
6517 pre_helpers_emitted = TRUE;
6518 }
6519 ret = emit_vgpu10_instruction(emit, inst_number++,
6520 &parse.FullToken.FullInstruction);
6521 if (!ret)
6522 goto done;
6523 break;
6524
6525 case TGSI_TOKEN_TYPE_PROPERTY:
6526 ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
6527 if (!ret)
6528 goto done;
6529 break;
6530
6531 default:
6532 break;
6533 }
6534 }
6535
6536 done:
6537 tgsi_parse_free(&parse);
6538 return ret;
6539 }
6540
6541
6542 /**
6543 * Emit the first VGPU10 shader tokens.
6544 */
6545 static boolean
6546 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
6547 {
6548 VGPU10ProgramToken ptoken;
6549
6550 /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
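   /* VGPU10 uses the Shader Model 4.0 token encoding, hence version 4.0. */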
6551 ptoken.majorVersion = 4;
6552 ptoken.minorVersion = 0;
6553 ptoken.programType = translate_shader_type(emit->unit);
6554 if (!emit_dword(emit, ptoken.value))
6555 return FALSE;
6556
6557 /* Second token: total length of shader, in tokens. We can't fill this
6558 * in until we're all done. Emit zero for now.
6559 */
6560 return emit_dword(emit, 0);
6561 }
6562
6563
6564 static boolean
6565 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
6566 {
6567 VGPU10ProgramToken *tokens;
6568
6569 /* Replace the second token with total shader length */
6570 tokens = (VGPU10ProgramToken *) emit->buf;
6571 tokens[1].value = emit_get_num_tokens(emit);
6572
6573 return TRUE;
6574 }
6575
6576
6577 /**
6578 * Modify the FS to read the BCOLORs and use the FACE register
6579 * to choose between the front/back colors.
6580 */
6581 static const struct tgsi_token *
6582 transform_fs_twoside(const struct tgsi_token *tokens)
6583 {
6584 if (0) {
6585 debug_printf("Before tgsi_add_two_side ------------------\n");
6586 tgsi_dump(tokens,0);
6587 }
6588 tokens = tgsi_add_two_side(tokens);
6589 if (0) {
6590 debug_printf("After tgsi_add_two_side ------------------\n");
6591 tgsi_dump(tokens, 0);
6592 }
6593 return tokens;
6594 }
6595
6596
6597 /**
6598 * Modify the FS to do polygon stipple.
6599 */
6600 static const struct tgsi_token *
6601 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
6602 const struct tgsi_token *tokens)
6603 {
6604 const struct tgsi_token *new_tokens;
6605 unsigned unit;
6606
6607 if (0) {
6608 debug_printf("Before pstipple ------------------\n");
6609 tgsi_dump(tokens,0);
6610 }
6611
6612 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
6613 TGSI_FILE_INPUT);
6614
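   /* Remember which sampler unit the stipple pattern texture was assigned
    * to; this is copied into the shader variant later.
    */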
6615 emit->fs.pstipple_sampler_unit = unit;
6616
6617 /* Setup texture state for stipple */
6618 emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
6619 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
6620 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
6621 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
6622 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
6623
6624 if (0) {
6625 debug_printf("After pstipple ------------------\n");
6626 tgsi_dump(new_tokens, 0);
6627 }
6628
6629 return new_tokens;
6630 }
6631
6632 /**
6633  * Modify the FS to support anti-aliased points.
6634 */
6635 static const struct tgsi_token *
6636 transform_fs_aapoint(const struct tgsi_token *tokens,
6637 int aa_coord_index)
6638 {
6639 if (0) {
6640 debug_printf("Before tgsi_add_aa_point ------------------\n");
6641 tgsi_dump(tokens,0);
6642 }
6643 tokens = tgsi_add_aa_point(tokens, aa_coord_index);
6644 if (0) {
6645 debug_printf("After tgsi_add_aa_point ------------------\n");
6646 tgsi_dump(tokens, 0);
6647 }
6648 return tokens;
6649 }
6650
6651 /**
6652  * This is the main entrypoint for the TGSI -> VGPU10 translator.
6653 */
6654 struct svga_shader_variant *
6655 svga_tgsi_vgpu10_translate(struct svga_context *svga,
6656 const struct svga_shader *shader,
6657 const struct svga_compile_key *key,
6658 unsigned unit)
6659 {
6660 struct svga_shader_variant *variant = NULL;
6661 struct svga_shader_emitter_v10 *emit;
6662 const struct tgsi_token *tokens = shader->tokens;
6663 struct svga_vertex_shader *vs = svga->curr.vs;
6664 struct svga_geometry_shader *gs = svga->curr.gs;
6665
6666 assert(unit == PIPE_SHADER_VERTEX ||
6667 unit == PIPE_SHADER_GEOMETRY ||
6668 unit == PIPE_SHADER_FRAGMENT);
6669
6670 /* These two flags cannot be used together */
6671 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
6672
6673 /*
6674 * Setup the code emitter
6675 */
6676 emit = alloc_emitter();
6677 if (!emit)
6678 return NULL;
6679
6680 emit->unit = unit;
6681 emit->key = *key;
6682
6683 emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
6684 emit->key.gs.need_prescale);
6685 emit->vposition.tmp_index = INVALID_INDEX;
6686 emit->vposition.so_index = INVALID_INDEX;
6687 emit->vposition.out_index = INVALID_INDEX;
6688
6689 emit->fs.color_tmp_index = INVALID_INDEX;
6690 emit->fs.face_input_index = INVALID_INDEX;
6691 emit->fs.fragcoord_input_index = INVALID_INDEX;
6692
6693 emit->gs.prim_id_index = INVALID_INDEX;
6694
6695 emit->clip_dist_out_index = INVALID_INDEX;
6696 emit->clip_dist_tmp_index = INVALID_INDEX;
6697 emit->clip_dist_so_index = INVALID_INDEX;
6698 emit->clip_vertex_out_index = INVALID_INDEX;
6699
6700 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
6701 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
6702 }
6703
6704 if (unit == PIPE_SHADER_FRAGMENT) {
6705 if (key->fs.light_twoside) {
6706 tokens = transform_fs_twoside(tokens);
6707 }
6708 if (key->fs.pstipple) {
6709 const struct tgsi_token *new_tokens =
6710 transform_fs_pstipple(emit, tokens);
6711 if (tokens != shader->tokens) {
6712 /* free the two-sided shader tokens */
6713 tgsi_free_tokens(tokens);
6714 }
6715 tokens = new_tokens;
6716 }
6717 if (key->fs.aa_point) {
6718 tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
6719 }
6720 }
6721
6722 if (SVGA_DEBUG & DEBUG_TGSI) {
6723 debug_printf("#####################################\n");
6724 debug_printf("### TGSI Shader %u\n", shader->id);
6725 tgsi_dump(tokens, 0);
6726 }
6727
6728 /**
6729     * Rescan the shader info if the token string is different from the one
6730     * included in the shader; otherwise, the scan info is already up to date.
6731 */
6732 if (tokens != shader->tokens) {
6733 tgsi_scan_shader(tokens, &emit->info);
6734 } else {
6735 emit->info = shader->info;
6736 }
6737
6738 emit->num_outputs = emit->info.num_outputs;
6739
6740 if (unit == PIPE_SHADER_FRAGMENT) {
6741 /* Compute FS input remapping to match the output from VS/GS */
6742 if (gs) {
6743 svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage);
6744 } else {
6745 assert(vs);
6746 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
6747 }
6748 } else if (unit == PIPE_SHADER_GEOMETRY) {
6749 assert(vs);
6750 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
6751 }
6752
6753 determine_clipping_mode(emit);
6754
6755 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
6756 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
6757          /* If there are stream output declarations associated
6758           * with this shader or the shader writes to ClipDistance,
6759           * then reserve extra registers for the non-adjusted vertex position
6760           * and the ClipDistance shadow copy.
6761 */
6762 emit->vposition.so_index = emit->num_outputs++;
6763
6764 if (emit->clip_mode == CLIP_DISTANCE) {
6765 emit->clip_dist_so_index = emit->num_outputs++;
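            /* Each output register holds up to four clip distances, so
             * writing more than four requires a second shadow register.
             */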
6766 if (emit->info.num_written_clipdistance > 4)
6767 emit->num_outputs++;
6768 }
6769 }
6770 }
6771
6772 /*
6773 * Do actual shader translation.
6774 */
6775 if (!emit_vgpu10_header(emit)) {
6776 debug_printf("svga: emit VGPU10 header failed\n");
6777 goto cleanup;
6778 }
6779
6780 if (!emit_vgpu10_instructions(emit, tokens)) {
6781 debug_printf("svga: emit VGPU10 instructions failed\n");
6782 goto cleanup;
6783 }
6784
6785 if (!emit_vgpu10_tail(emit)) {
6786 debug_printf("svga: emit VGPU10 tail failed\n");
6787 goto cleanup;
6788 }
6789
6790 if (emit->register_overflow) {
6791 goto cleanup;
6792 }
6793
6794 /*
6795     * Create and initialize the 'variant' object.
6796 */
6797 variant = svga_new_shader_variant(svga);
6798 if (!variant)
6799 goto cleanup;
6800
6801 variant->shader = shader;
6802 variant->nr_tokens = emit_get_num_tokens(emit);
6803 variant->tokens = (const unsigned *)emit->buf;
6804    emit->buf = NULL;  /* buffer is no longer owned by the emitter context */
6805 memcpy(&variant->key, key, sizeof(*key));
6806 variant->id = UTIL_BITMASK_INVALID_INDEX;
6807
6808    /* The extra constants start at an offset equal to the number of
6809     * shader constants declared in the shader.
6810 */
6811 variant->extra_const_start = emit->num_shader_consts[0];
6812 if (key->gs.wide_point) {
6813 /**
6814        * The extra constant added to the transformed shader for the
6815        * inverse viewport scale is supplied by the driver, so the
6816        * extra constant starting offset needs to be reduced by 1.
6817 */
6818 assert(variant->extra_const_start > 0);
6819 variant->extra_const_start--;
6820 }
6821
6822 variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
6823
6824 /* If there was exactly one write to a fragment shader output register
6825 * and it came from a constant buffer, we know all fragments will have
6826 * the same color (except for blending).
6827 */
6828 variant->constant_color_output =
6829 emit->constant_color_output && emit->num_output_writes == 1;
6830
6831    /** Record in the variant whether flat interpolation is used
6832     * for any of the varyings.
6833 */
6834 variant->uses_flat_interp = emit->uses_flat_interp;
6835
6836 if (tokens != shader->tokens) {
6837 tgsi_free_tokens(tokens);
6838 }
6839
6840 cleanup:
6841 free_emitter(emit);
6842
6843 return variant;
6844 }